Just curious...
Oct. 31st, 2004   8:59pm

Did you ever try just banning "bad bots"?  Here’s a pretty comprehensive list. You would need to modify it somewhat, as I don’t know if you are using mod_rewrite, and obviously your custom platform would need its own adjustments, but it’s the list of bad, hungry bots that is most important rather than the implementation method you employ.  I also ban the Turnitin bot, as it tends to get stuck in our forums.

# $Id: htaccess.txt,v 1.10 2003/12/22 10:37:29 rcastley Exp $
#
# htaccess file for SEF
# @package Mambo Open Source
# @Copyright (C) 2000 - 2003 Miro International Pty Ltd
# @ All rights reserved
# @ Mambo Open Source is Free Software
# @ Released under GNU/GPL License : http://www.gnu.org/copyleft/gpl.html
# @version $Revision: 1.10 $


#
#  mod_rewrite in use
#

# Enable mod_rewrite processing for the rules in this file.
RewriteEngine On

#  Uncomment following line if your webserver's URL
#  is not directly related to physical file paths.
#  Update YourMamboDirectory (just / for root)

# RewriteBase /YourMamboDirectory

#
#  Rules
#

# SEF (search-engine-friendly) URLs: any path that begins with "content" or
# "component/" is handed to index.php.  Neither rule carries [L], so rule
# processing continues with the RewriteRule lines further down this file.
RewriteRule ^content(.*) index.php
RewriteRule ^component/(.*) index.php

# Refuse direct HTTP access to support files, selected by extension
# (includes, templates, SQL dumps, ini/configuration files, ...).
# ".php" is intentionally absent from the list and stays reachable.
<FilesMatch "\.(inc|tpl|h|ihtml|sql|ini|configuration|class|bin|spd|theme|module)$">
    Deny from all
</FilesMatch>

# Access rule for the GET, PUT and POST methods: allow every client.
<Limit GET PUT POST>
    Order allow,deny
    Allow from all
</Limit>

# Never serve this .htaccess file itself to clients.
<Files ".htaccess">
    Order allow,deny
    Deny from all
</Files>

# Bad-bot filter.
#
# If the User-Agent header starts with any of the names below (email
# harvesters and other unwanted robots), the request is internally rewritten
# to /noindex.php instead of the page that was asked for.  This is a rewrite,
# not an external redirect: the client is not told about the substitution.
#
# How to maintain the list:
#   * Every RewriteCond except the LAST one must carry [OR]; the last one
#     closes the chain and must not have it.
#   * Matching is case-sensitive unless the line is flagged [NC].
#   * "\ " is a literal space; an unescaped "." matches any single character.
#   * Use plain ASCII only.  The Xenu entry is matched by prefix, stopping
#     before the apostrophe, so it does not depend on which quote character
#     (or none) the client sends.
RewriteCond %{HTTP_USER_AGENT} ^Alexibot                [OR]
RewriteCond %{HTTP_USER_AGENT} ^asterias                [OR]
RewriteCond %{HTTP_USER_AGENT} ^BackDoorBot             [OR]
RewriteCond %{HTTP_USER_AGENT} ^Black.Hole              [OR]
RewriteCond %{HTTP_USER_AGENT} ^BlackWidow              [OR]
RewriteCond %{HTTP_USER_AGENT} ^BlowFish                [OR]
RewriteCond %{HTTP_USER_AGENT} ^BotALot                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^BuiltBotTough           [OR]
RewriteCond %{HTTP_USER_AGENT} ^Bullseye                [OR]
RewriteCond %{HTTP_USER_AGENT} ^BunnySlippers           [OR]
RewriteCond %{HTTP_USER_AGENT} ^Cegbfeieh               [OR]
RewriteCond %{HTTP_USER_AGENT} ^CheeseBot               [OR]
RewriteCond %{HTTP_USER_AGENT} ^CherryPicker            [OR]
RewriteCond %{HTTP_USER_AGENT} ^ChinaClaw               [OR]
RewriteCond %{HTTP_USER_AGENT} ^CopyRightCheck          [OR]
RewriteCond %{HTTP_USER_AGENT} ^cosmos                  [OR]
RewriteCond %{HTTP_USER_AGENT} ^Crescent                [OR]
RewriteCond %{HTTP_USER_AGENT} ^Custo                   [OR]
RewriteCond %{HTTP_USER_AGENT} ^DISCo                   [OR]
RewriteCond %{HTTP_USER_AGENT} ^DittoSpyder             [OR]
RewriteCond %{HTTP_USER_AGENT} ^Download\ Demon         [OR]
RewriteCond %{HTTP_USER_AGENT} ^eCatch                  [OR]
RewriteCond %{HTTP_USER_AGENT} ^EirGrabber              [OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailCollector          [OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailSiphon             [OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailWolf               [OR]
RewriteCond %{HTTP_USER_AGENT} ^EroCrawler              [OR]
RewriteCond %{HTTP_USER_AGENT} ^Express\ WebPictures    [OR]
RewriteCond %{HTTP_USER_AGENT} ^ExtractorPro            [OR]
RewriteCond %{HTTP_USER_AGENT} ^EyeNetIE                [OR]
RewriteCond %{HTTP_USER_AGENT} ^FlashGet                [OR]
RewriteCond %{HTTP_USER_AGENT} ^Foobot                  [OR]
RewriteCond %{HTTP_USER_AGENT} ^FrontPage               [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^GetRight                [OR]
RewriteCond %{HTTP_USER_AGENT} ^GetWeb!                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^Go-Ahead-Got-It         [OR]
RewriteCond %{HTTP_USER_AGENT} ^Googlebot-Image         [OR]
RewriteCond %{HTTP_USER_AGENT} ^Go!Zilla                [OR]
RewriteCond %{HTTP_USER_AGENT} ^GrabNet                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^Grafula                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^Harvest                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^hloader                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^HMView                  [OR]
RewriteCond %{HTTP_USER_AGENT} ^httplib                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^HTTrack                 [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^humanlinks              [OR]
RewriteCond %{HTTP_USER_AGENT} ^ia_archiver             [OR]
RewriteCond %{HTTP_USER_AGENT} ^Image\ Stripper         [OR]
RewriteCond %{HTTP_USER_AGENT} ^Image\ Sucker           [OR]
RewriteCond %{HTTP_USER_AGENT} ^Indy\ Library           [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^InfoNaviRobot           [OR]
RewriteCond %{HTTP_USER_AGENT} ^InterGET                [OR]
RewriteCond %{HTTP_USER_AGENT} ^Internet\ Ninja         [OR]
RewriteCond %{HTTP_USER_AGENT} ^JennyBot                [OR]
RewriteCond %{HTTP_USER_AGENT} ^JetCar                  [OR]
RewriteCond %{HTTP_USER_AGENT} ^JOC\ Web\ Spider        [OR]
RewriteCond %{HTTP_USER_AGENT} ^Kenjin.Spider           [OR]
RewriteCond %{HTTP_USER_AGENT} ^Keyword.Density         [OR]
RewriteCond %{HTTP_USER_AGENT} ^larbin                  [OR]
RewriteCond %{HTTP_USER_AGENT} ^LeechFTP                [OR]
RewriteCond %{HTTP_USER_AGENT} ^LexiBot                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^libWeb/clsHTTP          [OR]
RewriteCond %{HTTP_USER_AGENT} ^LinkextractorPro        [OR]
RewriteCond %{HTTP_USER_AGENT} ^LinkScan/8.1a.Unix      [OR]
RewriteCond %{HTTP_USER_AGENT} ^LinkWalker              [OR]
RewriteCond %{HTTP_USER_AGENT} ^lwp-trivial             [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mass\ Downloader        [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mata.Hari               [OR]
RewriteCond %{HTTP_USER_AGENT} ^Microsoft.URL           [OR]
RewriteCond %{HTTP_USER_AGENT} ^MIDown\ tool            [OR]
RewriteCond %{HTTP_USER_AGENT} ^MIIxpc                  [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mister.PiX              [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mister\ PiX             [OR]
RewriteCond %{HTTP_USER_AGENT} ^moget                   [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mozilla/2               [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mozilla/3.Mozilla/2.01  [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mozilla.*NEWT           [OR]
RewriteCond %{HTTP_USER_AGENT} ^Navroad                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^NearSite                [OR]
RewriteCond %{HTTP_USER_AGENT} ^NetAnts                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^NetMechanic             [OR]
RewriteCond %{HTTP_USER_AGENT} ^NetSpider               [OR]
RewriteCond %{HTTP_USER_AGENT} ^Net\ Vampire            [OR]
RewriteCond %{HTTP_USER_AGENT} ^NetZIP                  [OR]
RewriteCond %{HTTP_USER_AGENT} ^NICErsPRO               [OR]
RewriteCond %{HTTP_USER_AGENT} ^NPBot                   [OR]
RewriteCond %{HTTP_USER_AGENT} ^Octopus                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^Offline.Explorer        [OR]
RewriteCond %{HTTP_USER_AGENT} ^Offline\ Explorer       [OR]
RewriteCond %{HTTP_USER_AGENT} ^Offline\ Navigator      [OR]
RewriteCond %{HTTP_USER_AGENT} ^Openfind                [OR]
RewriteCond %{HTTP_USER_AGENT} ^PageGrabber             [OR]
RewriteCond %{HTTP_USER_AGENT} ^Papa\ Foto              [OR]
RewriteCond %{HTTP_USER_AGENT} ^pavuk                   [OR]
RewriteCond %{HTTP_USER_AGENT} ^pcBrowser               [OR]
RewriteCond %{HTTP_USER_AGENT} ^ProPowerBot/2.14        [OR]
RewriteCond %{HTTP_USER_AGENT} ^ProWebWalker            [OR]
RewriteCond %{HTTP_USER_AGENT} ^QueryN.Metasearch       [OR]
RewriteCond %{HTTP_USER_AGENT} ^ReGet                   [OR]
RewriteCond %{HTTP_USER_AGENT} ^RepoMonkey              [OR]
RewriteCond %{HTTP_USER_AGENT} ^RMA                     [OR]
RewriteCond %{HTTP_USER_AGENT} ^SiteSnagger             [OR]
RewriteCond %{HTTP_USER_AGENT} ^SlySearch               [OR]
RewriteCond %{HTTP_USER_AGENT} ^SmartDownload           [OR]
RewriteCond %{HTTP_USER_AGENT} ^SpankBot                [OR]
RewriteCond %{HTTP_USER_AGENT} ^spanner                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^SuperBot                [OR]
RewriteCond %{HTTP_USER_AGENT} ^SuperHTTP               [OR]
RewriteCond %{HTTP_USER_AGENT} ^Surfbot                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^suzuran                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^Szukacz/1.4             [OR]
RewriteCond %{HTTP_USER_AGENT} ^tAkeOut                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^Teleport                [OR]
RewriteCond %{HTTP_USER_AGENT} ^Teleport\ Pro           [OR]
RewriteCond %{HTTP_USER_AGENT} ^Telesoft                [OR]
RewriteCond %{HTTP_USER_AGENT} ^The.Intraformant        [OR]
RewriteCond %{HTTP_USER_AGENT} ^TheNomad                [OR]
RewriteCond %{HTTP_USER_AGENT} ^TightTwatBot            [OR]
RewriteCond %{HTTP_USER_AGENT} ^Titan                   [OR]
RewriteCond %{HTTP_USER_AGENT} ^toCrawl/UrlDispatcher   [OR]
RewriteCond %{HTTP_USER_AGENT} ^True_Robot              [OR]
RewriteCond %{HTTP_USER_AGENT} ^turingos                [OR]
RewriteCond %{HTTP_USER_AGENT} ^TurnitinBot/1.5         [OR]
RewriteCond %{HTTP_USER_AGENT} ^URLy.Warning            [OR]
RewriteCond %{HTTP_USER_AGENT} ^VCI                     [OR]
RewriteCond %{HTTP_USER_AGENT} ^VoidEYE                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebAuto                 [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebBandit               [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebCopier               [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebEMailExtrac.*        [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebEnhancer             [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebFetch                [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebGo\ IS               [OR]
RewriteCond %{HTTP_USER_AGENT} ^Web.Image.Collector     [OR]
RewriteCond %{HTTP_USER_AGENT} ^Web\ Image\ Collector   [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebLeacher              [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebmasterWorldForumBot  [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebReaper               [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebSauger               [OR]
RewriteCond %{HTTP_USER_AGENT} ^Website\ eXtractor      [OR]
RewriteCond %{HTTP_USER_AGENT} ^Website.Quester         [OR]
RewriteCond %{HTTP_USER_AGENT} ^Website\ Quester        [OR]
RewriteCond %{HTTP_USER_AGENT} ^Webster.Pro             [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebStripper             [OR]
RewriteCond %{HTTP_USER_AGENT} ^Web\ Sucker             [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebWhacker              [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebZip                  [OR]
RewriteCond %{HTTP_USER_AGENT} ^Wget                    [OR]
RewriteCond %{HTTP_USER_AGENT} ^Widow                   [OR]
RewriteCond %{HTTP_USER_AGENT} ^[Ww]eb[Bb]andit         [OR]
RewriteCond %{HTTP_USER_AGENT} ^WWW-Collector-E         [OR]
RewriteCond %{HTTP_USER_AGENT} ^WWWOFFLE                [OR]
RewriteCond %{HTTP_USER_AGENT} ^Xaldon\ WebSpider       [OR]
RewriteCond %{HTTP_USER_AGENT} ^Xenu                    [OR]
RewriteCond %{HTTP_USER_AGENT} ^Zeus
# Any URL requested by a matching agent is answered with /noindex.php;
# [L] stops further rule processing for this pass.
RewriteRule ^.*$ /noindex.php  [L]

# Disabled: would rewrite requests whose User-Agent is empty or just "-"
# to /www/spamcop/noID.php.  Remove the leading "#" on both lines to enable.
#RewriteCond %{HTTP_USER_AGENT}  ^-?$
#RewriteRule ^.*$ /www/spamcop/noID.php [L]

# Serve index.php when index.html is requested.  The dot is escaped and the
# pattern is anchored at both ends so that only the literal name "index.html"
# matches (an unescaped "." would match any character).
RewriteRule ^index\.html$ index.php [L]

Betsy
Rare Disease Search Engine, Homeschool Sites, Online Homeschool, Online Income, Ethical Adsense, Creative writing, Family Web Hosting, Christian Radio, Tulsa Parks