Commit 112ac4ab authored by Pedro Eduardo Trujillo's avatar Pedro Eduardo Trujillo
Browse files

Merge branch 'dev' into 'master'

Separa bots malos de crawlers, mejora evaluación

See merge request redmic-project/nginx-proxy!6
parents 6ea68240 0591a476
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
#From https://github.com/mariusv/nginx-badbot-blocker/commit/ccf5860
# From https://github.com/mariusv/nginx-badbot-blocker/blob/ccf5860866e4f251066c05b03b675b4f6bcd886b/blockips.conf

deny 178.238.234.1;
deny 76.90.254.19;
+18 −3
Original line number Diff line number Diff line
#From https://github.com/mariusv/nginx-badbot-blocker/commit/173e214
# From https://github.com/mariusv/nginx-badbot-blocker/blob/173e214c69aedd68023005f8a3378583acfed033/blacklist.conf

map $http_user_agent $bad_bot {
	default 0;
	~*(RedmicTasks) 0;
	default 0;
	~*^Lynx 0; # Let Lynx go through
	libwww-perl 1;
	~*(Googlebot|BingBot|Slurp|yandex|MSNBot) 1;
	~*(?i)(80legs|360Spider|Aboundex|Abonti|Acunetix|^AIBOT|^Alexibot|Alligator|AllSubmitter|Apexoo|^asterias|^attach|^BackDoorBot|^BackStreet|^BackWeb|Badass|Bandit|Baid|Baiduspider|^BatchFTP|^Bigfoot|^Black.Hole|^BlackWidow|BlackWidow|^BlowFish|Blow|^BotALot|Buddy|^BuiltBotTough|^Bullseye|^BunnySlippers|BBBike|^Cegbfeieh|^CheeseBot|^CherryPicker|^ChinaClaw|^Cogentbot|CPython|Collector|cognitiveseo|Copier|^CopyRightCheck|^cosmos|^Crescent|CSHttp|^Custo|^Demon|^Devil|^DISCo|^DIIbot|discobot|^DittoSpyder|Download.Demon|Download.Devil|Download.Wonder|^dragonfly|^Drip|^eCatch|^EasyDL|^ebingbong|^EirGrabber|^EmailCollector|^EmailSiphon|^EmailWolf|^EroCrawler|^Exabot|^Express|Extractor|^EyeNetIE|FHscan|^FHscan|^flunky|^Foobot|^FrontPage|GalaxyBot|^gotit|Grabber|^GrabNet|^Grafula|^Harvest|^HEADMasterSEO|^hloader|^HMView|^HTTrack|httrack|HTTrack|htmlparser|^humanlinks|^IlseBot|Image.Stripper|Image.Sucker|imagefetch|^InfoNaviRobot|^InfoTekies|^Intelliseek|^InterGET|^Iria|^Jakarta|^JennyBot|^JetCar|JikeSpider|^JOC|^JustView|^Jyxobot|^Kenjin.Spider|^Keyword.Density|libwww|^larbin|LeechFTP|LeechGet|^LexiBot|^lftp|^libWeb|^likse|^LinkextractorPro|^LinkScan|^LNSpiderguy|^LinkWalker|msnbot|MSIECrawler|MJ12bot|MegaIndex|^Magnet|^Mag-Net|^MarkWatch|Mass.Downloader|masscan|^Mata.Hari|^Memo|^MIIxpc|^NAMEPROTECT|^Navroad|^NearSite|^NetAnts|^Netcraft|^NetMechanic|^NetSpider|^NetZIP|^NextGenSearchBot|^NICErsPRO|^niki-bot|^NimbleCrawler|^Ninja|^Nmap|nmap|^NPbot|Offline.Explorer|Offline.Navigator|OpenLinkProfiler|^Octopus|^Openfind|^OutfoxBot|Pixray|probethenet|proximic|^PageGrabber|^pavuk|^pcBrowser|^Pockey|^ProPowerBot|^ProWebWalker|^psbot|^Pump|python-requests|^QueryN.Metasearch|^RealDownload|Reaper|^Reaper|^Ripper|Ripper|Recorder|^ReGet|^RepoMonkey|^RMA|scanbot|SEOkicks-Robot|seoscanners|^Stripper|^Sucker|Siphon|Siteimprove|^SiteSnagger|SiteSucker|^SlySearch|^SmartDownload|^Snake|^Snapbot|^Snoopy|Sosospider|^sogou|spbot|^SpaceBison|^spanner|^SpankBot|Spinn3r|^Sqworm|Sqworm|Stripper|Sucker|^SuperBot|SuperHTTP|^SuperHTTP|^Surfbot|^suzuran|^Szukacz|^tAkeOut|^Teleport|^Telesoft|^TurnitinBot|^The.Intraformant|^TheNomad|^TightTwatBot|^Titan|^True_Robot|^turingos|^TurnitinBot|^URLy.Warning|^Vacuum|^VCI|VidibleScraper|^VoidEYE|^WebAuto|^WebBandit|^WebCopier|^WebEnhancer|^WebFetch|^Web.Image.Collector|^WebLeacher|^WebmasterWorldForumBot|WebPix|^WebReaper|^WebSauger|Website.eXtractor|^Webster|WebShag|^WebStripper|WebSucker|^WebWhacker|^WebZIP|Whack|Whacker|^Widow|Widow|WinHTTrack|^WISENutbot|WWWOFFLE|^WWWOFFLE|^WWW-Collector-E|^Xaldon|^Xenu|^Zade|^Zeus|ZmEu|^Zyborg|SemrushBot|^WebFuck|^MJ12bot|^majestic12|^WallpapersHD) 1;
}

map $http_user_agent $crawler_bot {
	default 0;
	~*(Googlebot|BingBot|Slurp|yandex|MSNBot) 1;
}

map $http_host $block_bad_bot_by_domain {
	hostnames;
	default 1;
	atlas.* 0;
}

map $http_host $block_crawler_bot_by_domain {
	hostnames;
	default 1;
}

## Add here all referrers that are to blocked.
map $http_referer $bad_referer {
	default 0;
+18 −2
Original line number Diff line number Diff line
#From https://github.com/mariusv/nginx-badbot-blocker/commit/173e214
# From https://github.com/mariusv/nginx-badbot-blocker/blob/173e214c69aedd68023005f8a3378583acfed033/blacklist.conf

map $http_user_agent $bad_bot {
	default 0;
	~*(RedmicTasks) 0;
	default 0;
	~*^Lynx 0; # Let Lynx go through
	libwww-perl 1;
	~*(?i)(80legs|360Spider|Aboundex|Abonti|Acunetix|^AIBOT|^Alexibot|Alligator|AllSubmitter|Apexoo|^asterias|^attach|^BackDoorBot|^BackStreet|^BackWeb|Badass|Bandit|Baid|Baiduspider|^BatchFTP|^Bigfoot|^Black.Hole|^BlackWidow|BlackWidow|^BlowFish|Blow|^BotALot|Buddy|^BuiltBotTough|^Bullseye|^BunnySlippers|BBBike|^Cegbfeieh|^CheeseBot|^CherryPicker|^ChinaClaw|^Cogentbot|CPython|Collector|cognitiveseo|Copier|^CopyRightCheck|^cosmos|^Crescent|CSHttp|^Custo|^Demon|^Devil|^DISCo|^DIIbot|discobot|^DittoSpyder|Download.Demon|Download.Devil|Download.Wonder|^dragonfly|^Drip|^eCatch|^EasyDL|^ebingbong|^EirGrabber|^EmailCollector|^EmailSiphon|^EmailWolf|^EroCrawler|^Exabot|^Express|Extractor|^EyeNetIE|FHscan|^FHscan|^flunky|^Foobot|^FrontPage|GalaxyBot|^gotit|Grabber|^GrabNet|^Grafula|^Harvest|^HEADMasterSEO|^hloader|^HMView|^HTTrack|httrack|HTTrack|htmlparser|^humanlinks|^IlseBot|Image.Stripper|Image.Sucker|imagefetch|^InfoNaviRobot|^InfoTekies|^Intelliseek|^InterGET|^Iria|^Jakarta|^JennyBot|^JetCar|JikeSpider|^JOC|^JustView|^Jyxobot|^Kenjin.Spider|^Keyword.Density|libwww|^larbin|LeechFTP|LeechGet|^LexiBot|^lftp|^libWeb|^likse|^LinkextractorPro|^LinkScan|^LNSpiderguy|^LinkWalker|msnbot|MSIECrawler|MJ12bot|MegaIndex|^Magnet|^Mag-Net|^MarkWatch|Mass.Downloader|masscan|^Mata.Hari|^Memo|^MIIxpc|^NAMEPROTECT|^Navroad|^NearSite|^NetAnts|^Netcraft|^NetMechanic|^NetSpider|^NetZIP|^NextGenSearchBot|^NICErsPRO|^niki-bot|^NimbleCrawler|^Ninja|^Nmap|nmap|^NPbot|Offline.Explorer|Offline.Navigator|OpenLinkProfiler|^Octopus|^Openfind|^OutfoxBot|Pixray|probethenet|proximic|^PageGrabber|^pavuk|^pcBrowser|^Pockey|^ProPowerBot|^ProWebWalker|^psbot|^Pump|python-requests|^QueryN.Metasearch|^RealDownload|Reaper|^Reaper|^Ripper|Ripper|Recorder|^ReGet|^RepoMonkey|^RMA|scanbot|SEOkicks-Robot|seoscanners|^Stripper|^Sucker|Siphon|Siteimprove|^SiteSnagger|SiteSucker|^SlySearch|^SmartDownload|^Snake|^Snapbot|^Snoopy|Sosospider|^sogou|spbot|^SpaceBison|^spanner|^SpankBot|Spinn3r|^Sqworm|Sqworm|Stripper|Sucker|^SuperBot|SuperHTTP|^SuperHTTP|^Surfbot|^suzuran|^Szukacz|^tAkeOut|^Teleport|^Telesoft|^TurnitinBot|^The.Intraformant|^TheNomad|^TightTwatBot|^Titan|^True_Robot|^turingos|^TurnitinBot|^URLy.Warning|^Vacuum|^VCI|VidibleScraper|^VoidEYE|^WebAuto|^WebBandit|^WebCopier|^WebEnhancer|^WebFetch|^Web.Image.Collector|^WebLeacher|^WebmasterWorldForumBot|WebPix|^WebReaper|^WebSauger|Website.eXtractor|^Webster|WebShag|^WebStripper|WebSucker|^WebWhacker|^WebZIP|Whack|Whacker|^Widow|Widow|WinHTTrack|^WISENutbot|WWWOFFLE|^WWWOFFLE|^WWW-Collector-E|^Xaldon|^Xenu|^Zade|^Zeus|ZmEu|^Zyborg|SemrushBot|^WebFuck|^MJ12bot|^majestic12|^WallpapersHD) 1;
}

map $http_user_agent $crawler_bot {
	default 0;
	~*(Googlebot|BingBot|Slurp|yandex|MSNBot) 1;
}

map $http_host $block_bad_bot_by_domain {
	hostnames;
	default 1;
	atlas.* 0;
}

map $http_host $block_crawler_bot_by_domain {
	hostnames;
	default 0;
}

## Add here all referrers that are to blocked.
map $http_referer $bad_referer {
	default 0;
+1 −1
Original line number Diff line number Diff line
@@ -15,9 +15,9 @@ server {
	listen 443 ssl http2 default_server;

	include /etc/nginx/snippets/ssl-well-known.conf;
	include /etc/nginx/snippets/block-bad-request.conf;

	location / {
		include /etc/nginx/snippets/block-bad-request.conf;
		include /etc/nginx/snippets/cache-usage.conf;

		add_header X-Frame-Options "SAMEORIGIN";
+16 −0
Original line number Diff line number Diff line
if ($bad_bot) {
	set $block_bad_bot 'O';
}
if ($block_bad_bot_by_domain) {
	set $block_bad_bot '${block_bad_bot}K';
}
if ($block_bad_bot = 'OK') {
	return 444;
}

if ($crawler_bot) {
	set $block_crawler_bot 'O';
}
if ($block_crawler_bot_by_domain) {
	set $block_crawler_bot '${block_crawler_bot}K';
}
if ($block_crawler_bot = 'OK') {
	return 444;
}