# parts of OmegaWiki that should not be accessed/indexed
# Sitemap is a file-wide directive, so it is listed outside the groups.
Sitemap: http://www.omegawiki.org/sitemap/sitemap-index-omegawikib.xml

# Default group: applies to every crawler that has no dedicated group below.
# Do not insert blank lines inside this group; a blank line ends the group
# for parsers that follow the original robots exclusion standard.
# Wildcard rules are written as "/*..." because a rule path has to start
# with "/"; they match the same URLs as a bare leading "*" would.
User-agent: *
# Non-standard directive; crawlers that honor it wait 5 seconds between requests.
Crawl-delay: 5
Disallow: /extensions/
Disallow: /maintenance/
# Disallow: /skins/
Disallow: /downloads/
Disallow: /Category:
Disallow: /*DefinedMeaning:
# Prefix match: this also covers /Special:Randompage and /Special:Search.
Disallow: /Special:
#Disallow: /index.php
Disallow: /*diff=
Disallow: /api.php
Disallow: /*action=history
Disallow: /*action=edit
Disallow: /*printable=
# Disallow: /*title=MediaWiki
Disallow: /*title=Special:
Disallow: /*title=Template
Disallow: /*dataset=umls
Disallow: /*dataset=sp
Disallow: /*langfilter=
# Disallow setlang for languages with fewer than 1000 definitions,
# i.e. allowed are bg, br, de, en, es, fr, it, nl, pl, pt, sl, sv, tr.
# Each rule is a prefix match: "/*setlang=a" blocks every code starting with "a".
# NOTE(review): the rules are looser than the list above -- nothing blocks
# codes starting with i, n, p, s or t, and unlisted codes such as "da" also
# remain crawlable. Confirm whether that is intended.
Disallow: /*setlang=a
Disallow: /*setlang=ba
Disallow: /*setlang=bc
Disallow: /*setlang=be
Disallow: /*setlang=bh
Disallow: /*setlang=bi
Disallow: /*setlang=bj
Disallow: /*setlang=bm
Disallow: /*setlang=bn
Disallow: /*setlang=bo
Disallow: /*setlang=bp
Disallow: /*setlang=bq
Disallow: /*setlang=brh
Disallow: /*setlang=bs
Disallow: /*setlang=bu
Disallow: /*setlang=c
Disallow: /*setlang=de-at
Disallow: /*setlang=de-ch
Disallow: /*setlang=de-formal
Disallow: /*setlang=di
Disallow: /*setlang=ds
Disallow: /*setlang=dt
Disallow: /*setlang=dv
Disallow: /*setlang=dz
Disallow: /*setlang=ee
Disallow: /*setlang=el
Disallow: /*setlang=em
Disallow: /*setlang=en-gb
Disallow: /*setlang=eo
Disallow: /*setlang=et
Disallow: /*setlang=eu
Disallow: /*setlang=ex
Disallow: /*setlang=fa
Disallow: /*setlang=ff
Disallow: /*setlang=fi
Disallow: /*setlang=fj
Disallow: /*setlang=fo
Disallow: /*setlang=frc
Disallow: /*setlang=frp
Disallow: /*setlang=frr
Disallow: /*setlang=fu
Disallow: /*setlang=fy
Disallow: /*setlang=g
Disallow: /*setlang=h
Disallow: /*setlang=j
Disallow: /*setlang=k
Disallow: /*setlang=l
Disallow: /*setlang=m
Disallow: /*setlang=o
Disallow: /*setlang=q
Disallow: /*setlang=r
Disallow: /*setlang=u
Disallow: /*setlang=v
Disallow: /*setlang=w
Disallow: /*setlang=x
Disallow: /*setlang=y
Disallow: /*setlang=z
# Bots allowed at the moment:
# Google
# msnbot
# Yahoo
# Baidu
# others whose ban is commented out below
# Per-bot bans. A group that is commented out means the bot is currently
# allowed and is governed by the "User-agent: *" rules instead.
# Groups are separated by blank lines, as the original robots exclusion
# standard requires, so that older parsers do not run them together.

# Yahoo, alexa rank 4
# User-agent: Slurp
# Disallow: /

# Baidu, alexa rank 6
# User-agent: Baiduspider
# Disallow: /

User-agent: 008
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: Butterfly
Disallow: /

User-agent: coccoc
Disallow: /

User-agent: Daumoa
Disallow: /

User-agent: Evrinid
Disallow: /

User-agent: HolmesBot
Disallow: /

# User-agent: ia_archiver
# Disallow: /

User-agent: ICC-Crawler
Disallow: /

User-agent: IntegromeDB
Disallow: /

User-agent: IstellaBot
Disallow: /

# JikeSpider, alexa rank 9106
User-agent: JikeSpider
Disallow: /

User-agent: Mail.Ru
Disallow: /

User-agent: merlinkbot
Disallow: /

# NaverBot, alexa rank 218
User-agent: NaverBot
Disallow: /

# Sogou, alexa rank 66
User-agent: Sogou
Disallow: /

# Soso, alexa rank 49
#User-agent: Sosospider
#Disallow: /
#User-agent: Sosospider+
#Disallow: /

User-agent: Speedy
Disallow: /

User-agent: Spinn3r
Disallow: /

User-agent: Teoma
Disallow: /

User-agent: Twitterbot
Disallow: /

User-agent: UnwindFetchor
Disallow: /

User-agent: WBSearchBot
Disallow: /

# Yandex, alexa rank 22 (Russian website)
#User-agent: YandexBot
#Disallow: /

User-agent: yacybot
Disallow: /

User-agent: Yeti
Disallow: /

User-agent: YisouSpider
Disallow: /

User-agent: YoudaoBot
Disallow: /
# Advertising-related bots.
# The user-agent value is the plain product token: a trailing "*" is not a
# wildcard in this field, and parsers that compare the name literally or by
# substring would never match the crawler if it were present.

User-agent: Mediapartners-Google
Disallow: /
# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
# Each group is set off by a blank line so that parsers following the
# original robots exclusion standard see separate records.

User-agent: UbiCrawler
Disallow: /

User-agent: DOC
Disallow: /

User-agent: Zao
Disallow: /
# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
# Each group is set off by a blank line so that parsers following the
# original robots exclusion standard see separate records.

User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: Microsoft.URL.Control
Disallow: /

User-agent: Xenu
Disallow: /

User-agent: larbin
Disallow: /

User-agent: libwww
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /
# Individually banned crawlers, with the recorded reason where there is one.
# Each group is set off by a blank line so that parsers following the
# original robots exclusion standard see separate records.

# The 'grub' distributed client has been *very* poorly behaved.
User-agent: grub-client
Disallow: /

User-agent: MJ12bot
Disallow: /

# Doesn't follow robots.txt anyway, but...
User-agent: k2spider
Disallow: /

# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /

# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /

User-agent: Twiceler
Disallow: /

# Overly aggressive crawler -- Erik
User-agent: SemrushBot
Disallow: /