# The FULL URL to the DSpace sitemaps
# The base URL (http://ebooks.edu.gr/ebooks) is auto-filled from the value in dspace.cfg
# XML sitemap is listed first as it is preferred by most search engines
#Sitemap: http://ebooks.edu.gr/ebooks/sitemap
#Sitemap: http://ebooks.edu.gr/ebooks/htmlmap
##########################
# Default Access Group
# (NOTE: blank lines are not allowed within a group record)
##########################
User-agent: *
# Disable access to Discovery search and filters
Disallow: /ebooks/simple-search
# Also block per-handle searches, e.g. /ebooks/handle/123456789/3/simple-search?
# NOTE: The pattern /ebooks*simple-search does NOT work, but /*simple-search does (tested in Google Webmaster Tools)
Disallow: /*simple-search
##############################
# Section for misbehaving bots
# The following directives to block specific robots were borrowed from Wikipedia's robots.txt
##############################
# advertising-related bots:
User-agent: Mediapartners-Google*
Disallow: /ebooks
# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
User-agent: UbiCrawler
Disallow: /ebooks
User-agent: DOC
Disallow: /ebooks
User-agent: Zao
Disallow: /ebooks
# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
User-agent: sitecheck.internetseer.com
Disallow: /ebooks
User-agent: Zealbot
Disallow: /ebooks
User-agent: MSIECrawler
Disallow: /ebooks
User-agent: SiteSnagger
Disallow: /ebooks
User-agent: WebStripper
Disallow: /ebooks
User-agent: WebCopier
Disallow: /ebooks
User-agent: Fetch
Disallow: /ebooks
User-agent: Offline Explorer
Disallow: /ebooks
User-agent: Teleport
Disallow: /ebooks
User-agent: TeleportPro
Disallow: /ebooks
User-agent: WebZIP
Disallow: /ebooks
User-agent: linko
Disallow: /ebooks
User-agent: HTTrack
Disallow: /ebooks
User-agent: Microsoft.URL.Control
Disallow: /ebooks
User-agent: Xenu
Disallow: /ebooks
User-agent: larbin
Disallow: /ebooks
User-agent: libwww
Disallow: /ebooks
User-agent: ZyBORG
Disallow: /ebooks
User-agent: Download Ninja
Disallow: /ebooks
# Misbehaving: requests much too fast:
User-agent: fast
Disallow: /ebooks
#
# The 'grub' distributed client has been *very* poorly behaved.
#
User-agent: grub-client
Disallow: /ebooks
#
# Doesn't follow robots.txt anyway, but...
#
User-agent: k2spider
Disallow: /ebooks
#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /ebooks
# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /ebooks