#
# robots.txt
#
# This file asks web crawlers and spiders, such as those run by Yahoo!
# and Google, not to crawl certain parts of your site. By telling these
# "robots" where not to go on your site, you save bandwidth and server
# resources. Compliance is voluntary, and a blocked URL can still be
# indexed if other sites link to it.
#
# This file will be ignored unless it is at the root of your host:
# Used: http://example.com/robots.txt
# Ignored: http://example.com/site/robots.txt
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/wc/robots.html
#
# For syntax checking, see:
# http://www.sxw.org.uk/computing/robots/check.html
User-agent: *
# Rules in this group apply to every crawler that is not matched by a
# more specific User-agent group.
#
# Exception to the /umbraco/ block below. It is listed first so that
# crawlers which apply the first matching rule (instead of the longest
# matching rule) still honor it.
Allow: /umbraco/plugins/
# Directories
Disallow: /umbraco/
Disallow: /umbraco_client/
# Files
Disallow: /INSTALL.txt
Disallow: /LICENSE.txt
# Paths
# "$" anchors the end of the URL: only /welcome/ itself is blocked, not
# the pages beneath it (requires a crawler that supports "$").
Disallow: /welcome/$
# Plain prefix match: blocks every URL whose path starts with /test.
# Matches the same URLs as "/test*$" on wildcard-aware crawlers, and is
# also understood by crawlers without wildcard support.
Disallow: /test
# Paths (no clean URLs) -- disabled
# Disallow: /?q=admin/
# Bad robots: crawlers listed below are denied access to the whole site.

# The blank line above separates this record from the previous one, as
# the original robots exclusion standard requires between records.
User-agent: ia_archiver
Disallow: /
# SEO-related bots (no rules are currently defined in this section)
# Sitemap locations. Sitemap lines are not tied to any User-agent group.
# NOTE(review): the first entry is on www.cadets.net.au while the others
# are on www.cadetnet.net.au -- confirm the differing host is intentional.
Sitemap: http://www.cadets.net.au/sitemap.xml
Sitemap: http://www.cadetnet.net.au/ror.xml
Sitemap: http://www.cadetnet.net.au/urllist.txt
# Disabled sitemap entries:
# Sitemap: http://cadetnet.net.au/sitemap.html
# Sitemap: http://cadetnet.net.au/sitemap.xml.gz