# robots.txt - crawler exclusion rules for this site.
#
# Format notes:
#   * One directive per line. A '#' starts a comment that runs to the end
#     of the line, so directives must never share a line with a preceding
#     comment or they are silently ignored.
#   * Groups (records) are separated by a blank line. Do NOT put blank
#     lines inside a group; older parsers treat them as the end of the record.
#   * Every Disallow path must begin with '/' to match a URL path.
#
# ---------------------------------------------------------------------------
# Group 1: rules that apply to all robots.
# ---------------------------------------------------------------------------
User-agent: *
# Table of contents page (leading '/' is required for the rule to match).
Disallow: /toc.html
# addinfo subdirectory (the images subdirectory below is already covered by
# this prefix; it is listed explicitly to keep the original intent visible).
Disallow: /addinfo/
Disallow: /addinfo/images/
# Animations subdirectory.
Disallow: /animation/
# Executables subdirectory.
Disallow: /cgi-bin/
# Chapter subdirectories 1 through 10. The trailing '/' matters: "/chap1/"
# does not match "/chap10/", so each chapter needs its own rule.
Disallow: /chap1/
Disallow: /chap2/
Disallow: /chap3/
Disallow: /chap4/
Disallow: /chap5/
Disallow: /chap6/
Disallow: /chap7/
Disallow: /chap8/
Disallow: /chap9/
Disallow: /chap10/
#
# Exclude the help system from robots.
# The following is from the default robots.txt in Mandriva 2006.
Disallow: /manual/
Disallow: /manual-2.0/
Disallow: /manual-1.3/
Disallow: /addon-modules/
Disallow: /doc/
Disallow: /images/
#
# The next line is a spam bot trap, for grepping the logs.
# You should _really_ change this to something else...
#Disallow: /all_our_e-mail_addresses
Disallow: /axx_our_e-mail_addrexxes
#
# Same idea here...
Disallow: /admin/
#
# But allow htdig to index our doc-tree (currently disabled).
#User-agent: htdig
#Disallow:

# ---------------------------------------------------------------------------
# Group 2: disallow the stress-test agent from the entire site.
# ---------------------------------------------------------------------------
User-agent: stress-agent
Disallow: /