robots.txt 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. # Our policy
  2. #
  3. # Allowed:
  4. # - Search engine indexers
  5. # - Archival services (e.g. IA)
  6. #
  7. # Disallowed:
  8. # - Marketing or SEO crawlers
  9. # - Bots which are too aggressive by default. This is subjective: if you annoy
  10. # our sysadmins you'll be blocked.
  11. #
  12. # Reach out to sir@cmpwn.com if you have questions.
  13. # It doesn't make sense to index these and/or it's expensive:
  14. User-agent: *
  15. Disallow: /*?*
  16. Disallow: /*.tar.gz$
  17. Disallow: /metrics
  18. Disallow: /*/*/blame/*
  19. Disallow: /*/*/log/*
  20. Disallow: /*/*/tree/*
  21. Disallow: /*/*/item/*
  22. # Too aggressive, marketing/SEO
  23. User-agent: SemrushBot
  24. Disallow: /
  25. # Too aggressive, marketing/SEO
  26. User-agent: SemrushBot-SA
  27. Disallow: /
  28. # Marketing/SEO
  29. User-agent: AhrefsBot
  30. Disallow: /
  31. # Marketing/SEO
  32. User-agent: dotbot
  33. Disallow: /
  34. # Marketing/SEO
  35. User-agent: rogerbot
  36. Disallow: /
  37. User-agent: BLEXBot
  38. Disallow: /
  39. # Huawei something or other, badly behaved
  40. User-agent: AspiegelBot
  41. Disallow: /
  42. # Marketing/SEO
  43. User-agent: ZoominfoBot
  44. Disallow: /
  45. # YandexBot is a dickhead, too aggressive
  46. User-agent: Yandex
  47. Disallow: /
  48. # Marketing/SEO
  49. User-agent: MJ12bot
  50. Disallow: /
  51. # Marketing/SEO
  52. User-agent: DataForSeoBot
  53. Disallow: /
  54. # Used for Alexa, I guess, who cares
  55. User-agent: Amazonbot
  56. Disallow: /
  57. # No
  58. User-agent: turnitinbot
  59. Disallow: /
  60. # Does not respect * directives
  61. User-agent: Seekport Crawler
  62. Disallow: /