list-of-ai-web-crawlers/robots.txt

# ======================================================================
# Disclaimer:
#
# Any form of web crawling for AI-based technologies or similar purposes
# is strictly prohibited, regardless of whether the specific service is
# listed in this robots.txt file or not.
#
# Legal action will be taken in the event of violations of this policy.
#
# This policy is considered binding, as this file is read by web
# crawlers.
# ======================================================================
# Block Google's AI-training and ads crawlers
User-agent: Google-Extended
Disallow: /
User-agent: AdsBot-Google
Disallow: /
User-agent: GoogleOther
Disallow: /
# Block Applebot-Extended
User-agent: Applebot-Extended
Disallow: /
# Block FacebookBot
User-agent: FacebookBot
Disallow: /
# Block Amazonbot
User-agent: Amazonbot
Disallow: /
# Block OpenAI's GPTBot and ChatGPT-User
User-agent: GPTBot
Disallow: /
User-agent: ChatGPT-User
Disallow: /
# Block Anthropic Claude bots
User-agent: ClaudeBot
Disallow: /
User-agent: Claude-Web
Disallow: /
# Block PerplexityBot
User-agent: PerplexityBot
Disallow: /
# Block Anthropic's anthropic-ai token
User-agent: anthropic-ai
Disallow: /
# Block Cohere's bot
User-agent: cohere-ai
Disallow: /
# Block Diffbot
User-agent: Diffbot
Disallow: /
# Block img2dataset crawler
User-agent: img2dataset
Disallow: /
# Block FriendlyCrawler
User-agent: FriendlyCrawler
Disallow: /
# Block ByteDance's Bytespider
User-agent: Bytespider
Disallow: /
# Block Common Crawl's CCBot
User-agent: CCBot
Disallow: /
# Block Omgili crawlers
User-agent: omgili
Disallow: /
User-agent: omgilibot
Disallow: /
# Block Peer39 crawlers
User-agent: peer39_crawler
Disallow: /
User-agent: peer39_crawler/1.0
Disallow: /
# Block web crawler financed by the Russian state
User-agent: Awakari
Disallow: /
# Block YouBot
User-agent: YouBot
Disallow: /
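
The rules above only take effect if a crawler actually fetches this file and honours the group matching its own user-agent token. As a rough illustration (not part of the robots.txt itself), the following Python sketch checks a few of the listed tokens against a local copy of this file using the standard-library urllib.robotparser; the local file path, the example.com URL, and the "SomeOtherBot" token are placeholders, not anything defined by this file.

from urllib import robotparser

# Parse a local copy of this robots.txt (the path is an assumption).
with open("robots.txt") as fh:
    rules = fh.read().splitlines()

rp = robotparser.RobotFileParser()
rp.parse(rules)

# Every listed token is disallowed from the whole site ("Disallow: /"),
# so can_fetch() should return False for any URL. A token with no
# matching group and no "User-agent: *" default is still allowed.
for agent in ("GPTBot", "ClaudeBot", "CCBot", "SomeOtherBot"):
    print(agent, rp.can_fetch(agent, "https://example.com/any/page"))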