Add robots.txt

This commit is contained in:
Ramses Revengeday 2024-07-05 20:57:30 +00:00
parent f4fd4aeac6
commit 00689f3328

101
robots.txt Normal file
View file

@ -0,0 +1,101 @@
# ======================================================================
# Disclaimer:
#
# Any form of web crawling for AI-based technologies or similar purposes
# is strictly prohibited, regardless of whether the specific service is
# listed in this robots.txt file or not.
#
# Legal action will be taken in case of violations of this policy.
#
# This information is considered binding, as this file is read by web
# crawlers.
# ======================================================================
# Block Google's Google-Extended, AdsBot-Google and GoogleOther agents
User-agent: Google-Extended
Disallow: /
User-agent: AdsBot-Google
Disallow: /
User-agent: GoogleOther
Disallow: /
# Block Applebot-Extended (Apple's AI-training token, not the regular Applebot)
User-agent: Applebot-Extended
Disallow: /
# Block FacebookBot
User-agent: FacebookBot
Disallow: /
# Block Amazonbot
User-agent: Amazonbot
Disallow: /
# Block OpenAI's GPTBot crawler and the ChatGPT-User agent
User-agent: GPTBot
Disallow: /
User-agent: ChatGPT-User
Disallow: /
# Block Anthropic Claude bots
User-agent: ClaudeBot
Disallow: /
User-agent: Claude-Web
Disallow: /
# Block PerplexityBot
User-agent: PerplexityBot
Disallow: /
# Block the other Anthropic user agent (anthropic-ai)
User-agent: anthropic-ai
Disallow: /
# Block Cohere's bot
User-agent: cohere-ai
Disallow: /
# Block Diffbot
User-agent: Diffbot
Disallow: /
# Block img2dataset crawler
User-agent: img2dataset
Disallow: /
# Block FriendlyCrawler and Bytespider (ByteDance)
User-agent: FriendlyCrawler
Disallow: /
User-agent: Bytespider
Disallow: /
# Block CCBot
User-agent: CCBot
Disallow: /
# Block Omgili crawlers
User-agent: omgili
Disallow: /
User-agent: omgilibot
Disallow: /
# Block Peer39 crawlers
User-agent: peer39_crawler
Disallow: /
User-agent: peer39_crawler/1.0
Disallow: /
# Block Awakari (a web crawler financed by the Russian state)
User-agent: Awakari
Disallow: /
# Block YouBot
User-agent: YouBot
Disallow: /