From a1708ef12b43241296b90f7157e018db78dc286b Mon Sep 17 00:00:00 2001
From: Tommy
Date: Sun, 13 Oct 2024 14:32:14 -0700
Subject: [PATCH] Update robots.txt

---
 srv/nginx/robots.txt | 64 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/srv/nginx/robots.txt b/srv/nginx/robots.txt
index 77470cb..54f42cc 100644
--- a/srv/nginx/robots.txt
+++ b/srv/nginx/robots.txt
@@ -1,2 +1,66 @@
 User-agent: *
+Disallow: /
+
+# Based on https://seirdy.one/robots.txt
+
+User-agent: Adsbot
+Disallow: /
+Allow: /ads.txt
+Allow: /app-ads.txt
+
+User-agent: peer39_crawler
+User-agent: peer39_crawler/1.0
+User-agent: TurnitinBot
+User-agent: NPBot
+User-agent: SlySearch
+User-agent: BLEXBot
+User-agent: CheckMarkNetwork/1.0 (+https://www.checkmarknetwork.com/spider.html)
+User-agent: BrandVerity/1.0
+User-agent: PiplBot
+User-agent: MJ12bot
+# Explicit rule: parsers that end a record at a blank line would otherwise leave the agents above without any rule.
+Disallow: /
+
+# Based on https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.txt
+
+User-agent: AI2Bot
+User-agent: Ai2Bot-Dolma
+User-agent: Amazonbot
+User-agent: Applebot
+User-agent: Applebot-Extended
+User-agent: Bytespider
+User-agent: CCBot
+User-agent: ChatGPT-User
+User-agent: Claude-Web
+User-agent: ClaudeBot
+User-agent: Diffbot
+User-agent: FacebookBot
+User-agent: FriendlyCrawler
+User-agent: GPTBot
+User-agent: Google-Extended
+User-agent: GoogleOther
+User-agent: GoogleOther-Image
+User-agent: GoogleOther-Video
+User-agent: ICC-Crawler
+User-agent: ISSCyberRiskCrawler
+User-agent: ImagesiftBot
+User-agent: Kangaroo Bot
+User-agent: Meta-ExternalAgent
+User-agent: Meta-ExternalFetcher
+User-agent: OAI-SearchBot
+User-agent: PerplexityBot
+User-agent: PetalBot
+User-agent: Scrapy
+User-agent: Sidetrade indexer bot
+User-agent: Timpibot
+User-agent: VelenPublicWebCrawler
+User-agent: Webzio-Extended
+User-agent: YouBot
+User-agent: anthropic-ai
+User-agent: cohere-ai
+User-agent: facebookexternalhit
+User-agent: iaskspider/2.0
+User-agent: img2dataset
+User-agent: omgili
+User-agent: omgilibot
 Disallow: /
\ No newline at end of file