# robots.txt — Lokal360.pl
# Wszystkie boty (w tym AI) mają dostęp do treści; wyjątkiem są endpointy
# techniczne i warianty URL z parametrami wymienione w regułach Disallow poniżej.
# Przeczytaj llms.txt dla strukturalnego kontekstu o serwisie.

# ---------------------------------------------------------------------------
# Group 1 — crawlers and indexers (search engines, AI/LLM, archives, SEO tools).
#
# A crawler obeys ONLY the one group that matches its name most specifically
# and falls back to "*" only when no named group matches (RFC 9309, sec. 2.2.1).
# Separate per-bot groups holding just "Allow: /" would exempt every named bot
# from the Disallow rules, so all crawler names share this single group: the
# rules apply to each listed bot and, through "*", to any unlisted one.
#
# There are deliberately no blank lines inside the group, because some parsers
# treat a blank line as the end of a group.
# ---------------------------------------------------------------------------
User-agent: *
# Google — search
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: Googlebot-Video
User-agent: GoogleOther
User-agent: Google-Extended
# Bing / Microsoft
User-agent: Bingbot
User-agent: msnbot
User-agent: msnbot-media
# AI crawlers (LLMs)
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: cohere-ai
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: YouBot
User-agent: DuckAssistBot
User-agent: MistralAI-User
# Common Crawl (data source for many LLMs)
User-agent: CCBot
# ByteDance / TikTok
User-agent: Bytespider
# Amazon (Alexa, Bedrock)
User-agent: Amazonbot
# Other indexers
User-agent: DuckDuckBot
User-agent: YandexBot
User-agent: Slurp
User-agent: Baiduspider
# Seznam (Czech) — both spellings of the token are listed
User-agent: Seznam
User-agent: SeznamBot
# Meta crawlers
User-agent: FacebookBot
User-agent: meta-externalagent
User-agent: meta-externalfetcher
User-agent: Threads
# Lesser-known search engines (regional + niche)
# Naver (Korea)
User-agent: Yeti
# Daum (Korea)
User-agent: Daum
# Sogou (China)
User-agent: Sogou
# 360 Search (China)
User-agent: 360Spider
# Petal Search / Huawei
User-agent: PetalBot
# Huawei Aspiegel (earlier name of PetalBot)
User-agent: AspiegelBot
# Mojeek (independent UK search engine)
User-agent: Mojeek
# Exalead
User-agent: ExaBot
# Alexa Internet (legacy) / Wayback Machine
User-agent: ia_archiver
# Internet Archive Wayback Machine
User-agent: archive.org_bot
# Image search engines
# TinEye reverse image search
User-agent: TinEyeBot
# ImagesiftBot (image search crawler)
User-agent: ImagesiftBot
# AI / data extraction
# Diffbot — structured data extraction (used by many AI tools)
User-agent: Diffbot
# omgili / Webhose news aggregator
User-agent: omgilibot
# DataForSEO crawler
User-agent: DataForSeoBot
# Pipl people search
User-agent: PiplBot
# DataProvider.com crawler
User-agent: ProvidersBot
# SEO tools (scrapers, but they provide backlink intelligence for our site)
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: SemrushBot-SA
# Moz (DotBot, rogerbot)
User-agent: DotBot
User-agent: rogerbot
# Majestic SEO
User-agent: MJ12bot
# WebMeUp Backlink Explorer
User-agent: BLEXBot
# Brand monitoring / mentions (Awario)
User-agent: AwarioBot
User-agent: AwarioRssBot
User-agent: AwarioSmartBot
# --- Rules for group 1 ---
# Technical endpoints, configuration files, Cloudflare internals, a logo
# preview page and the internal search results.
Disallow: /send.php
Disallow: /audyt-strony.php
Disallow: /audyt-config.php
Disallow: /audyt-config.example.php
Disallow: /cdn-cgi/
Disallow: /logo-lokal360-preview.html
Disallow: /szukaj/
# Cleanup 2026-05-11: the Disallow rules for /_mockup- and /home- were removed;
# those files moved to _archive/, outside the build pipeline, so the former
# A/B mockups are no longer generated into dist/.
#
# SEO 2026-05-26: block pricing variants and the configurator.
# These URLs carry a canonical to the base page
# (e.g. /agencja-360/?pkg=foo -> /agencja-360/), so they should not be indexed
# as separate pages. SEMrush Site Audit found 463 such URLs producing 1500+
# errors (structured data, hreflang conflicts, crawl depth, only-one-link).
# The canonical stays in place; robots.txt protects the crawl budget.
Disallow: /*?pkg=
Disallow: /*&pkg=
Disallow: /*?branza=
Disallow: /*&branza=
Disallow: /*?strona=
Disallow: /*&strona=
Disallow: /*?fn=
Disallow: /*&fn=
# Everything else is open. Listed after the Disallow rules so that parsers
# using first-match order (rather than longest-match) still honor the
# exclusions above.
Allow: /

# ---------------------------------------------------------------------------
# Group 2 — ad-quality checkers and link-preview fetchers.
#
# These agents fetch one specific URL (an ad landing page or a link somebody
# shared), so they keep unrestricted access: blocking parameterised URLs for
# them could break ad checks or link previews. Move a name into group 1 if it
# should obey the Disallow rules as well.
# ---------------------------------------------------------------------------
# Google Ads / AdSense
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
User-agent: Mediapartners-Google
# Social media and messenger link previews
User-agent: facebookexternalhit
User-agent: Twitterbot
User-agent: LinkedInBot
User-agent: Pinterestbot
User-agent: Slackbot
User-agent: Slackbot-LinkExpanding
User-agent: Discordbot
User-agent: TelegramBot
User-agent: WhatsApp
User-agent: SkypeUriPreview
User-agent: Embedly
Allow: /

# Sitemap index (auto-generated by Astro). Sitemap is a file-level directive:
# it is independent of the User-agent groups and applies to every crawler.
Sitemap: https://lokal360.pl/sitemap-index.xml

# RSS feed (świeże treści blogowe — dla Feedly, Inoreader, AI-crawlerów)
# https://lokal360.pl/feed.xml

# IndexNow protocol — natychmiastowe powiadomienia dla Bing/Yandex/Seznam
# Klucz: https://lokal360.pl/6e7dc1d7e80a4acb8857c9685e21cef1.txt

# llms.txt / llms-pl.txt / llms-full.txt — strukturalny kontekst dla AI (Claude, GPT, Perplexity)
# https://lokal360.pl/llms.txt        (EN, ogólny opis serwisu)
# https://lokal360.pl/llms-pl.txt     (PL, ogólny opis serwisu)
# https://lokal360.pl/llms-full.txt   (pełny indeks 130+ URL)
# https://lokal360.pl/ai.txt          (polityka cytowania zgodna ze spawning.ai)