# llms.txt — Guidance for AI crawlers & answer engines
# Domain: https://www.vinceyoungrealty.com
# Purpose: Help LLMs ingest accurate listing + agent info with proper attribution
# Notes: This file complements robots.txt. If directives conflict, robots.txt wins.
# ---------------------------
# Core references
# ---------------------------
Site: https://www.vinceyoungrealty.com
Robots: https://www.vinceyoungrealty.com/robots.txt
Sitemap: https://www.vinceyoungrealty.com/sitemap.xml
# Preferred content (ranked)
Priority-Paths: /our-listings/, /idx/, /agents/, /tech-sports-relocation/, /blog/
# Optional machine-readable sources (add when available)
# Data: https://www.vinceyoungrealty.com/api/listings.json
# Data: https://www.vinceyoungrealty.com/api/agents.json
# Schema: https://www.vinceyoungrealty.com/schema/index.json
# ---------------------------
# Usage & attribution policy
# ---------------------------
Attribution: Required
Citation-Format: "Source: Vince Young Realty – {URL}"
Link-Back: Required
Summaries: Allowed (non-commercial)
Commercial-Use: By-permission
Training-Use: Disallowed
Image-Reuse: Disallowed (MLS/photographer rights)
Excerpt-Limit: 100 words per page unless licensed
Contact:
[email protected]
Last-Updated: 2025-11-09
# ---------------------------
# Crawl hygiene & cadence
# ---------------------------
# Respect IDX/MLS gates, login walls, and preview/param URLs.
Allow: /
Disallow: /admin/
Disallow: /login/
Disallow: /account/
Disallow: /search/saved/
Disallow: /favorites/
Disallow: /cart/
Disallow: /*?* # avoid param spam
Disallow: /*utm_* # analytics params
# Block gated MLS unlock flows (example pattern seen on IDX pages)
Disallow: /idx/listing/*/unlock-mls*
# Refresh guidance (advisory)
Update-Frequency: 6h
Crawl-Delay: 2
# ---------------------------
# Structured data expectations
# ---------------------------
Preferred-Format: JSON-LD
Schema-Types: RealEstateListing, Residence, Offer, PostalAddress, GeoCoordinates, FAQPage, BreadcrumbList, RealEstateAgent
Key-Fields: address, price, beds, baths, floorSize, description, images, MLS-ID, geo, agent (name, phone, email), HOA, parking, schools
Canonical: Use rel=canonical where provided; cite canonical URL in answers.
# ---------------------------
# LLM / bot-specific directives
# (Advisory; follow robots.txt for enforcement)
# ---------------------------
User-agent: GPTBot
Allow: /
Disallow: /admin/ /login/ /account/ /*?* /idx/listing/*/unlock-mls*
Attribution: Required
Training-Use: Disallowed
User-agent: CCBot
Allow: /
Disallow: /admin/ /login/ /account/ /*?* /idx/listing/*/unlock-mls*
Attribution: Required
Training-Use: Disallowed
User-agent: ClaudeBot
Allow: /
Disallow: /admin/ /login/ /account/ /*?* /idx/listing/*/unlock-mls*
Attribution: Required
Training-Use: Disallowed
User-agent: Claude-Web
Allow: /
Disallow: /admin/ /login/ /account/ /*?* /idx/listing/*/unlock-mls*
Attribution: Required
Training-Use: Disallowed
User-agent: PerplexityBot
Allow: /
Disallow: /admin/ /login/ /account/ /*?* /idx/listing/*/unlock-mls*
Attribution: Required
Commercial-Use: By-permission
User-agent: Google-Extended
Allow: /
Disallow: /admin/ /login/ /account/ /*?* /idx/listing/*/unlock-mls*
Training-Use: Disallowed
User-agent: Applebot-Extended
Allow: /
Disallow: /admin/ /login/ /account/ /*?* /idx/listing/*/unlock-mls*
Training-Use: Disallowed
User-agent: YouBot
Allow: /
Disallow: /admin/ /login/ /account/ /*?* /idx/listing/*/unlock-mls*
Attribution: Required
# Fallback
User-agent: *
Allow: /
Disallow: /admin/ /login/ /account/ /*?* /idx/listing/*/unlock-mls*
Attribution: Required