#!/usr/bin/env python3
"""
URL Pattern Generator for Programmatic SEO

Generates URL patterns and page templates from a data source.
Helps plan template-based page generation at scale.

Usage:
  python3 url_pattern_generator.py                    # Demo mode
  python3 url_pattern_generator.py data.json          # From data file
  python3 url_pattern_generator.py data.json --json   # JSON output

Input format (JSON):
{
  "template": "{tool}-vs-{competitor}-comparison",
  "variables": {
    "tool": ["slack", "teams", "discord"],
    "competitor": ["zoom", "webex"]
  },
  "base_url": "https://example.com/compare"
}
"""

import json
import sys
import os
from itertools import product as cartesian_product


def generate_urls(config):
    """Generate all URL combinations from template and variables.

    Args:
        config: Mapping with a "template" string containing ``{name}``
            placeholders, a "variables" mapping of placeholder name to a
            list of values, and an optional "base_url" (defaults to
            "https://example.com").

    Returns:
        A list of dicts, one per generated page, each holding "url", "slug"
        and "variables" (the raw, un-slugified values used for that page).
        Combinations in which two variables resolve to the same slug
        component (self-comparisons) are skipped.

    Raises:
        KeyError: If "template" or "variables" is missing from config.
    """
    template = config["template"]
    variables = config["variables"]
    # Drop trailing slashes so "https://x.com/dir/" does not yield "dir//slug".
    base_url = config.get("base_url", "https://example.com").rstrip("/")

    var_names = list(variables)
    var_values = [variables[name] for name in var_names]

    urls = []
    for combo in cartesian_product(*var_values):
        mapping = dict(zip(var_names, combo))

        # Slugify each value once; the same form is used for both the
        # self-comparison check and the placeholder substitution.
        slug_parts = {
            key: str(val).lower().replace(" ", "-")
            for key, val in mapping.items()
        }

        # Skip self-comparisons. Comparing the slugified form (rather than
        # the raw values) also catches pairs such as "Slack" / "slack" that
        # would otherwise produce a "slack-vs-slack" page, and works for
        # values that are not hashable.
        if len(set(slug_parts.values())) != len(slug_parts):
            continue

        slug = template
        for key, part in slug_parts.items():
            slug = slug.replace("{" + key + "}", part)

        urls.append({
            "url": f"{base_url}/{slug}",
            "slug": slug,
            "variables": mapping,
        })

    return urls


def analyze_patterns(urls, config):
    """Analyze generated URL patterns for SEO concerns.

    Args:
        urls: List of page dicts as produced by ``generate_urls``; each must
            have a "url" string and a "variables" mapping.
        config: The generator config; only "variables" is read here.

    Returns:
        A dict with "total_pages", "avg_url_length" (integer mean, 0 when
        there are no URLs), "long_urls" (count of URLs over 75 characters),
        "issues", "warnings" and "score". The score starts at 100, loses 20
        per issue and 5 per warning, and is clamped to the range 0-100.
    """
    issues = []
    warnings = []

    # Check total page count
    total = len(urls)
    if total > 10000:
        issues.append(f"Generating {total:,} pages — risk of thin content penalty. Consider narrowing variables.")
    elif total > 1000:
        warnings.append(f"Generating {total:,} pages — ensure each has unique, substantial content.")

    # Check URL length
    long_urls = [u for u in urls if len(u["url"]) > 75]
    if long_urls:
        warnings.append(f"{len(long_urls)} URLs exceed 75 chars — may truncate in SERPs.")

    # Check for potential duplicate intent: pages built from the same set of
    # values in a different order, e.g. "slack-vs-zoom" and "zoom-vs-slack".
    if len(config["variables"]) >= 2:
        seen_pairs = set()
        dupes = 0
        for u in urls:
            # Compare values as strings: raw values may mix types (e.g. a
            # tool name and a numeric year), which cannot be sorted together.
            vals = tuple(sorted(str(v) for v in u["variables"].values()))
            if vals in seen_pairs:
                dupes += 1
            else:
                seen_pairs.add(vals)
        if dupes > 0:
            warnings.append(f"{dupes} URL pairs may have duplicate search intent (e.g., 'A vs B' and 'B vs A'). Consider canonicalizing.")

    # Score
    score = 100
    score -= len(issues) * 20
    score -= len(warnings) * 5
    score = max(0, min(100, score))

    return {
        "total_pages": total,
        # max(..., 1) avoids division by zero when no URLs were generated.
        "avg_url_length": sum(len(u["url"]) for u in urls) // max(total, 1),
        "long_urls": len(long_urls),
        "issues": issues,
        "warnings": warnings,
        "score": score,
    }


def format_report(urls, analysis, config):
    """Render the URL pattern analysis as a plain-text report.

    Args:
        urls: List of page dicts; only each entry's "url" is shown.
        analysis: Result dict from ``analyze_patterns`` ("total_pages",
            "avg_url_length", "score", "issues", "warnings").
        config: Generator config ("template", "variables", optional
            "base_url").

    Returns:
        The report as a single newline-joined string: header, summary,
        score bar, any issues and warnings, and up to ten sample URLs.
    """
    rule = "=" * 60
    variables = config["variables"]

    out = [
        "",
        rule,
        "  PROGRAMMATIC SEO — URL PATTERN REPORT",
        rule,
        "",
        f"  Template:    {config['template']}",
        f"  Base URL:    {config.get('base_url', 'https://example.com')}",
        f"  Variables:   {len(variables)} ({', '.join(variables)})",
        f"  Total Pages: {analysis['total_pages']:,}",
        f"  Avg URL Len: {analysis['avg_url_length']} chars",
        "",
    ]

    # Score gauge: 20 cells, one per 5 points.
    score = analysis["score"]
    filled = score // 5
    gauge = "█" * filled + "░" * (20 - filled)
    out += [f"  PATTERN SCORE: {score}/100", f"  [{gauge}]", ""]

    # Issues first, then warnings; a section is omitted when it has no entries.
    sections = (
        ("  🔴 ISSUES:", "issues"),
        ("  🟡 WARNINGS:", "warnings"),
    )
    for heading, key in sections:
        entries = analysis[key]
        if not entries:
            continue
        out.append(heading)
        out.extend(f"     • {entry}" for entry in entries)
        out.append("")

    # Sample URLs, capped at ten with a count of the remainder.
    out.append("  📋 SAMPLE URLS (first 10):")
    out.extend(f"     {page['url']}" for page in urls[:10])
    remaining = len(urls) - 10
    if remaining > 0:
        out.append(f"     ... and {remaining} more")
    out.append("")

    return "\n".join(out)


# Built-in demo configuration used when no data file is loaded. The two
# variable lists overlap on purpose, so the demo produces both
# self-comparisons (e.g. slack/slack) and swapped pairs (e.g. slack/zoom and
# zoom/slack).
SAMPLE_CONFIG = dict(
    template="{tool}-vs-{competitor}-comparison",
    variables=dict(
        tool=["slack", "microsoft-teams", "discord", "zoom"],
        competitor=[
            "slack",
            "microsoft-teams",
            "discord",
            "zoom",
            "webex",
            "google-meet",
        ],
    ),
    base_url="https://example.com/compare",
)


def main():
    """Command-line entry point.

    Reads the config from the JSON file named by the first non-flag
    argument, or falls back to ``SAMPLE_CONFIG`` when no such file is given.
    Prints either a human-readable report or, with ``--json``, a JSON
    document containing the config, the generated URLs and the analysis.
    """
    use_json = "--json" in sys.argv
    args = [a for a in sys.argv[1:] if a != "--json"]

    # In --json mode stdout must contain nothing but the JSON document, so
    # informational notices are routed to stderr instead.
    notice_stream = sys.stderr if use_json else sys.stdout

    if args and os.path.isfile(args[0]):
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(args[0], encoding="utf-8") as f:
            config = json.load(f)
    else:
        if args:
            # An argument was given but is not a file: keep the fallback, but
            # say so rather than silently reporting on the sample data.
            print(
                f"Warning: '{args[0]}' is not a file — using sample comparison page config",
                file=sys.stderr,
            )
        else:
            print("[Demo mode — using sample comparison page config]", file=notice_stream)
        config = SAMPLE_CONFIG

    urls = generate_urls(config)
    analysis = analyze_patterns(urls, config)

    if use_json:
        print(json.dumps({
            "config": config,
            "urls": urls,
            "analysis": analysis
        }, indent=2))
    else:
        print(format_report(urls, analysis, config))


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
