"""
In Umbra — Remote API Adapter
═══════════════════════════════════════

Syncs crawler data to the PHP/MariaDB backend via REST API.
Dual-write architecture:
  - Local SQLite → fast reads for the desktop GUI
  - Remote API  → feeds the public web frontend

Usage:
    from api_adapter import RemoteSync

    sync = RemoteSync(
        api_url="https://your-domain.com/api/ingest.php",
        api_key="your-64-char-hex-key"
    )

    # Wrap the existing Database object:
    sync.wrap(app.db)

    # Now every db.upsert_onion(), db.update_status(), etc.
    # writes locally AND pushes to the remote API in background.
"""

import json
import logging
import hashlib
import re
import threading
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from urllib.parse import urlparse

import requests

log = logging.getLogger("umbra.sync")


class RemoteSync:
    """
    Wraps a local Database instance and mirrors all write operations
    to the remote PHP API. Reads stay local (fast).
    
    API pushes are non-blocking — fire-and-forget via thread pool.
    Failures are logged but never crash the crawler.
    """

    UA = {"User-Agent": "InUmbraCrawler/7.0"}

    def __init__(self, api_url: str, api_key: str, workers: int = 3, timeout: int = 15):
        self.api_url = api_url.rstrip("/")
        self.api_key = api_key
        self.timeout = timeout
        self.pool = ThreadPoolExecutor(max_workers=workers, thread_name_prefix="api-sync")
        self._session = requests.Session()
        self._session.headers.update({
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            **self.UA,
        })
        self._db = None
        self._original_methods = {}
        self._stats = {"sent": 0, "ok": 0, "fail": 0}
        self._stats_lock = threading.Lock()

    # ── Core transport ──────────────────────────────────────────

    def _post(self, payload: dict) -> dict | None:
        """POST JSON to the ingest API. Returns response dict or None on failure."""
        try:
            resp = self._session.post(
                self.api_url,
                json=payload,
                timeout=self.timeout,
            )
            with self._stats_lock:
                self._stats["sent"] += 1
            if resp.status_code == 200:
                with self._stats_lock:
                    self._stats["ok"] += 1
                return resp.json()
            else:
                with self._stats_lock:
                    self._stats["fail"] += 1
                log.warning(f"API {resp.status_code}: {resp.text[:200]}")
                return None
        except Exception as e:
            with self._stats_lock:
                self._stats["fail"] += 1
            log.warning(f"API error: {e}")
            return None

    def _push(self, payload: dict):
        """Non-blocking push — submits to thread pool."""
        self.pool.submit(self._post, payload)

    @property
    def stats(self) -> dict:
        with self._stats_lock:
            return dict(self._stats)

    # ── Wrap the Database object ────────────────────────────────

    def wrap(self, db):
        """
        Monkey-patch the Database instance to dual-write.
        All existing local writes continue working.
        API pushes happen in background after local write succeeds.
        """
        self._db = db

        # Methods to intercept
        intercepts = {
            "upsert_onion": self._wrap_upsert_onion,
            "bulk_upsert": self._wrap_bulk_upsert,
            "update_status": self._wrap_update_status,
            "update_metadata": self._wrap_update_metadata,
            "add_discovered_page": self._wrap_add_discovered_page,
            "update_thumbnail": self._wrap_update_thumbnail,
            "update_lang": self._wrap_update_lang,
            "update_tags": self._wrap_update_tags,
            "update_content_hash": self._wrap_update_content_hash,
            "mark_crawled": self._wrap_mark_crawled,
            "prune_offline": self._wrap_prune_offline,
            "add_to_blocklist": self._wrap_add_to_blocklist,
            "store_snapshot": self._wrap_store_snapshot,
        }

        for method_name, wrapper in intercepts.items():
            if hasattr(db, method_name):
                # Save original
                self._original_methods[method_name] = getattr(db, method_name)
                # Replace with wrapper
                setattr(db, method_name, wrapper)

        log.info(f"RemoteSync: wrapped {len(intercepts)} methods → {self.api_url}")

    def unwrap(self):
        """Restore original methods."""
        if self._db:
            for name, original in self._original_methods.items():
                setattr(self._db, name, original)
            self._original_methods.clear()
            log.info("RemoteSync: unwrapped")

    def shutdown(self):
        """Wait for pending pushes and shut down."""
        self.pool.shutdown(wait=True, cancel_futures=False)
        log.info(f"RemoteSync shutdown: {self.stats}")

    # ── Wrapped methods ─────────────────────────────────────────
    # Pattern: call original (local write), then push to API async

    def _wrap_upsert_onion(self, onion, title="", category=1,
                            cti_type="", cti_status="", hits=0,
                            source="", from_search=False, group_id=""):
        # Local write first
        self._original_methods["upsert_onion"](
            onion, title=title, category=category,
            cti_type=cti_type, cti_status=cti_status, hits=hits,
            source=source, from_search=from_search, group_id=group_id
        )
        # Push to remote
        self._push({
            "action": "upsert",
            "address": onion,
            "title": title,
            "source": source,
            "cti_type": cti_type,
            "from_search": from_search,
        })

    def _wrap_bulk_upsert(self, onion_list, source=""):
        # Local write
        self._original_methods["bulk_upsert"](onion_list, source=source)
        # Batch push — one request per onion (API handles dedup)
        for onion in onion_list:
            self._push({
                "action": "upsert",
                "address": onion,
                "source": source,
            })

    def _wrap_update_status(self, onion, status, title="", headers=""):
        # Detect status changes before local write (under DB lock to avoid races)
        old_status = ""
        old_title = ""
        try:
            with self._db.lock:
                old_row = self._db.conn.execute(
                    "SELECT status_live, last_title FROM directory WHERE onion=?", (onion,)
                ).fetchone()
                if old_row:
                    old_status = dict(old_row).get("status_live", "")
                    old_title = dict(old_row).get("last_title", "")
        except Exception:
            pass
        alive = status in ("online", "200", "301", "302", "403")
        # Local write
        self._original_methods["update_status"](onion, status, title, headers)
        # Push to remote
        self._push({
            "action": "status",
            "address": onion,
            "alive": alive,
            "title": title,
            "headers": headers[:2000],
        })
        # Change detection with flood protection
        # Only log status flips (ignore unknown→anything transitions)
        if old_status in ("online",) and not alive:
            self._push({"action": "change", "address": onion,
                         "change_type": "went_offline", "old_val": "online", "new_val": "offline"})
        elif old_status in ("offline",) and alive:
            self._push({"action": "change", "address": onion,
                         "change_type": "came_online", "old_val": "offline", "new_val": "online"})
        # Title changes: only log if both non-empty and substantially different
        if title and old_title and title != old_title:
            # Skip trivial diffs (whitespace only, case only)
            t1 = re.sub(r'\s+', ' ', title.strip().lower())
            t2 = re.sub(r'\s+', ' ', old_title.strip().lower())
            if t1 != t2:
                self._push({"action": "change", "address": onion,
                             "change_type": "title_changed", "old_val": old_title[:200], "new_val": title[:200]})

    def _wrap_update_metadata(self, onion, meta: dict):
        # Local write
        self._original_methods["update_metadata"](onion, meta)
        if not meta:
            return
        # Push to remote — flatten for the API
        payload = {"action": "upsert", "address": onion}
        field_map = {
            "server_software": "server_software",
            "powered_by": "powered_by",
            "meta_generator": "meta_generator",
            "meta_description": "meta_description",
            "meta_keywords": "meta_keywords",
            "favicon_url": "favicon_url",
            "page_size": "page_size",
            "response_time_ms": "response_time_ms",
            "link_count": "link_count",
            "form_count": "form_count",
            "image_count": "image_count",
        }
        for local_key, api_key in field_map.items():
            if local_key in meta:
                payload[api_key] = meta[local_key]

        if "tech_stack" in meta:
            tech = meta["tech_stack"]
            payload["tech_stack"] = ",".join(tech) if isinstance(tech, list) else str(tech)

        if "open_graph" in meta:
            og = meta["open_graph"]
            if isinstance(og, dict):
                payload["og_title"] = og.get("og:title", "")
                payload["og_description"] = og.get("og:description", "")
                payload["og_image"] = og.get("og:image", "")

        self._push(payload)

    def _wrap_add_discovered_page(self, base_onion, page_url, title="", source=""):
        # Local write
        self._original_methods["add_discovered_page"](base_onion, page_url, title, source)
        # Push to remote
        self._push({
            "action": "page",
            "address": base_onion,
            "page_url": page_url,
            "title": title,
            "source": source,
        })

    def _wrap_update_thumbnail(self, onion, thumb_path):
        # Local write
        self._original_methods["update_thumbnail"](onion, thumb_path)
        # Push path reference (actual file stays on crawler disk)
        self._push({
            "action": "upsert",
            "address": onion,
            "thumbnail_path": thumb_path,
        })

    def _wrap_update_lang(self, onion, lang):
        # Local write
        self._original_methods["update_lang"](onion, lang)
        self._push({
            "action": "upsert",
            "address": onion,
            "page_language": lang,
        })

    def _wrap_update_tags(self, onion, tags):
        # Local write
        self._original_methods["update_tags"](onion, tags)
        self._push({
            "action": "upsert",
            "address": onion,
            "tags": tags if isinstance(tags, list) else json.loads(tags or "[]"),
        })

    def _wrap_update_content_hash(self, onion, content_hash):
        # Local write
        self._original_methods["update_content_hash"](onion, content_hash)
        # Push to remote for mirror detection
        if content_hash:
            self._push({
                "action": "content_hash",
                "address": onion,
                "hash": content_hash,
            })

    def _wrap_mark_crawled(self, onion):
        # Local write only — crawl scheduling is internal
        self._original_methods["mark_crawled"](onion)

    def _wrap_prune_offline(self, min_streak=5):
        # Local prune
        result = self._original_methods["prune_offline"](min_streak)
        # Remote prune
        self._push({
            "action": "prune",
            "min_streak": min_streak,
        })
        return result

    def _wrap_add_to_blocklist(self, onion, reason="manual"):
        # Local write
        self._original_methods["add_to_blocklist"](onion, reason)
        # Push blocklist hash to remote
        md5 = hashlib.md5(onion.encode()).hexdigest()
        self._push({
            "action": "blocklist",
            "hashes": [md5],
        })

    # ── Blocklist sync (bulk, for startup) ──────────────────────

    def sync_blocklist(self, hashes: list[str]):
        """Push a batch of Ahmia blocklist hashes to the remote API."""
        # Chunk into batches of 500
        for i in range(0, len(hashes), 500):
            chunk = hashes[i:i+500]
            self._push({
                "action": "blocklist",
                "hashes": chunk,
            })
        log.info(f"RemoteSync: queued {len(hashes)} blocklist hashes for sync")

    # ── Health check ────────────────────────────────────────────

    def ping(self) -> bool:
        """Test API connectivity."""
        try:
            resp = self._session.get(
                self.api_url.replace("/ingest.php", "/stats.php"),
                timeout=5,
            )
            return resp.status_code == 200
        except Exception:
            return False

    # ── Canary push ─────────────────────────────────────────────

    def push_canary(self, onion, page_url, canary_hash, preview=""):
        """Push a canary detection to the remote API."""
        self._push({
            "action": "canary",
            "address": onion,
            "page_url": page_url[:2000],
            "canary_hash": canary_hash,
            "preview": preview[:500],
        })

    # ── Snapshot push ───────────────────────────────────────────

    def push_snapshot(self, onion, title="", content_hash="", page_size=0, html_path=""):
        """Push snapshot metadata to the remote API."""
        self._push({
            "action": "snapshot",
            "address": onion,
            "title": title[:500],
            "content_hash": content_hash,
            "page_size": page_size,
            "html_path": html_path[:500],
        })

    # ── Store snapshot wrapper (wraps db.store_snapshot) ────────

    def _wrap_store_snapshot(self, onion, title="", content_hash="",
                             page_count=0, entity_count=0, links_json="", raw_text=""):
        # Local write
        self._original_methods["store_snapshot"](
            onion, title, content_hash, page_count, entity_count, links_json, raw_text)
        # Push metadata to remote
        self.push_snapshot(onion, title, content_hash, len(raw_text))

    # ── Change push (direct) ────────────────────────────────────

    def push_change(self, onion, change_type, old_val="", new_val=""):
        """Push a change event to the remote API."""
        self._push({
            "action": "change",
            "address": onion,
            "change_type": change_type,
            "old_val": old_val[:500],
            "new_val": new_val[:500],
        })

    # ── Submissions management ──────────────────────────────────

    def fetch_submissions(self, limit=50):
        """Fetch pending submissions from the remote API (blocking)."""
        resp = self._post({"action": "submissions_pending", "limit": limit})
        return resp.get("submissions", []) if resp else []

    def resolve_submission(self, sub_id, accept=True):
        """Accept or reject a submission (blocking)."""
        return self._post({"action": "submission_resolve", "id": sub_id, "accept": accept})


# ═══════════════════════════════════════════════════════════════
# Integration helper — call this from App.__init__ or _startup
# ═══════════════════════════════════════════════════════════════

def init_remote_sync(app, api_url: str, api_key: str) -> RemoteSync | None:
    """
    Initialize remote sync and wire it into the app.

    Builds a RemoteSync, pings the API, and on success wraps ``app.db`` and
    queues the Ahmia blocklist hashes for upload.

    Returns:
        The active RemoteSync, or None when sync is disabled (empty URL/key)
        or the API cannot be reached (the app then runs local-only).

    Usage in App._startup():
        from api_adapter import init_remote_sync
        self.sync = init_remote_sync(self, 
            api_url="https://your-domain.com/api/ingest.php",
            api_key="your-key-here")
    """
    if not api_url or not api_key:
        log.info("RemoteSync: disabled (no API URL/key configured)")
        return None

    sync = RemoteSync(api_url, api_key)
    if not sync.ping():
        log.warning(f"RemoteSync: cannot reach {api_url} — running local-only")
        # The instance is discarded, so shut it down instead of abandoning
        # its executor.
        sync.shutdown()
        return None

    sync.wrap(app.db)
    log.info(f"RemoteSync: connected to {api_url}")

    # Sync Ahmia blocklist on startup. Best-effort: a missing module or an
    # unexpected blocklist shape must not prevent sync from starting.
    try:
        from umbra_search import AhmiaBlocklist
        hashes = list(AhmiaBlocklist._hashes)
        if hashes:
            sync.sync_blocklist(hashes)
    except Exception as e:
        log.warning(f"RemoteSync: blocklist sync failed: {e}")

    return sync
