<?php
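/**
 * POST /api/ingest
 * Authenticated endpoint used by the crawler to push data and by admin
 * tooling to review reports, submissions and removal requests.
 * Body: a JSON object {"action": "<name>", ...data} where <name> is one of:
 *   upsert | page | status | blocklist | prune | change | content_hash |
 *   canary | snapshot | cleanup |
 *   reports_pending | report_dismiss | report_block | reports_stats |
 *   trending_list | trending_approve |
 *   submissions_pending | submission_resolve |
 *   removals_pending | removal_approve | removal_deny
 */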

require_once __DIR__ . '/../includes/security.php';

// Gate the request (POST + authentication) before the body is read.
Security::requirePost();
Security::requireAuth();

// Decode the JSON request body into an associative array.
$input = json_decode(file_get_contents('php://input'), true);

// The body must be a JSON object carrying a string "action". The switch
// below compares loosely, so a non-string action such as `true` would
// otherwise match the first case, and an array would be concatenated into
// the "Unknown action" message.
if (!is_array($input) || !isset($input['action']) || !is_string($input['action'])) {
    Security::json(['error' => 'Missing action'], 400);
}

$db = DB::get();
$action = $input['action'];

switch ($action) {

    // ── Upsert an onion address ──────────────────────────────────
    // Inserts a new onion row or refreshes an existing one. On update a
    // column is only overwritten when the incoming value is non-empty
    // (text), positive (counters) or non-null (tags); `source` is only
    // filled in while the stored value is still empty.
    case 'upsert':
        $address = trim($input['address'] ?? '');
        if (preg_match('/^[a-z2-7]{56}\.onion$/', $address) !== 1) {
            Security::json(['error' => 'Invalid onion address'], 400);
        }

        // Blocklist entries are stored as the MD5 of the address.
        $blockedLookup = $db->prepare("SELECT 1 FROM blocklist WHERE hash_md5 = ?");
        $blockedLookup->execute([md5($address)]);
        if ($blockedLookup->fetch()) {
            Security::json(['status' => 'blocked', 'address' => $address]);
        }

        // Text columns and the maximum number of characters kept for each.
        $textLimits = [
            'title'            => 500,
            'source'           => 120,
            'cti_type'         => 60,
            'cti_name'         => 200,
            'page_language'    => 10,
            'meta_description' => 1000,
            'meta_keywords'    => 500,
            'meta_generator'   => 200,
            'server_software'  => 200,
            'powered_by'       => 200,
            'tech_stack'       => 500,
            'favicon_url'      => 500,
            'og_title'         => 500,
            'og_description'   => 1000,
            'og_image'         => 500,
            'thumbnail_path'   => 500,
        ];
        // Integer columns; a missing value becomes 0.
        $counterColumns = ['page_size', 'response_time_ms', 'link_count', 'form_count', 'image_count'];

        $fields = [];
        foreach ($textLimits as $column => $maxChars) {
            $fields[$column] = mb_substr($input[$column] ?? '', 0, $maxChars);
        }
        foreach ($counterColumns as $column) {
            $fields[$column] = intval($input[$column] ?? 0);
        }
        // Tags are stored JSON-encoded; NULL means "not supplied".
        $fields['tags'] = isset($input['tags']) ? json_encode($input['tags']) : null;

        // INSERT ... ON DUPLICATE KEY UPDATE
        $sql = "INSERT INTO onions (address, title, source, cti_type, cti_name,
                    page_language, meta_description, meta_keywords, meta_generator,
                    server_software, powered_by, tech_stack, favicon_url,
                    og_title, og_description, og_image,
                    page_size, response_time_ms, link_count, form_count, image_count,
                    tags, thumbnail_path, last_seen)
                VALUES (:address, :title, :source, :cti_type, :cti_name,
                    :page_language, :meta_description, :meta_keywords, :meta_generator,
                    :server_software, :powered_by, :tech_stack, :favicon_url,
                    :og_title, :og_description, :og_image,
                    :page_size, :response_time_ms, :link_count, :form_count, :image_count,
                    :tags, :thumbnail_path, NOW())
                ON DUPLICATE KEY UPDATE
                    title = IF(:title2 != '', :title3, title),
                    source = IF(source = '', :source2, source),
                    cti_type = IF(:cti_type2 != '', :cti_type3, cti_type),
                    cti_name = IF(:cti_name2 != '', :cti_name3, cti_name),
                    page_language = IF(:lang2 != '', :lang3, page_language),
                    meta_description = IF(:desc2 != '', :desc3, meta_description),
                    meta_keywords = IF(:kw2 != '', :kw3, meta_keywords),
                    meta_generator = IF(:gen2 != '', :gen3, meta_generator),
                    server_software = IF(:srv2 != '', :srv3, server_software),
                    powered_by = IF(:pow2 != '', :pow3, powered_by),
                    tech_stack = IF(:tech2 != '', :tech3, tech_stack),
                    favicon_url = IF(:fav2 != '', :fav3, favicon_url),
                    og_title = IF(:ogt2 != '', :ogt3, og_title),
                    og_description = IF(:ogd2 != '', :ogd3, og_description),
                    og_image = IF(:ogi2 != '', :ogi3, og_image),
                    page_size = IF(:ps2 > 0, :ps3, page_size),
                    response_time_ms = IF(:rt2 > 0, :rt3, response_time_ms),
                    link_count = IF(:lc2 > 0, :lc3, link_count),
                    form_count = IF(:fc2 > 0, :fc3, form_count),
                    image_count = IF(:ic2 > 0, :ic3, image_count),
                    tags = IF(:tags2 IS NOT NULL, :tags3, tags),
                    thumbnail_path = IF(:tp2 != '', :tp3, thumbnail_path),
                    last_seen = NOW()";

        // Duplicate bindings for the ON DUPLICATE KEY clause (PDO limitation):
        // every update expression binds its value again as "<prefix>2"
        // (the condition) and "<prefix>3" (the replacement value).
        $updateAliases = [
            'title'    => 'title',
            'cti_type' => 'cti_type',
            'cti_name' => 'cti_name',
            'lang'     => 'page_language',
            'desc'     => 'meta_description',
            'kw'       => 'meta_keywords',
            'gen'      => 'meta_generator',
            'srv'      => 'server_software',
            'pow'      => 'powered_by',
            'tech'     => 'tech_stack',
            'fav'      => 'favicon_url',
            'ogt'      => 'og_title',
            'ogd'      => 'og_description',
            'ogi'      => 'og_image',
            'ps'       => 'page_size',
            'rt'       => 'response_time_ms',
            'lc'       => 'link_count',
            'fc'       => 'form_count',
            'ic'       => 'image_count',
            'tags'     => 'tags',
            'tp'       => 'thumbnail_path',
        ];

        $params = ['address' => $address] + $fields;
        foreach ($updateAliases as $prefix => $column) {
            $params["{$prefix}2"] = $fields[$column];
            $params["{$prefix}3"] = $fields[$column];
        }
        // `source` appears only once in the update clause, so it has no "3" twin.
        $params['source2'] = $fields['source'];

        $upsert = $db->prepare($sql);
        $upsert->execute($params);
        Security::json(['status' => 'ok', 'address' => $address]);
        break;

    // ── Add discovered sub-page ──────────────────────────────────
    // Records a page URL under an already-known onion. An existing page row
    // gets its last_seen refreshed and its title replaced only when a
    // non-empty title is supplied.
    case 'page':
        $address = trim($input['address'] ?? '');
        $pageUrl = trim($input['page_url'] ?? '');
        $title   = mb_substr($input['title'] ?? '', 0, 500);

        // Both the onion address and the page URL are mandatory.
        if (!($address && $pageUrl)) {
            Security::json(['error' => 'Missing address or page_url'], 400);
        }

        // Resolve the parent onion row; pages reference it by id.
        $onionLookup = $db->prepare("SELECT id FROM onions WHERE address = ?");
        $onionLookup->execute([$address]);
        $onion = $onionLookup->fetch();
        if (!$onion) {
            Security::json(['error' => 'Onion not found'], 404);
        }

        $pageInsert = $db->prepare(
            "INSERT INTO pages (onion_id, page_url, title, source)
             VALUES (?, ?, ?, ?)
             ON DUPLICATE KEY UPDATE title = IF(? != '', ?, title), last_seen = NOW()"
        );
        // The title is bound three times: once for the insert and twice for
        // the conditional update expression.
        $pageInsert->execute([
            $onion['id'],
            mb_substr($pageUrl, 0, 2000),
            $title,
            mb_substr($input['source'] ?? '', 0, 120),
            $title,
            $title,
        ]);
        Security::json(['status' => 'ok']);
        break;

    // ── Update status check result ───────────────────────────────
    // Stores the outcome of a liveness check on the onion row and appends
    // an entry to uptime_log.
    case 'status':
        $address = trim($input['address'] ?? '');
        $alive   = (bool) ($input['alive'] ?? false);
        $title   = mb_substr($input['title'] ?? '', 0, 500);
        $headers = mb_substr($input['headers'] ?? '', 0, 2000);

        if (!$address) {
            Security::json(['error' => 'Missing address'], 400);
        }

        if ($alive) {
            // Online: reset the offline streak, refresh last_seen and take
            // the new title when one was supplied.
            $statusSql =
                "UPDATE onions SET status='online', offline_streak=0,
                 check_count=check_count+1, last_checked=NOW(), last_seen=NOW(),
                 title=IF(?!='',?,title), last_headers=?
                 WHERE address=?";
            $statusArgs = [$title, $title, $headers, $address];
        } else {
            // Offline: extend the streak; last_seen and title stay untouched.
            $statusSql =
                "UPDATE onions SET status='offline', offline_streak=offline_streak+1,
                 check_count=check_count+1, last_checked=NOW(), last_headers=?
                 WHERE address=?";
            $statusArgs = [$headers, $address];
        }
        $db->prepare($statusSql)->execute($statusArgs);

        // Log to uptime history (for sparklines)
        $responseMs = intval($input['response_ms'] ?? 0);
        $uptimeInsert = $db->prepare(
            "INSERT INTO uptime_log (address, alive, response_ms) VALUES (?, ?, ?)"
        );
        $uptimeInsert->execute([$address, (int) $alive, $responseMs]);

        Security::json(['status' => 'ok']);
        break;

    // ── Sync CSAM blocklist ──────────────────────────────────────
    // Inserts every well-formed MD5 hash from `hashes` into the blocklist
    // (duplicates are ignored by INSERT IGNORE). `synced` is the number of
    // hashes that passed validation and were submitted, not the number of
    // new rows.
    case 'blocklist':
        $hashes = $input['hashes'] ?? [];
        if (!is_array($hashes)) {
            Security::json(['error' => 'hashes must be array'], 400);
        }
        $stmt = $db->prepare("INSERT IGNORE INTO blocklist (hash_md5) VALUES (?)");
        $count = 0;
        foreach ($hashes as $h) {
            // Skip non-string entries instead of letting preg_match() raise a
            // TypeError that would abort the whole sync. `\z` anchors at the
            // very end of the subject; `$` would also accept a trailing
            // newline and let a 33-byte value through.
            if (is_string($h) && preg_match('/^[a-f0-9]{32}\z/', $h) === 1) {
                $stmt->execute([$h]);
                $count++;
            }
        }
        Security::json(['status' => 'ok', 'synced' => $count]);
        break;

    // ── Bulk prune offline ───────────────────────────────────────
    // Deletes offline onions whose offline streak reached the threshold.
    // The threshold defaults to 5 and is never allowed below 3.
    case 'prune':
        $requestedStreak = intval($input['min_streak'] ?? 5);
        $minStreak = $requestedStreak < 3 ? 3 : $requestedStreak;

        // Protected: sites with >1 page (the LEFT JOIN finds them and the
        // IS NULL filter keeps them out of the delete set).
        $pruneStmt = $db->prepare(
            "DELETE o FROM onions o
             LEFT JOIN (SELECT onion_id, COUNT(*) as pc FROM pages GROUP BY onion_id HAVING pc > 1) p
                ON p.onion_id = o.id
             WHERE o.status = 'offline' AND o.offline_streak >= ? AND p.onion_id IS NULL"
        );
        $pruneStmt->execute([$minStreak]);
        Security::json(['status' => 'ok', 'pruned' => $pruneStmt->rowCount()]);
        break;

    // ── Fetch pending reports (for admin review) ─────────────
    // Returns pending reports, one row per reported address, joined with the
    // onion's current metadata and ordered by number of pending reports
    // (capped at 999), oldest first within ties. `limit` is clamped to 1..100
    // and defaults to 50.
    // NOTE(review): the query selects non-aggregated columns while grouping
    // by r.address; servers running with ONLY_FULL_GROUP_BY reject that —
    // confirm the sql_mode in use.
    case 'reports_pending':
        $limit = min(100, max(1, intval($input['limit'] ?? 50)));
        $stmt = $db->prepare(
            "SELECT r.id, r.address, r.reason, r.detail, r.reported_at, r.status,
                    o.title, o.status AS site_status, o.cti_type, o.tags,
                    LEAST(999, (SELECT COUNT(*) FROM reports r2 WHERE r2.address = r.address AND r2.status = 'pending')) AS report_count
             FROM reports r
             LEFT JOIN onions o ON o.address = r.address
             WHERE r.status = 'pending'
             GROUP BY r.address
             ORDER BY report_count DESC, r.reported_at ASC
             LIMIT ?"
        );
        // Bind LIMIT explicitly as an integer: execute([...]) binds every
        // value as a string, which becomes LIMIT '50' (a syntax error) when
        // PDO emulates prepared statements.
        $stmt->bindValue(1, $limit, PDO::PARAM_INT);
        $stmt->execute();
        $reports = $stmt->fetchAll();
        Security::json(['reports' => $reports]);
        break;

    // ── Dismiss a report (ignore) ────────────────────────────
    // Dismisses either one report (by `report_id`) or every pending report
    // for an `address`. `report_id` takes precedence when both are given.
    // The optional `note` is stored as admin_note (max 500 characters).
    case 'report_dismiss':
        $reportId = intval($input['report_id'] ?? 0);
        $address  = trim($input['address'] ?? '');
        $note     = mb_substr($input['note'] ?? '', 0, 500);

        // Without a target nothing would be updated; report that as a client
        // error instead of answering "ok" for a no-op.
        if ($reportId <= 0 && $address === '') {
            Security::json(['error' => 'Missing report_id or address'], 400);
        }

        if ($reportId > 0) {
            // Dismiss single report
            $db->prepare(
                "UPDATE reports SET status='dismissed', admin_note=?, resolved_at=NOW() WHERE id=?"
            )->execute([$note, $reportId]);
        } elseif ($address !== '') {
            // Dismiss ALL pending reports for this address
            $db->prepare(
                "UPDATE reports SET status='dismissed', admin_note=?, resolved_at=NOW()
                 WHERE address=? AND status='pending'"
            )->execute([$note, $address]);
        }
        Security::json(['status' => 'ok']);
        break;

    // ── Block a reported site (blocklist + remove) ───────────
    // Adds the address to the blocklist, deletes every row stored for it and
    // closes its pending reports as 'blocked'.
    case 'report_block':
        $address = trim($input['address'] ?? '');
        $note    = mb_substr($input['note'] ?? '', 0, 500);

        if (preg_match('/^[a-z2-7]{56}\.onion$/', $address) !== 1) {
            Security::json(['error' => 'Invalid address'], 400);
        }

        // Blocklist rows hold the MD5 of the address, tagged with their origin.
        $blockInsert = $db->prepare("INSERT IGNORE INTO blocklist (hash_md5, source) VALUES (?, 'report-block')");
        $blockInsert->execute([md5($address)]);

        // Remove all related data. Pages go first because their delete looks
        // the onion up by address; the onion row itself goes last.
        $purgeStatements = [
            "DELETE FROM pages WHERE onion_id IN (SELECT id FROM onions WHERE address=?)",
            "DELETE FROM uptime_log WHERE address=?",
            "DELETE FROM site_changes WHERE address=?",
            "DELETE FROM canaries WHERE address=?",
            "DELETE FROM snapshots WHERE address=?",
            "DELETE FROM onions WHERE address = ?",
        ];
        foreach ($purgeStatements as $purgeSql) {
            $db->prepare($purgeSql)->execute([$address]);
        }

        // Mark all pending reports for this address as blocked
        $closeReports = $db->prepare(
            "UPDATE reports SET status='blocked', admin_note=?, resolved_at=NOW()
             WHERE address=? AND status='pending'"
        );
        $closeReports->execute([$note, $address]);

        Security::json(['status' => 'ok', 'blocked' => $address]);
        break;

    // ── Report stats ─────────────────────────────────────────
    // Returns the number of reports in each status as integers.
    case 'reports_stats':
        $countQueries = [
            'pending'   => "SELECT COUNT(*) FROM reports WHERE status='pending'",
            'dismissed' => "SELECT COUNT(*) FROM reports WHERE status='dismissed'",
            'blocked'   => "SELECT COUNT(*) FROM reports WHERE status='blocked'",
        ];
        $stats = [];
        foreach ($countQueries as $label => $countSql) {
            $stats[$label] = (int) $db->query($countSql)->fetchColumn();
        }
        Security::json($stats);
        break;

    // ── Get trending terms for review ────────────────────────
    // Returns the 100 terms with the most hits, approved or not.
    case 'trending_list':
        $topTerms = $db->query(
            "SELECT term, hits, approved, last_seen FROM trending_terms ORDER BY hits DESC LIMIT 100"
        )->fetchAll();
        Security::json(['terms' => $topTerms]);
        break;

    // ── Approve or reject trending terms ─────────────────────
    // The term is trimmed and lower-cased before matching. Approving flags
    // the row; rejecting deletes it.
    case 'trending_approve':
        $term    = mb_strtolower(trim($input['term'] ?? ''));
        $approve = (bool) ($input['approve'] ?? false);
        if ($term === '') {
            Security::json(['error' => 'Missing term'], 400);
        }
        // Reject = delete so it won't accumulate
        $termSql = $approve
            ? "UPDATE trending_terms SET approved = 1 WHERE term = ?"
            : "DELETE FROM trending_terms WHERE term = ?";
        $db->prepare($termSql)->execute([$term]);
        Security::json(['status' => 'ok']);
        break;

    // ── Log a site change ────────────────────────────────────
    // Appends a row to site_changes. `change_type` must be one of the
    // whitelisted strings; old/new values are cut to 500 characters.
    case 'change':
        $address = trim($input['address'] ?? '');
        $type    = $input['change_type'] ?? '';
        $old     = mb_substr($input['old_val'] ?? '', 0, 500);
        $new     = mb_substr($input['new_val'] ?? '', 0, 500);
        $valid = ['title_changed','came_online','went_offline','new_pages','content_changed','canary_changed'];
        // Strict comparison: with loose in_array() a JSON `true` equals every
        // non-empty string and would pass the whitelist.
        if (!$address || !in_array($type, $valid, true)) {
            Security::json(['error' => 'Bad params'], 400);
        }
        $db->prepare("INSERT INTO site_changes (address,change_type,old_val,new_val) VALUES (?,?,?,?)")
           ->execute([$address, $type, $old, $new]);
        Security::json(['status' => 'ok']);
        break;

    // ── Update content hash + mirror detection ───────────────
    // Stores the supplied hash as both content_hash and mirror_group of the
    // onion row.
    case 'content_hash':
        $address = trim($input['address'] ?? '');
        $hash    = trim($input['hash'] ?? '');
        if (!($address && $hash)) {
            Security::json(['error' => 'Bad params'], 400);
        }
        $hashUpdate = $db->prepare("UPDATE onions SET content_hash=?, mirror_group=? WHERE address=?");
        $hashUpdate->execute([$hash, $hash, $address]);
        Security::json(['status' => 'ok']);
        break;

    // ── Canary upsert ────────────────────────────────────────
    // Creates the canary record for an address, or compares the supplied hash
    // with the stored one: an unchanged hash refreshes last_seen, a different
    // hash is recorded as a change (and logged to site_changes).
    case 'canary':
        $address = trim($input['address'] ?? '');
        $url     = mb_substr($input['page_url'] ?? '', 0, 2000);
        $hash    = trim($input['canary_hash'] ?? '');
        $preview = mb_substr($input['preview'] ?? '', 0, 500);
        if (!($address && $hash)) {
            Security::json(['error' => 'Bad params'], 400);
        }

        // Look up the existing canary for this address, if any.
        $canaryLookup = $db->prepare("SELECT id, canary_hash, status FROM canaries WHERE address=? LIMIT 1");
        $canaryLookup->execute([$address]);
        $existing = $canaryLookup->fetch();

        if (!$existing) {
            // First canary seen for this address.
            $db->prepare("INSERT INTO canaries (address,page_url,canary_hash,preview) VALUES (?,?,?,?)")
               ->execute([$address, $url, $hash, $preview]);
        } elseif ($existing['canary_hash'] === $hash) {
            // Unchanged: just refresh the record.
            $db->prepare("UPDATE canaries SET last_seen=NOW(), status='active' WHERE id=?")->execute([$existing['id']]);
        } else {
            // Canary changed! Keep the previous hash in last_hash and log the
            // change using the first 16 characters of each hash.
            $db->prepare("UPDATE canaries SET last_hash=canary_hash, canary_hash=?, preview=?, status='changed', last_seen=NOW() WHERE id=?")
               ->execute([$hash, $preview, $existing['id']]);
            $db->prepare("INSERT INTO site_changes (address,change_type,old_val,new_val) VALUES (?,'canary_changed',?,?)")
               ->execute([$address, substr($existing['canary_hash'], 0, 16), substr($hash, 0, 16)]);
        }
        Security::json(['status' => 'ok']);
        break;

    // ── Store snapshot metadata ──────────────────────────────
    // Inserts a snapshot row unless the most recent snapshot for the address
    // already carries the same non-empty content hash.
    case 'snapshot':
        $address = trim($input['address'] ?? '');
        $title   = mb_substr($input['title'] ?? '', 0, 500);
        $hash    = trim($input['content_hash'] ?? '');
        $size    = intval($input['page_size'] ?? 0);
        $path    = mb_substr($input['html_path'] ?? '', 0, 500);
        if (!$address) {
            Security::json(['error' => 'Bad params'], 400);
        }

        // Dedup: compare against the latest stored snapshot only.
        $latestLookup = $db->prepare("SELECT content_hash FROM snapshots WHERE address=? ORDER BY captured_at DESC LIMIT 1");
        $latestLookup->execute([$address]);
        $latest = $latestLookup->fetch();

        // An empty hash never counts as "unchanged".
        $unchanged = $hash !== '' && $latest && $latest['content_hash'] === $hash;
        if ($unchanged) {
            Security::json(['status' => 'ok', 'skipped' => 'unchanged']);
            break;
        }

        $snapshotInsert = $db->prepare("INSERT INTO snapshots (address,title,content_hash,page_size,html_path) VALUES (?,?,?,?,?)");
        $snapshotInsert->execute([$address, $title, $hash, $size, $path]);
        Security::json(['status' => 'ok']);
        break;

    // ── Fetch pending submissions ────────────────────────────
    // Returns pending submissions, oldest first. `limit` is clamped to
    // 1..100 and defaults to 50.
    case 'submissions_pending':
        $lim = min(100, max(1, intval($input['limit'] ?? 50)));
        $st = $db->prepare("SELECT id,address,description,submitted_at FROM submissions WHERE status='pending' ORDER BY submitted_at ASC LIMIT ?");
        // Bind LIMIT explicitly as an integer: execute([...]) binds every
        // value as a string, which becomes LIMIT '50' (a syntax error) when
        // PDO emulates prepared statements.
        $st->bindValue(1, $lim, PDO::PARAM_INT);
        $st->execute();
        Security::json(['submissions' => $st->fetchAll()]);
        break;

    // ── Accept/reject submission ─────────────────────────────
    // Marks the submission as 'accepted' or 'rejected' and stamps resolved_at.
    case 'submission_resolve':
        $id     = intval($input['id'] ?? 0);
        $accept = (bool) ($input['accept'] ?? false);
        if ($id === 0) {
            Security::json(['error' => 'Missing id'], 400);
        }
        if ($accept) {
            $status = 'accepted';
        } else {
            $status = 'rejected';
        }
        $resolve = $db->prepare("UPDATE submissions SET status=?, resolved_at=NOW() WHERE id=?");
        $resolve->execute([$status, $id]);
        Security::json(['status' => 'ok']);
        break;

    // ── Cleanup old data (run periodically) ──────────────────
    // Applies the retention rules below and reports the number of rows
    // deleted per table.
    case 'cleanup':
        $deleted = [];

        // Uptime log: keep 60 days
        $uptimePurge = $db->prepare("DELETE FROM uptime_log WHERE checked_at < DATE_SUB(NOW(), INTERVAL 60 DAY)");
        $uptimePurge->execute();
        $deleted['uptime_log'] = $uptimePurge->rowCount();

        // Snapshots: keep newest 20 per address, delete excess.
        // Uses the ROW_NUMBER window function (MariaDB 10.2+/MySQL 8+, per the
        // original "Fix #21" note) instead of an O(n²) correlated subquery.
        $snapshotPurge = $db->query(
            "DELETE s FROM snapshots s
             INNER JOIN (
               SELECT id FROM (
                 SELECT id, ROW_NUMBER() OVER (PARTITION BY address ORDER BY captured_at DESC) AS rn
                 FROM snapshots
               ) ranked WHERE rn > 20
             ) excess ON s.id = excess.id"
        );
        $deleted['snapshots'] = $snapshotPurge->rowCount();

        // Site changes: keep 90 days
        $changePurge = $db->prepare("DELETE FROM site_changes WHERE detected_at < DATE_SUB(NOW(), INTERVAL 90 DAY)");
        $changePurge->execute();
        $deleted['site_changes'] = $changePurge->rowCount();

        // Trending: prune terms not seen in 30 days and not approved
        $trendingPurge = $db->prepare("DELETE FROM trending_terms WHERE approved = 0 AND last_seen < DATE_SUB(NOW(), INTERVAL 30 DAY)");
        $trendingPurge->execute();
        $deleted['trending'] = $trendingPurge->rowCount();

        Security::json(['status' => 'ok', 'deleted' => $deleted]);
        break;

    // ── Removal request management ──────────────────────────────────────
    // Lists up to 100 pending removal requests, oldest first.
    case 'removals_pending':
        $pendingRemovals = $db->prepare("SELECT id,type,address,email,detail,status,created_at FROM removal_requests WHERE status='pending' ORDER BY created_at ASC LIMIT 100");
        $pendingRemovals->execute();
        $requests = $pendingRemovals->fetchAll(PDO::FETCH_ASSOC);
        Security::json(['requests' => $requests]);
        break;

    // Approve a removal request. When the request names an address, that
    // address is delisted (all stored rows deleted) and blocklisted; otherwise
    // the request is simply marked completed for manual handling.
    case 'removal_approve':
        $rid = intval($input['removal_id'] ?? 0);
        $note = trim($input['admin_note'] ?? '');
        if (!$rid) Security::json(['error' => 'removal_id required'], 400);

        // Load the request first so an unknown id is reported as such instead
        // of being answered with a misleading "resolved".
        $st2 = $db->prepare("SELECT address FROM removal_requests WHERE id=?");
        $st2->execute([$rid]);
        $rr = $st2->fetch();
        if (!$rr) {
            Security::json(['error' => 'Removal request not found'], 404);
        }

        // Mark as approved
        $st = $db->prepare("UPDATE removal_requests SET status='approved', admin_note=?, resolved_at=NOW() WHERE id=?");
        $st->execute([$note, $rid]);

        // Treat a NULL address like an empty one; otherwise md5(null) would put
        // the hash of the empty string into the blocklist.
        $addr = $rr['address'] ?? '';
        if ($addr !== '') {
            // Delete related data first (pages cascade from onions FK, but be explicit)
            $db->prepare("DELETE FROM pages WHERE onion_id IN (SELECT id FROM onions WHERE address=?)")->execute([$addr]);
            $db->prepare("DELETE FROM uptime_log WHERE address=?")->execute([$addr]);
            $db->prepare("DELETE FROM site_changes WHERE address=?")->execute([$addr]);
            $db->prepare("DELETE FROM canaries WHERE address=?")->execute([$addr]);
            $db->prepare("DELETE FROM snapshots WHERE address=?")->execute([$addr]);
            $db->prepare("DELETE FROM onions WHERE address=?")->execute([$addr]);
            // Add to blocklist so crawler doesn't re-add it
            $hash = md5($addr);
            $db->prepare("INSERT IGNORE INTO blocklist (hash_md5, source) VALUES (?, 'removal-request')")->execute([$hash]);
            Security::json(['status' => 'ok', 'delisted' => $addr]);
        } else {
            // Personal/DMCA/other — just mark resolved, admin handles manually
            $st3 = $db->prepare("UPDATE removal_requests SET status='completed', resolved_at=NOW() WHERE id=?");
            $st3->execute([$rid]);
            Security::json(['status' => 'ok', 'resolved' => $rid]);
        }
        break;

    // Deny a removal request: store the admin note and stamp resolved_at.
    case 'removal_deny':
        $rid  = intval($input['removal_id'] ?? 0);
        $note = trim($input['admin_note'] ?? '');
        if ($rid === 0) {
            Security::json(['error' => 'removal_id required'], 400);
        }
        $deny = $db->prepare("UPDATE removal_requests SET status='denied', admin_note=?, resolved_at=NOW() WHERE id=?");
        $deny->execute([$note, $rid]);
        Security::json(['status' => 'ok', 'denied' => $rid]);
        break;

    // Any action not handled above is rejected, echoing the requested name.
    default:
        $unknownActionMessage = 'Unknown action: ' . $action;
        Security::json(['error' => $unknownActionMessage], 400);
}
