import json
import time
import random
import logging
import re
import os
import csv
import argparse
import atexit
from urllib.parse import urlparse
import tempfile
import pandas as pd
import mysql.connector
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

# ----------------------------
# CONFIG
# ----------------------------
# MySQL connection settings, shaped as keyword arguments for
# mysql.connector.connect(). No active code visible in this file references it.
# NOTE(review): credentials are hard-coded in source — consider loading them
# from the environment or a secrets store instead.
db_config = {
    "host": "10.8.0.1",  # Database server address
    "user": "integration",       # User name
    "password": "?Q8/{lVK2N08Y<b>k",  # Password
    "database": "Salsify"  # Database name
}

# SQL that selects, for every Active or Liquidation row of Salsify.MainData,
# the SKU, the Lowes account SKU and a product-page link built from that
# account SKU. No active code visible in this file executes it.
# NOTE(review): `MLW.SKU LIKE '%'` presumably exists to drop rows without a
# MySamm.Lowes match, and the SLW join feeds no selected column or filter —
# confirm both are intentional.
QUERY = """
SELECT SL.SKU, MLW.`Account SKU`, CONCAT('https://www.lowes.com/pd/-/',MLW.`Account SKU`) AS LINK
FROM  Salsify.MainData SL
LEFT JOIN MySamm.Lowes MLW  ON MLW.SKU = SL.SKU
LEFT JOIN Salsify.Lowes SLW  ON SLW.SKU = SL.SKU
WHERE MLW.SKU LIKE '%' AND (SL.Status='Active' OR SL.Status='Liquidation')
"""

# Default value of the --output command-line option.
OUTPUT_FILE = "LowesVideos.json"
# Default value of the --partner-urls option: a CSV located next to this file.
PARTNER_URLS_FILE = os.path.join(os.path.dirname(__file__), "partnerUrls.csv")

# CHROMEDRIVER = "/Users/pond/.cache/selenium/chromedriver/mac-arm64/144.0.7559.133/chromedriver"
# Path handed to uc.Chrome() as driver_executable_path.
CHROMEDRIVER = "/usr/local/bin/chromedriver"
# Chrome binary location; not referenced by the code visible in this file.
CHROME = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"

# HEADLESS and UC_HEADLESS are not referenced by the code visible in this file.
HEADLESS = False
# Navigation timeout (milliseconds) passed to page.goto().
NAV_TIMEOUT_MS = 45_000
# Number of attempts made per product page before giving up.
MAX_RETRIES = 3
# main() recreates the browser driver after every N processed URLs.
ROTATE_CONTEXT_EVERY = 15
# When true, the page HTML of each visited product page is written to
# PDP_HTML_DIR as "<sku or path-derived key>.html".
SAVE_PDP_HTML = True
PDP_HTML_DIR = "pdp_html/lws"
UC_HEADLESS = False
# Gates every WireGuard-related code path (rotator start-up and rotation).
VPN_ENABLED = True
# Arguments for WireGuardRotator; both can be overridden via the environment.
WG_CONFIG_DIR = os.environ.get("WG_CONFIG_DIR", "/etc/wireguard")
WG_COOLDOWN_SEC = float(os.environ.get("WG_COOLDOWN_SEC", "2.0"))
# Module-wide rotator instance; assigned in main() when VPN support is usable.
VPN_ROTATOR = None

# User-Agent string applied by apply_standard_headers() through the CDP
# Network.setUserAgentOverride command.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/120.0.0.0 Safari/537.36"
)

# URLs worth capturing: HLS manifests (.m3u8) and direct media files
# (.mp4 / .webm). Each extension must be followed by "?" or the end of the
# URL; matching is case-insensitive.
MANIFEST_RE = re.compile(r"\.m3u8(\?|$)", re.I)
FILE_RE = re.compile(r"\.(mp4|webm)(\?|$)", re.I)

# Optional: also capture individual stream segments (.m4s / .ts). Usually not
# needed, hence disabled.
CAPTURE_SEGMENTS = False
SEGMENT_RE = re.compile(r"\.(m4s|ts)(\?|$)", re.I)

# Root logger configuration: INFO level, "<time> - <level> - <message>" lines.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Optional VPN helpers. The scraper keeps working when the wg_vpn module cannot
# be imported: every name falls back to None and callers check for None before
# use. The failure is logged so that a run without VPN rotation is never silent.
try:
    from wg_vpn import (
        WireGuardRotator,
        parse_document_status_from_performance_logs,
        should_rotate_on_status,
        is_dns_error_html,
    )
except Exception as exc:  # deliberately broad: VPN support is best-effort
    logging.warning("wg_vpn helpers unavailable, VPN rotation disabled: %s", exc)
    WireGuardRotator = None
    parse_document_status_from_performance_logs = None
    should_rotate_on_status = None
    is_dns_error_html = None


def ensure_driver_ready(driver, max_attempts: int = 15):
    """Return a driver that answered a basic readiness probe.

    The probe reads ``current_url``, loads ``about:blank`` and enables the CDP
    Network domain. When any step fails, the driver is quit and replaced with a
    fresh one from ``get_uc_driver()`` before the next attempt.

    Args:
        driver: The driver to probe first.
        max_attempts: Maximum number of probe attempts.

    Returns:
        The first driver that passed the probe. If every attempt failed (or
        ``max_attempts`` is not positive) the most recent driver is returned
        without having been verified; a warning is logged in that case.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            _ = driver.current_url  # raises when the session is already dead
            driver.get("about:blank")
            driver.execute_cdp_cmd("Network.enable", {})
            time.sleep(0.5)
            return driver
        except Exception as exc:
            logging.warning("Driver not ready (attempt %d/%d): %s", attempt, max_attempts, exc)
            try:
                driver.quit()
            except Exception:
                # The session may already be gone; nothing left to clean up.
                pass
            time.sleep(1.5)
            driver = get_uc_driver()

    logging.warning(
        "Driver readiness not confirmed after %d attempts; returning an unverified driver",
        max_attempts,
    )
    return driver

def load_partner_urls(csv_path: str, column_name: str) -> list[tuple[str, str]]:
    """Read ``(sku, url)`` pairs from a partner-URL CSV file.

    Args:
        csv_path: Path of a CSV file with a header row.
        column_name: Header of the column that holds the URL.

    Returns:
        One ``(sku, url)`` tuple per row with a usable URL, in file order.
        Both values are stripped of surrounding whitespace. Rows whose URL is
        missing, empty, or the literal ``NULL`` (any letter case) are skipped.
        ``sku`` is ``""`` when the ``SKU`` column is missing or empty.

    Raises:
        OSError: If the file cannot be opened.
    """
    items = []
    # "utf-8-sig" drops a leading byte-order mark when present and otherwise
    # behaves like plain UTF-8. Without it a BOM (common in spreadsheet
    # exports) becomes part of the first header, so the "SKU" lookup fails.
    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            url = (row.get(column_name) or "").strip()
            if not url or url.upper() == "NULL":
                continue
            sku = (row.get("SKU") or "").strip()
            items.append((sku, url))
    return items


def build_page_headers():
    """Return a new dict of extra HTTP headers for product-page requests."""
    header_pairs = (
        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
        ("Accept-Language", "en-US,en;q=0.9"),
        ("Referer", "https://www.lowes.com/"),
        ("DNT", "1"),
        ("Upgrade-Insecure-Requests", "1"),
    )
    return dict(header_pairs)

def apply_standard_headers(driver) -> None:
    """Apply the user-agent override and extra headers through CDP.

    Both commands are best-effort: a failure of one is ignored and does not
    prevent the other from being attempted.
    """
    # Parameters are built lazily so that building them is covered by the same
    # per-command error handling as the CDP call itself.
    cdp_overrides = (
        ("Network.setUserAgentOverride", lambda: {"userAgent": USER_AGENT}),
        ("Network.setExtraHTTPHeaders", lambda: {"headers": build_page_headers()}),
    )
    for command, make_params in cdp_overrides:
        try:
            driver.execute_cdp_cmd(command, make_params())
        except Exception:
            continue


def choose_best_video_url(urls: set[str]) -> dict:
    """Pick the single most useful video URL from *urls*.

    An HLS manifest (``.m3u8``) is preferred over a direct ``.mp4``/``.webm``
    file. Manifests are ranked by a keyword/length heuristic; direct files by
    URL length. Ties are broken by taking the lexicographically smallest URL,
    so the result does not depend on set iteration order.

    Args:
        urls: Captured URLs; entries matching neither pattern are ignored.

    Returns:
        ``{"type": "m3u8" | "mp4/webm", "url": <str>}`` for the chosen URL, or
        ``{"type": "none", "url": None}`` when nothing matches.
    """

    def manifest_score(u: str) -> int:
        """Heuristic rank of a manifest URL; higher is better."""
        lowered = u.lower()
        score = 0
        # NOTE(review): this suffix test looks at the whole URL, so a manifest
        # carrying a query string never earns the bonus — confirm intended.
        if lowered.endswith("name/a.mp4/index.m3u8"):
            score += 40
        for keyword, weight in (("master", 5), ("playlist", 3), ("index", 2), ("variant", 2)):
            if keyword in lowered:
                score += weight
        # Longer URLs are often signed master URLs; the bonus is capped at 4.
        score += min(len(u) // 80, 4)
        return score

    # Sorting first makes max() (which keeps the first maximum it sees) break
    # ties deterministically instead of following set iteration order.
    manifests = sorted(u for u in urls if MANIFEST_RE.search(u))
    if manifests:
        return {"type": "m3u8", "url": max(manifests, key=manifest_score)}

    media_files = sorted(u for u in urls if FILE_RE.search(u))
    if media_files:
        # With several direct files, the longest URL is taken (often the best
        # quality / signed variant).
        return {"type": "mp4/webm", "url": max(media_files, key=len)}

    return {"type": "none", "url": None}


def build_video_list(urls: set[str]) -> list[dict]:
    """Describe every captured URL as a ``{url, type, file_name}`` record.

    Records are ordered by URL. ``type`` is ``"m3u8"``, ``"mp4/webm"`` or
    ``None`` when neither pattern matches. ``file_name`` is the last path
    component of the URL with the query string removed, or ``None`` when that
    component is empty.
    """

    def classify(link: str):
        if MANIFEST_RE.search(link):
            return "m3u8"
        if FILE_RE.search(link):
            return "mp4/webm"
        return None

    return [
        {
            "url": link,
            "type": classify(link),
            "file_name": os.path.basename(link.partition("?")[0]) or None,
        }
        for link in sorted(urls)
    ]


def get_uc_driver():
    """Start a new undetected-chromedriver Chrome session and return it.

    Performance logging is switched on so that network events can later be
    read back with ``driver.get_log("performance")``. The function waits four
    seconds after start-up before returning.
    """
    chrome_flags = (
        "--no-sandbox",
        "--disable-blink-features=AutomationControlled",
        "--start-maximized",
        "--disable-extensions",
        "--disable-infobars",
    )

    options = uc.ChromeOptions()
    for flag in chrome_flags:
        options.add_argument(flag)
    options.set_capability("goog:loggingPrefs", {"performance": "ALL"})

    browser = uc.Chrome(
        options=options,
        use_subprocess=False,
        driver_executable_path=CHROMEDRIVER,
    )
    time.sleep(4)
    return browser


def collect_network_urls(driver) -> set[str]:
    """Return the video-related URLs found in the driver's performance log.

    Only ``Network.requestWillBeSent`` and ``Network.responseReceived`` events
    are inspected. A URL is kept when it matches the manifest or media-file
    pattern (or the segment pattern while ``CAPTURE_SEGMENTS`` is on). A log
    that cannot be read counts as empty, and malformed entries are skipped.
    """
    try:
        entries = driver.get_log("performance")
    except Exception:
        entries = []

    # Which sub-object of "params" carries the URL for each event of interest.
    url_holder_by_method = {
        "Network.requestWillBeSent": "request",
        "Network.responseReceived": "response",
    }

    found = set()
    for entry in entries:
        try:
            event = json.loads(entry.get("message", "{}")).get("message", {})
            holder = url_holder_by_method.get(event.get("method"))
            if holder is None:
                continue
            link = event.get("params", {}).get(holder, {}).get("url")
            if not link:
                continue
            wanted = (
                MANIFEST_RE.search(link)
                or FILE_RE.search(link)
                or (CAPTURE_SEGMENTS and SEGMENT_RE.search(link))
            )
            if wanted:
                found.add(link)
        except Exception:
            continue
    return found


def _selenium_click(driver, element) -> bool:
    try:
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
        time.sleep(0.2)
        driver.execute_script("arguments[0].click();", element)
        return True
    except Exception:
        return False


def try_trigger_video_selenium(driver) -> bool:
    """Best-effort attempt to make the page request its video streams.

    Steps, in order: scroll down twice, open the gallery modal, switch to its
    "Videos" tab, click every gallery thumbnail button, then click the first
    element matching one of several play/video selectors. Lookup or click
    failures in any step after the scrolling are ignored.

    Returns:
        True as soon as one play/video candidate was clicked; otherwise True
        if at least one gallery thumbnail was clicked; otherwise False.
    """
    clicked_thumbnail = False

    # Scroll in two steps so the media section gets a chance to load.
    for _ in range(2):
        driver.execute_script("window.scrollBy(0, 1400);")
        time.sleep(0.7)

    # Open the gallery modal if present: first the "excess" thumbnail button,
    # otherwise a media tile that is neither a video nor a spin tile.
    try:
        openers = driver.find_elements(
            By.CSS_SELECTOR,
            "div.GalleryThumbnailWrapper-sc-2lpjbq-0.cbkjBo.gallery-thumbnail.shape--rounded.excess > button",
        ) or driver.find_elements(
            By.XPATH,
            "//div[@data-media-type and "
            "not(contains(translate(@data-media-type,'VIDEO','video'),'video')) and "
            "not(contains(translate(@data-media-type,'SPIN','spin'),'spin'))]"
            "//button[contains(@class,'MediaTilestyles__MediaTileTrigger')]",
        )
        if openers:
            _selenium_click(driver, openers[0])
            time.sleep(1.2)
    except Exception:
        pass

    # Switch to the "Videos" tab inside the modal.
    try:
        video_tabs = driver.find_elements(
            By.XPATH,
            "//div[contains(@class,'tablist')]//button[.//p[normalize-space()='Videos'] or normalize-space()='Videos']",
        )
        if video_tabs:
            _selenium_click(driver, video_tabs[0])
            time.sleep(0.8)
    except Exception:
        pass

    # Click each thumbnail button in the modal; pause only after a click that
    # went through.
    try:
        for thumb in driver.find_elements(By.CSS_SELECTOR, "div.gallery-thumbnail-group button"):
            if _selenium_click(driver, thumb):
                time.sleep(1.2)
                clicked_thumbnail = True
    except Exception:
        pass

    # Play/video candidates, tried in order; the first successful click wins.
    # The last entry is the broad fallback for anything that merely looks like
    # a video element.
    play_candidates = [
        (By.CSS_SELECTOR, "button[aria-label='Play']"),
        (By.CSS_SELECTOR, "button[title='Play']"),
        (By.CSS_SELECTOR, "[data-testid*='play']"),
        (By.XPATH, "//button[contains(normalize-space(.), 'Play')]"),
        (By.CSS_SELECTOR, "video"),
        (By.XPATH, "//*[contains(translate(@alt,'VIDEO','video'),'video')]"),
        (
            By.XPATH,
            "//*[contains(translate(@class,'VIDEO','video'),'video') or "
            "contains(translate(@aria-label,'VIDEO','video'),'video')]",
        ),
    ]
    for how, what in play_candidates:
        try:
            matches = driver.find_elements(how, what)
            if matches and _selenium_click(driver, matches[0]):
                time.sleep(1.2)
                return True
        except Exception:
            continue

    return clicked_thumbnail

def try_trigger_video(page) -> bool:
    """Best-effort interaction with a Playwright page to start video loading.

    The Lowes UI varies, so several strategies run in sequence: scroll down,
    open the gallery modal, switch to its "Videos" tab, click every gallery
    thumbnail button, then click the first element matching one of several
    play/video selectors. Failures inside any strategy are ignored.

    Returns:
        True as soon as a play/video candidate was clicked; otherwise True if
        at least one gallery thumbnail was clicked; otherwise False.
    """

    def press(target, settle_ms: int, pre_click_ms: int = 250) -> None:
        """Scroll *target* into view, click it, and wait *settle_ms* after."""
        target.scroll_into_view_if_needed(timeout=2000)
        page.wait_for_timeout(pre_click_ms)
        target.click(timeout=2500)
        page.wait_for_timeout(settle_ms)

    clicked_thumbnail = False

    # Scroll in two steps to load the media section.
    for _ in range(2):
        page.mouse.wheel(0, 1400)
        page.wait_for_timeout(700)

    # Open the gallery modal if present.
    try:
        modal_opener = page.locator(
            "div.GalleryThumbnailWrapper-sc-2lpjbq-0.cbkjBo.gallery-thumbnail.shape--rounded.excess > button, "
            "div[data-media-type]:not([data-media-type*='Video' i]):not([data-media-type*='Spin' i]) "
            "button.MediaTilestyles__MediaTileTrigger-sc-vhmy2w-2.jsjQI"
        ).first
        # Diagnostic output: how many elements each probe selector matches.
        for probe in (
            "button.MediaTilestyles__MediaTileTrigger-sc-vhmy2w-2.jsjQI",
            "div[data-media-type]",
            "div[data-media-type*='Video' i]",
            "div[data-media-type*='Spin' i]",
        ):
            logging.info(page.locator(probe).count())

        if modal_opener.count() > 0:
            press(modal_opener, 1200)
    except Exception:
        pass

    # Switch to the "Videos" tab inside the modal.
    try:
        videos_tab = page.locator("div.tablist button:has-text('Videos')").first
        if videos_tab.count() > 0:
            press(videos_tab, 800)
    except Exception:
        pass

    # Click each thumbnail button in the modal; stop at the first failure.
    try:
        thumbnails = page.locator("div.gallery-thumbnail-group button")
        for idx in range(thumbnails.count()):
            press(thumbnails.nth(idx), 1200, pre_click_ms=200)
            clicked_thumbnail = True
    except Exception:
        pass

    # Play/video candidates, tried in order; the first successful click wins.
    # The last entry is the broad fallback for video-looking thumbnails that
    # sit inside carousels.
    play_candidates = (
        "button[aria-label='Play']",
        "button[title='Play']",
        "[data-testid*='play']",
        "button:has-text('Play')",
        "video",
        "img[alt*='video' i]",
        "[class*='video' i], [aria-label*='video' i]",
    )
    for selector in play_candidates:
        try:
            candidate = page.locator(selector).first
            if candidate.count() > 0:
                press(candidate, 1200)
                return True
        except Exception:
            continue

    return clicked_thumbnail


def capture_video_for_url(page, url: str, sku: str | None = None) -> dict:
    """Open a product page in Playwright and capture its video URLs.

    A request listener records every URL matching the manifest or media-file
    pattern (or the segment pattern while ``CAPTURE_SEGMENTS`` is on). Up to
    ``MAX_RETRIES`` attempts are made; each one navigates, optionally saves
    the page HTML, tries to start playback and then waits for requests.

    Args:
        page: Playwright page used for navigation.
        url: Product page URL.
        sku: Optional SKU, used as the file name of the saved HTML.

    Returns:
        A dict with ``best_type``, ``best_url``, ``all_urls`` (sorted),
        ``videos`` and ``triggered_play``. When every attempt fails, the
        "nothing found" form of that dict is returned.
    """
    captured = set()

    def on_request(req):
        u = req.url
        if MANIFEST_RE.search(u) or FILE_RE.search(u) or (CAPTURE_SEGMENTS and SEGMENT_RE.search(u)):
            captured.add(u)

    def is_timeout(exc: BaseException) -> bool:
        # Playwright's timeout exception class is named "TimeoutError".
        # Matching on the class name keeps the distinct log message without
        # importing playwright, which this file never imports.
        return any(cls.__name__ == "TimeoutError" for cls in type(exc).__mro__)

    page.on("request", on_request)
    try:
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                logging.info(f"Open {url} (attempt {attempt}/{MAX_RETRIES})")
                page.goto(url, wait_until="domcontentloaded", timeout=NAV_TIMEOUT_MS)
                page.wait_for_timeout(2500)

                if SAVE_PDP_HTML:
                    html = page.content()
                    os.makedirs(PDP_HTML_DIR, exist_ok=True)
                    file_key = sku or re.sub(r"\W+", "_", urlparse(url).path.strip("/")) or "pdp"
                    html_path = os.path.join(PDP_HTML_DIR, f"{file_key}.html")
                    with open(html_path, "w", encoding="utf-8") as f:
                        f.write(html)
                    logging.info(f"Saved PDP HTML to {html_path}")

                triggered = try_trigger_video(page)

                # Give the player time to request its manifest after the click.
                page.wait_for_timeout(4500 if triggered else 1500)

                best = choose_best_video_url(captured)
                return {
                    "best_type": best["type"],
                    "best_url": best["url"],
                    "all_urls": sorted(captured),
                    "videos": build_video_list(captured),
                    "triggered_play": triggered,
                }
            except Exception as e:
                if is_timeout(e):
                    logging.warning("Timeout, retrying...")
                else:
                    logging.warning(f"Error: {e}")

            # Back off before the next attempt; pointless after the last one.
            if attempt < MAX_RETRIES:
                time.sleep(4 + random.uniform(1, 4))
    finally:
        # Always detach the listener so it cannot outlive this call.
        page.remove_listener("request", on_request)

    return {"best_type": "none", "best_url": None, "all_urls": [], "videos": [], "triggered_play": False}


def _video_urls_from_perf_entries(entries) -> set[str]:
    """Extract video-related URLs from already-fetched performance-log entries."""
    found = set()
    for entry in entries or []:
        try:
            message = json.loads(entry.get("message", "{}")).get("message", {})
            method = message.get("method")
            params = message.get("params", {})
            if method == "Network.requestWillBeSent":
                link = params.get("request", {}).get("url")
            elif method == "Network.responseReceived":
                link = params.get("response", {}).get("url")
            else:
                continue
            if link and (
                MANIFEST_RE.search(link)
                or FILE_RE.search(link)
                or (CAPTURE_SEGMENTS and SEGMENT_RE.search(link))
            ):
                found.add(link)
        except Exception:
            continue
    return found


def capture_video_for_url_selenium(driver, url: str, sku: str | None = None) -> dict:
    """Open a product page with Selenium and capture its video URLs.

    Up to ``MAX_RETRIES`` attempts are made. Each attempt discards stale
    performance-log entries, navigates, optionally rotates the VPN (on a bad
    document status or a DNS error page) and retries, optionally saves the
    page HTML, tries to start playback, and finally collects video URLs from
    the performance log.

    Args:
        driver: Selenium driver with performance logging enabled.
        url: Product page URL.
        sku: Optional SKU, used as the file name of the saved HTML.

    Returns:
        A dict with ``best_type``, ``best_url``, ``all_urls`` (sorted),
        ``videos`` and ``triggered_play``. When every attempt fails, the
        "nothing found" form of that dict is returned.
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            logging.info(f"Open {url} (attempt {attempt}/{MAX_RETRIES})")
            try:
                # Reading the log empties it, so this discards entries left
                # over from the previous page.
                driver.get_log("performance")
            except Exception:
                pass
            driver.get(url)
            time.sleep(2.5)

            # Entries read for the status check are kept: reading the log
            # empties it, so without this every video URL requested during
            # the page load would be missing from the final collection.
            nav_entries = []
            if VPN_ENABLED and VPN_ROTATOR is not None and parse_document_status_from_performance_logs is not None:
                try:
                    nav_entries = driver.get_log("performance")
                    status = parse_document_status_from_performance_logs(nav_entries, url)
                    if should_rotate_on_status is not None and should_rotate_on_status(status):
                        VPN_ROTATOR.rotate(f"HTTP {status} for {url}")
                        continue
                except Exception:
                    pass
            if VPN_ENABLED and VPN_ROTATOR is not None and is_dns_error_html is not None:
                try:
                    if is_dns_error_html(driver.page_source):
                        VPN_ROTATOR.rotate(f"DNS error for {url}")
                        continue
                except Exception:
                    pass

            if SAVE_PDP_HTML:
                os.makedirs(PDP_HTML_DIR, exist_ok=True)
                file_key = sku or re.sub(r"\W+", "_", urlparse(url).path.strip("/")) or "pdp"
                html_path = os.path.join(PDP_HTML_DIR, f"{file_key}.html")
                with open(html_path, "w", encoding="utf-8") as f:
                    f.write(driver.page_source)
                logging.info(f"Saved PDP HTML to {html_path}")

            triggered = try_trigger_video_selenium(driver)
            # Give the player time to request its manifest after the click.
            time.sleep(4.5 if triggered else 1.5)

            captured = _video_urls_from_perf_entries(nav_entries) | collect_network_urls(driver)

            best = choose_best_video_url(captured)
            return {
                "best_type": best["type"],
                "best_url": best["url"],
                "all_urls": sorted(captured),
                "videos": build_video_list(captured),
                "triggered_play": triggered,
            }
        except Exception as e:
            logging.warning(f"Error: {e}")

        # Back off before the next attempt; pointless after the last one.
        if attempt < MAX_RETRIES:
            time.sleep(2 + random.uniform(0.8, 2.5))

    return {"best_type": "none", "best_url": None, "all_urls": [], "videos": [], "triggered_play": False}


def _write_results(path: str, rows: list[dict]) -> None:
    """Write *rows* to *path* as pretty-printed UTF-8 JSON and log the path."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(rows, f, indent=2, ensure_ascii=False)
    logging.info(f"Saved -> {path}")


def main():
    """Command-line entry point: capture video URLs for every partner URL.

    Reads ``(SKU, URL)`` pairs from the partner CSV, visits each URL with a
    Selenium driver (recreated every ``ROTATE_CONTEXT_EVERY`` URLs), and
    writes one result row per URL to the output JSON file. The path comes
    from the ``OUTPUT_PATH`` environment variable when set, otherwise from
    ``--output``. On success the rows are also printed to stdout.

    If processing is interrupted by an exception, the rows collected so far
    are written to the output file before the exception is re-raised.
    """
    global VPN_ROTATOR

    # Parse arguments first so that --help or a bad option exits before the
    # VPN is touched.
    parser = argparse.ArgumentParser(description="Capture Lowes video URLs.")
    parser.add_argument(
        "--partner-urls",
        default=PARTNER_URLS_FILE,
        help="CSV with SKU + Lowes URL columns (default: partnerUrls.csv).",
    )
    parser.add_argument(
        "--output",
        default=OUTPUT_FILE,
        help="Output JSON file (default: LowesVideos.json).",
    )
    args = parser.parse_args()

    if VPN_ENABLED:
        if WireGuardRotator is not None:
            VPN_ROTATOR = WireGuardRotator(WG_CONFIG_DIR, cooldown_sec=WG_COOLDOWN_SEC)
            VPN_ROTATOR.ensure_up()
            atexit.register(VPN_ROTATOR.shutdown)
        else:
            logging.warning("VPN_ENABLED is set but wg_vpn is unavailable; continuing without VPN rotation")

    # URLs come from the partner CSV; the MySQL-based source (db_config /
    # QUERY) is not used.
    pairs = load_partner_urls(args.partner_urls, "Lowes URL")
    if not pairs:
        logging.warning("No URLs loaded")
        return
    logging.info(f"Loaded {len(pairs)} URLs")

    output_path = os.environ.get("OUTPUT_PATH") or args.output
    out = []

    driver = ensure_driver_ready(get_uc_driver())
    try:
        for i, (sku, url) in enumerate(pairs):
            # Periodically start from a fresh browser session.
            if i and i % ROTATE_CONTEXT_EVERY == 0:
                try:
                    driver.quit()
                except Exception:
                    pass
                time.sleep(2)
                driver = ensure_driver_ready(get_uc_driver())

            video_info = capture_video_for_url_selenium(driver, url, sku=sku)

            row = {
                "SKU": sku,
                "page_url": url,
                "video_type": video_info["best_type"],
                "video_url": video_info["best_url"],
                "videos": video_info.get("videos", []),
                "triggered_play": video_info["triggered_play"],
                # keep this if you want debugging; remove to keep file smaller
                "all_captured_urls": video_info["all_urls"],
            }

            out.append(row)
            logging.info(f"Processed {i+1}/{len(pairs)} -> {len(row['videos'])} videos")
            # Pause between pages; nothing to wait for after the last one.
            if i + 1 < len(pairs):
                time.sleep(5 + random.uniform(2.0, 4.5))
    except BaseException:
        # Includes KeyboardInterrupt: keep what a long run has collected.
        # An existing output file is left alone when there is nothing to save.
        if out:
            try:
                _write_results(output_path, out)
            except OSError as exc:
                logging.error(f"Could not save partial results to {output_path}: {exc}")
        raise
    finally:
        try:
            driver.quit()
        except Exception:
            pass

    _write_results(output_path, out)
    print(json.dumps(out, indent=2, ensure_ascii=False))


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
