"""
微信公众号文章抓取服务
通过微信网页版登录，抓取指定公众号的文章
"""
import base64
import html
import json
import os
import re
import time
import uuid
from io import BytesIO
from typing import Optional
from urllib.parse import urlparse

import requests
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# ASGI application object; the route decorators below register endpoints on it.
app = FastAPI()
# CORS is fully open: any origin, any HTTP method, and any request header is accepted.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

# In-memory session store, keyed by the session_id handed out by login_init.
# Each value is a dict with "wx_uuid", "status", "cookie", "token" and "created_at".
# Being a plain module-level dict, its contents live only as long as this process.
sessions = {}

# Base headers sent with every outbound request made by this module
# (a desktop Chrome User-Agent and an mp.weixin.qq.com Referer).
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Referer": "https://mp.weixin.qq.com/",
}

@app.get("/api/health")
def health():
    """Liveness probe: report a fixed "ok" status and the server's current Unix time."""
    payload = {"status": "ok"}
    payload["time"] = time.time()
    return payload

@app.post("/api/login/init")
def login_init():
    """Start a WeChat web login and return a QR code for the user to scan.

    Requests a login UUID from ``login.wx.qq.com/jslogin``, renders the
    corresponding scan URL as a PNG QR code, and registers a new entry in
    the module-level ``sessions`` store with status ``"waiting"``.

    Returns:
        On success, a dict with ``session_id`` (key to pass to the status
        endpoint), ``qr_code`` (base64-encoded PNG) and ``qr_url`` (the URL
        encoded in the QR image).
        On failure, a dict with ``error`` and ``raw`` (up to 200 characters of
        the upstream response body or of the network error message).
    """
    # Drop abandoned (never completed) logins so the in-memory store does not
    # grow without bound. Successful sessions are kept because their cookie
    # and token may still be read through the status endpoint.
    # NOTE(review): 30 minutes is assumed to be well past the QR code's
    # validity window - confirm.
    stale_after = 30 * 60
    cutoff = time.time() - stale_after
    # Iterate over a snapshot: other requests may add sessions concurrently.
    for stale_id, stale in list(sessions.items()):
        if stale.get("status") != "success" and stale.get("created_at", 0) < cutoff:
            sessions.pop(stale_id, None)

    session_id = str(uuid.uuid4())

    # Ask WeChat for the UUID that identifies this QR login attempt.
    try:
        r = requests.get(
            "https://login.wx.qq.com/jslogin",
            params={
                "appid": "wx782c26e4c19acffb",
                "redirect_uri": "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxnewloginpage",
                "fun": "new",
                "lang": "zh_CN",
                "_": int(time.time() * 1000),
            },
            headers=HEADERS,
            timeout=10,
        )
    except requests.RequestException as exc:
        # Report network failures in the same shape as a parse failure
        # instead of letting them surface as an HTTP 500.
        return {"error": "获取二维码失败", "raw": str(exc)[:200]}

    # The response is a JavaScript snippet; the UUID is assigned as `uuid = "..."`.
    uuid_match = re.search(r'uuid = "([^"]+)"', r.text)
    if not uuid_match:
        return {"error": "获取二维码失败", "raw": r.text[:200]}

    wx_uuid = uuid_match.group(1)

    # Imported lazily, as in the original, so the module loads without qrcode
    # until a login is actually requested.
    import qrcode

    qr_url = f"https://login.weixin.qq.com/l/{wx_uuid}"
    qr = qrcode.QRCode(version=1, box_size=10, border=4)
    qr.add_data(qr_url)
    qr.make(fit=True)
    img = qr.make_image(fill_color="black", back_color="white")

    # Serialize the image to PNG in memory and base64-encode it for JSON transport.
    buf = BytesIO()
    img.save(buf, format="PNG")
    qr_b64 = base64.b64encode(buf.getvalue()).decode()

    sessions[session_id] = {
        "wx_uuid": wx_uuid,
        "status": "waiting",
        "cookie": None,
        "token": None,
        "created_at": time.time(),
    }

    return {
        "session_id": session_id,
        "qr_code": qr_b64,
        "qr_url": qr_url,
    }

@app.get("/api/login/status/{session_id}")
def login_status(session_id: str):
    """Report the state of a QR login started by ``login_init``.

    Polls WeChat's login endpoint for the session's UUID and maps the
    ``window.code`` value in the reply to a status:

    * ``408`` -> ``"waiting"`` (not scanned yet)
    * ``400`` -> ``"expired"`` (also stored on the session)
    * ``201`` -> ``"scanned"`` (awaiting confirmation on the phone)
    * ``200`` -> ``"success"``; the cookie string and token are collected,
      cached on the session and returned
    * anything else -> ``"waiting"`` with the raw ``code`` included

    Args:
        session_id: Identifier returned by ``login_init``.

    Returns:
        A dict that always contains ``status``; depending on the status it
        also carries ``message``, ``code``, or ``cookie`` and ``token``.
        Unknown sessions and upstream failures yield ``status == "error"``.
    """
    s = sessions.get(session_id)
    if s is None:
        return {"status": "error", "message": "session not found"}

    # Terminal states are answered from the store without contacting WeChat.
    if s["status"] == "success":
        return {"status": "success", "cookie": s["cookie"], "token": s["token"]}
    if s["status"] == "expired":
        return {"status": "expired", "message": "二维码已过期"}

    try:
        r = requests.get(
            "https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login",
            params={
                "loginicon": "true",
                "uuid": s["wx_uuid"],
                "tip": "0",
                "r": int(time.time()),
                "_": int(time.time() * 1000),
            },
            headers=HEADERS,
            timeout=10,
        )
    except requests.Timeout:
        # A timeout is treated as "still waiting" rather than a failure: the
        # caller is expected to poll again.
        # NOTE(review): the poll endpoint appears to hold the connection open
        # longer than the 10 s timeout - confirm.
        return {"status": "waiting", "message": "等待扫码"}
    except requests.RequestException as exc:
        return {"status": "error", "message": f"查询登录状态失败: {exc}"}

    code_match = re.search(r"window\.code=(\d+)", r.text)
    if not code_match:
        return {"status": "waiting"}

    code = code_match.group(1)

    if code == "408":
        return {"status": "waiting", "message": "等待扫码"}
    if code == "400":
        s["status"] = "expired"
        return {"status": "expired", "message": "二维码已过期"}
    if code == "201":
        return {"status": "scanned", "message": "已扫码，请在手机上确认"}
    if code != "200":
        return {"status": "waiting", "code": code}

    # code == "200": the reply carries the URL that completes the login.
    redirect_match = re.search(r'redirect_uri="([^"]+)"', r.text)
    if not redirect_match:
        return {"status": "error", "message": "登录成功但获取redirect_uri失败"}

    try:
        cookie_str, token = _finish_web_login(redirect_match.group(1))
    except requests.RequestException as exc:
        return {"status": "error", "message": f"获取登录凭证失败: {exc}"}

    s["status"] = "success"
    s["cookie"] = cookie_str
    s["token"] = token

    return {"status": "success", "cookie": cookie_str, "token": token}


def _finish_web_login(redirect_uri):
    """Exchange the post-confirmation redirect URL for a (cookie string, token) pair.

    Raises:
        requests.RequestException: if any of the three upstream requests fails.
    """
    # First request does not follow redirects, so cookies set on that very
    # response are captured.
    r2 = requests.get(redirect_uri, headers=HEADERS, allow_redirects=False, timeout=10)
    cookies = r2.cookies.get_dict()
    cookie_str = "; ".join(f"{k}={v}" for k, v in cookies.items())

    # Fall back to the raw header when the cookie jar came back empty.
    if not cookie_str and "Set-Cookie" in r2.headers:
        cookie_str = r2.headers.get("Set-Cookie", "")

    # NOTE(review): the redirect URL is requested a second time (this time
    # following redirects) only to look for a token in the body; kept as in
    # the original - confirm the URL can be used more than once.
    r3 = requests.get(redirect_uri, headers=HEADERS, timeout=10)
    token_match = re.search(r'"token":(\d+)', r3.text)
    token = token_match.group(1) if token_match else ""

    # Visit the MP platform with the collected cookies to pick up any extra
    # cookies and a token from the final URL.
    mp_r = requests.get(
        "https://mp.weixin.qq.com/",
        headers={**HEADERS, "Cookie": cookie_str},
        timeout=10,
    )
    mp_cookies = {**cookies, **mp_r.cookies.get_dict()}
    # Only rebuild the string when there is something to join; otherwise the
    # Set-Cookie fallback above would be overwritten with an empty string.
    if mp_cookies:
        cookie_str = "; ".join(f"{k}={v}" for k, v in mp_cookies.items())

    # A token in the MP URL takes precedence over one found in the body.
    url_token = re.search(r'token=(\d+)', mp_r.url)
    if url_token:
        token = url_token.group(1)

    return cookie_str, token

class DownloadRequest(BaseModel):
    # Request body accepted by POST /api/download/articles.

    # Text used as the search query when looking up the account.
    account_name: str
    # Value sent as the Cookie header on requests to mp.weixin.qq.com.
    cookie: str
    # Value sent as the `token` query parameter on those requests.
    token: str
    # "last_week", "last_month" or "last_3months" limit results to the last
    # 7, 30 or 90 days; any other value disables the date cut-off.
    date_range: str = "last_month"


@app.post("/api/download/articles")
def download_articles(req: DownloadRequest):
    """Look up an official account by name and list its recent articles.

    The first search hit for ``req.account_name`` is used. Its article list
    is then paged through (20 per page, 2 s pause before each page) until a
    page comes back short or empty, an article older than the requested date
    range is met, or an upstream error occurs.

    Args:
        req: Account name, MP-platform cookie and token, and the date range.

    Returns:
        On success, a dict with ``account`` (nickname), ``fakeid``, ``total``
        and ``articles`` (each with ``title``, ``link``, ``create_time``,
        ``date``, ``digest``, ``cover``). If paging stopped because of an
        upstream error, the articles collected so far are still returned and
        a ``warning`` key is added.
        If the search step fails, a dict with ``error`` plus ``raw`` or
        ``detail``.
    """
    page_size = 20
    headers = {**HEADERS, "Cookie": req.cookie}

    # Step 1: resolve the account name to its fakeid.
    try:
        search_r = requests.get(
            "https://mp.weixin.qq.com/cgi-bin/searchbiz",
            params={
                "action": "search_biz",
                "begin": "0",
                "count": "5",
                "query": req.account_name,
                "token": req.token,
                "lang": "zh_CN",
                "f": "json",
                "ajax": "1",
            },
            headers=headers,
            timeout=15,
        )
    except requests.RequestException as exc:
        return {"error": "搜索请求失败", "raw": str(exc)[:300]}

    try:
        search_data = search_r.json()
    except ValueError:
        # A non-JSON body is reported as a probable expired cookie.
        return {"error": "搜索失败，cookie可能已过期", "raw": search_r.text[:300]}

    if search_data.get("base_resp", {}).get("ret") != 0:
        return {"error": "搜索出错", "detail": search_data}

    biz_list = search_data.get("list", [])
    if not biz_list:
        return {"error": f"未找到公众号: {req.account_name}"}

    biz = biz_list[0]
    fakeid = biz.get("fakeid")
    nickname = biz.get("nickname")

    # Step 2: translate the date range into a Unix-time cut-off (0 = no cut-off).
    window_days = {"last_week": 7, "last_month": 30, "last_3months": 90}.get(req.date_range)
    threshold = time.time() - window_days * 86400 if window_days else 0

    # Step 3: page through the article list.
    articles = []
    warning = None
    begin = 0
    reached_threshold = False

    while not reached_threshold:
        time.sleep(2)  # Rate limiting

        try:
            list_r = requests.get(
                "https://mp.weixin.qq.com/cgi-bin/appmsg",
                params={
                    "action": "list_ex",
                    "begin": str(begin),
                    "count": str(page_size),
                    "fakeid": fakeid,
                    "type": "9",
                    "token": req.token,
                    "lang": "zh_CN",
                    "f": "json",
                    "ajax": "1",
                },
                headers=headers,
                timeout=15,
            )
            list_data = list_r.json()
        except (requests.RequestException, ValueError) as exc:
            # Keep what was collected so far, but tell the caller it is partial.
            warning = f"文章列表未完整获取: {exc}"
            break

        ret = list_data.get("base_resp", {}).get("ret")
        if ret != 0:
            warning = f"文章列表未完整获取: ret={ret}"
            break

        app_list = list_data.get("app_msg_list", [])
        if not app_list:
            break

        for item in app_list:
            create_time = item.get("create_time", 0)
            # Stop at the first article older than the cut-off; this relies on
            # the upstream list being ordered newest first (as the original did).
            if threshold and create_time < threshold:
                reached_threshold = True
                break

            articles.append({
                "title": item.get("title"),
                "link": item.get("link"),
                "create_time": create_time,
                "date": time.strftime("%Y-%m-%d", time.localtime(create_time)),
                "digest": item.get("digest", ""),
                "cover": item.get("cover", ""),
            })

        begin += page_size

        # A short page means there is nothing further to fetch.
        if len(app_list) < page_size:
            break

    result = {
        "account": nickname,
        "fakeid": fakeid,
        "total": len(articles),
        "articles": articles,
    }
    if warning is not None:
        result["warning"] = warning
    return result


@app.get("/api/article/content")
def get_article_content(url: str, cookie: str = ""):
    """Fetch a WeChat article page and return its title and plain text.

    Args:
        url: Article URL. Must be http(s) on ``weixin.qq.com`` or one of its
            subdomains (e.g. ``mp.weixin.qq.com``).
        cookie: Optional Cookie header value to send with the request.

    Returns:
        On success, a dict with ``title`` (text of the ``rich_media_title``
        heading, or ``""`` when absent), ``content`` (the page with scripts,
        styles and tags removed, entities decoded, whitespace collapsed,
        truncated to 5000 characters) and ``url``.
        On a rejected URL or a network failure, a dict with ``error`` and ``url``.
    """
    # This endpoint fetches a caller-supplied URL and may attach the caller's
    # WeChat cookie, so restrict it to WeChat hosts: otherwise it can be used
    # to reach arbitrary hosts from this server and to leak the cookie to them.
    # NOTE(review): redirects are still followed by requests, as before.
    try:
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
    except ValueError:
        return {"error": "无效的文章链接", "url": url}
    is_wechat_host = host == "weixin.qq.com" or host.endswith(".weixin.qq.com")
    if parsed.scheme not in ("http", "https") or not is_wechat_host:
        return {"error": "仅支持微信文章链接", "url": url}

    headers = {**HEADERS}
    if cookie:
        headers["Cookie"] = cookie

    try:
        r = requests.get(url, headers=headers, timeout=15)
    except requests.RequestException as exc:
        return {"error": f"获取文章失败: {exc}", "url": url}

    page = r.text

    # Remove script/style blocks with their contents, then every remaining tag.
    text = re.sub(r'<script[^>]*>.*?</script>', '', page, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<[^>]+>', '', text)
    # Decode entities only after tags are gone, so an escaped "&lt;b&gt;" in
    # the article text is kept as literal text rather than stripped as a tag.
    text = html.unescape(text)
    text = re.sub(r'\s+', ' ', text).strip()

    title_match = re.search(
        r'<h1[^>]*class="[^"]*rich_media_title[^"]*"[^>]*>(.*?)</h1>',
        page,
        re.DOTALL,
    )
    if title_match:
        title = html.unescape(re.sub(r'<[^>]+>', '', title_match.group(1))).strip()
    else:
        title = ""

    return {
        "title": title,
        "content": text[:5000],
        "url": url,
    }


if __name__ == "__main__":
    # When executed as a script, serve the app with uvicorn on all network
    # interfaces (0.0.0.0), port 8001. uvicorn is imported here so that
    # importing this module does not require it.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8001)
