quark-auto-save/app/sdk/pansou.py
x1ao4 4bd7d73514 修复 pan.qoark.cn 重定向链接无法正常访问的问题
问题:
- 搜索结果中的 pan.qoark.cn 链接在 qasx 内无法正常访问
- 这些链接实际会重定向到 pan.quark.cn 的真实地址

修复:
- 在 PanSou 搜索结果处理中添加重定向解析,自动获取真实地址
- 在后端 get_share_detail 接口中添加重定向解析
- 前端支持显示和提取 pan.qoark.cn 链接的资源 ID
2026-01-10 15:40:21 +08:00

273 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import logging
import re
from datetime import datetime
from typing import List, Dict, Any

import requests
class PanSou:
    """Client for a PanSou resource-search server.

    :meth:`search` queries the server and returns only Quark netdisk links,
    de-duplicated by share URL.
    """

    # Patterns used to scrub links out of result titles.
    _HTTP_LINK_RE = re.compile(r"https?://\S+")
    _BARE_QUARK_LINK_RE = re.compile(r"\bpan\.quark\.cn/\S+")
    _WHITESPACE_RE = re.compile(r"\s+")
    # Separator characters trimmed from both ends of a cleaned title.
    # NOTE(review): the hosting page flags ambiguous Unicode characters in this
    # file; confirm against the repository that this literal is complete.
    _TITLE_TRIM_CHARS = " -|·,:;" + " "
    # Plain date formats tried before falling back to ISO-8601 parsing.
    _DATE_FORMATS = ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d")

    def __init__(self, server: str):
        """Store the server base URL and prepare the shared HTTP session.

        Args:
            server: Base URL of the PanSou server. Trailing slashes are
                removed; a falsy value leaves the client unconfigured.
        """
        self.server = server.rstrip("/") if server else ""
        self.session = requests.Session()
        # Standard request headers sent with every call.
        self.session.headers.update({
            "Content-Type": "application/json",
            "Accept": "application/json",
            "User-Agent": "QASX-PanSouClient/1.0",
        })

    def _request_json(self, url: str, params: dict):
        """Send a GET request and return the decoded JSON body.

        Returns:
            The decoded JSON value, or ``{"success": False, "message": ...}``
            when the request or the JSON decoding raises.
        """
        try:
            resp = self.session.get(url, params=params, timeout=15)
            return resp.json()
        except Exception as e:
            return {"success": False, "message": str(e)}

    def _resolve_qoark_redirect(self, url: str) -> str:
        """Resolve a ``pan.qoark.cn`` link to the address it redirects to.

        Args:
            url: Candidate share link.

        Returns:
            The final URL when the link contains ``pan.qoark.cn`` and the
            redirect chain ends with HTTP 200 on a URL containing
            ``pan.quark.cn``; otherwise ``url`` unchanged (including when
            ``url`` is not a string or resolution fails).
        """
        if not isinstance(url, str) or "pan.qoark.cn" not in url:
            return url
        try:
            # A dedicated session keeps redirect handling away from the main
            # session; the context manager closes it so nothing is leaked.
            with requests.Session() as redirect_session:
                redirect_session.max_redirects = 10
                # HEAD first: only the headers are needed to follow redirects.
                resp = redirect_session.head(url, allow_redirects=True, timeout=10)
                if resp.status_code != 200:
                    # Fall back to GET when the HEAD request did not succeed.
                    resp = redirect_session.get(url, allow_redirects=True, timeout=10)
                if resp.status_code == 200 and "pan.quark.cn" in resp.url:
                    return resp.url
        except Exception as exc:
            # Deliberate best effort: a failed lookup must not break the
            # search, so the original link is kept and the error is logged.
            logging.getLogger(__name__).debug(
                "pan.qoark.cn redirect resolution failed for %s: %s", url, exc
            )
        return url

    @staticmethod
    def _clean_text(value) -> str:
        """Return ``value`` stripped when it is a string, else an empty string."""
        return value.strip() if isinstance(value, str) else ""

    def _get_pansou_source(self, result_item: dict, merged_item: dict = None) -> str:
        """Describe where PanSou obtained an entry.

        Args:
            result_item: Entry from the ``results`` array, or ``None``.
            merged_item: Entry from ``merged_by_type``, or ``None``.

        Returns:
            - ``"tg:<channel>"`` when ``result_item`` has a non-blank
              ``channel`` (Telegram channel; timestamps need +8 hours).
            - the ``source`` value of ``merged_item`` when non-blank, e.g.
              ``"plugin:<name>"`` (no +8 hours).
            - ``"unknown"`` otherwise (no +8 hours).
        """
        # The channel field of a results entry takes priority.
        if isinstance(result_item, dict):
            channel = self._clean_text(result_item.get("channel"))
            if channel:
                return f"tg:{channel}"
        # Otherwise use the source field of a merged_by_type entry.
        if isinstance(merged_item, dict):
            source = self._clean_text(merged_item.get("source"))
            if source:
                return source
        return "unknown"

    @classmethod
    def _strip_links(cls, text):
        """Remove links from a title and tidy the remaining text.

        Non-string input is returned unchanged.
        """
        if not isinstance(text, str):
            return text
        cleaned = cls._HTTP_LINK_RE.sub("", text)
        cleaned = cls._BARE_QUARK_LINK_RE.sub("", cleaned)
        cleaned = cls._WHITESPACE_RE.sub(" ", cleaned).strip(cls._TITLE_TRIM_CHARS)
        return cleaned.strip()

    @classmethod
    def _to_timestamp(cls, date_str) -> float:
        """Convert a publish date to a POSIX timestamp; 0 when unparseable."""
        if not date_str:
            return 0.0
        text = str(date_str).strip()
        for fmt in cls._DATE_FORMATS:
            try:
                return datetime.strptime(text, fmt).timestamp()
            except (ValueError, OverflowError, OSError):
                continue
        try:
            # fromisoformat() needs an explicit offset instead of a "Z" suffix.
            return datetime.fromisoformat(text.replace("Z", "+00:00")).timestamp()
        except (ValueError, OverflowError, OSError):
            return 0.0

    @staticmethod
    def _make_item(title, content, url, tags, publish_date, pansou_source) -> Dict[str, Any]:
        """Build one normalized search-result record."""
        return {
            "taskname": title,
            "content": content,
            "shareurl": url,
            "tags": tags,
            "publish_date": publish_date,  # raw value as received (may be ISO)
            "source": "PanSou",
            "pansou_source": pansou_source,
        }

    def _collect_from_results(self, payload: dict, resolve) -> List[Dict[str, Any]]:
        """Build one record per usable link of each ``results`` entry."""
        items = []
        results = payload.get("results", [])
        if not isinstance(results, list):
            return items
        for entry in results:
            if not isinstance(entry, dict):
                continue
            links = entry.get("links", [])
            if not isinstance(links, list):
                continue
            title = self._strip_links(entry.get("title", ""))
            content = entry.get("content", "")
            published = entry.get("datetime", "")
            origin = self._get_pansou_source(entry)
            for link in links:
                if not isinstance(link, dict):
                    continue
                url = link.get("url", "")
                # Only non-empty string URLs can be resolved and de-duplicated.
                if not (isinstance(url, str) and url):
                    continue
                link_type = link.get("type", "")
                tags = [link_type] if link_type else (entry.get("tags", []) or [])
                items.append(
                    self._make_item(title, content, resolve(url), tags, published, origin)
                )
        return items

    def _collect_from_merged(self, payload: dict, resolve) -> List[Dict[str, Any]]:
        """Build records from ``merged_by_type``, using ``note`` as the title."""
        items = []
        merged = payload.get("merged_by_type")
        if not isinstance(merged, dict):
            return items
        for cloud_type, links in merged.items():
            if not isinstance(links, list):
                continue
            for link in links:
                if not isinstance(link, dict):
                    continue
                url = link.get("url", "")
                if not (isinstance(url, str) and url):
                    continue
                note = self._strip_links(link.get("note", ""))
                items.append(
                    self._make_item(
                        note,
                        note,  # there is no content field here, so note is reused
                        resolve(url),
                        [cloud_type] if cloud_type else [],
                        link.get("datetime", ""),
                        self._get_pansou_source(None, link),
                    )
                )
        return items

    def _collect_from_list(self, payload: list, resolve) -> List[Dict[str, Any]]:
        """Build records when the response data is a bare array of entries."""
        items = []
        for entry in payload:
            if not isinstance(entry, dict):
                continue
            url = entry.get("url", "")
            if not (isinstance(url, str) and url):
                continue
            items.append(
                self._make_item(
                    self._strip_links(entry.get("title", "")),
                    entry.get("content", ""),
                    resolve(url),
                    entry.get("tags", []) or [],
                    entry.get("datetime", ""),
                    "unknown",
                )
            )
        return items

    def _parse_payload(self, payload) -> List[Dict[str, Any]]:
        """Turn a response payload into normalized records.

        Order of preference: ``results``, then ``merged_by_type`` when
        ``results`` produced nothing; a list payload is read entry by entry.
        """
        resolved = {}

        def resolve(url: str) -> str:
            # Each distinct link is resolved over the network at most once.
            if url not in resolved:
                resolved[url] = self._resolve_qoark_redirect(url)
            return resolved[url]

        if isinstance(payload, dict):
            items = self._collect_from_results(payload, resolve)
            if not items:
                items = self._collect_from_merged(payload, resolve)
            return items
        if isinstance(payload, list):
            return self._collect_from_list(payload, resolve)
        return []

    @staticmethod
    def _is_quark_item(item: Dict[str, Any]) -> bool:
        """Tell whether a record points at Quark netdisk."""
        url = item.get("shareurl", "")
        tags = item.get("tags", []) or []
        try:
            # Unresolved pan.qoark.cn links are kept as Quark links as well.
            return ("quark" in tags) or ("pan.quark.cn" in url) or ("pan.qoark.cn" in url)
        except TypeError:
            # tags was not a container; the record cannot be classified.
            return False

    @classmethod
    def _dedupe_latest(cls, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Keep one record per share URL, preferring the newest publish date."""
        by_url = {}
        for item in items:
            url = item.get("shareurl", "")
            if not url:
                continue
            kept = by_url.get(url)
            # A missing or unparseable publish date counts as 0.
            if kept is None or (
                cls._to_timestamp(item.get("publish_date"))
                > cls._to_timestamp(kept.get("publish_date"))
            ):
                by_url[url] = item
        return list(by_url.values())

    def search(self, keyword: str):
        """Search PanSou and return Quark netdisk results only.

        Args:
            keyword: Search keyword.

        Returns:
            ``{"success": True, "data": [...]}`` where each record has the
            keys ``taskname``, ``content``, ``shareurl``, ``tags``,
            ``publish_date``, ``source`` and ``pansou_source``; or
            ``{"success": False, "message": ...}`` on failure or when no
            Quark result is found.
        """
        if not self.server:
            return {"success": False, "message": "PanSou未配置服务器"}

        # kw + cloud_types=quark + res=all; several cloud types would be
        # comma-separated in a single string.
        params = {"kw": keyword, "cloud_types": "quark", "res": "all"}
        result = self._request_json(f"{self.server}/api/search", params)
        if not result:
            return {"success": False, "message": "PanSou请求失败"}

        payload = result
        if isinstance(result, dict):
            # Surface transport/decoding failures reported by _request_json
            # instead of presenting them as an empty result set.
            if result.get("success") is False:
                return {"success": False, "message": result.get("message") or "PanSou请求失败"}
            if "code" in result and result.get("code") != 0:
                return {"success": False, "message": result.get("message") or "PanSou搜索失败"}
            # Accept {code, message, data: {...}} as well as data: [...].
            data = result.get("data")
            if isinstance(data, (dict, list)):
                payload = data

        try:
            cleaned = self._parse_payload(payload)
        except Exception as e:
            return {"success": False, "message": f"解析PanSou结果失败: {str(e)}"}

        # Second pass: make sure only Quark netdisk links are returned.
        cleaned = [item for item in cleaned if self._is_quark_item(item)]
        if not cleaned:
            return {"success": False, "message": "PanSou搜索无夸克网盘结果"}

        return {"success": True, "data": self._dedupe_latest(cleaned)}