From 5c50453acd2d163c835b2391711539242e6284ee Mon Sep 17 00:00:00 2001 From: x1ao4 Date: Wed, 27 Aug 2025 01:05:19 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=B5=84=E6=BA=90=E6=90=9C?= =?UTF-8?q?=E7=B4=A2=E7=BB=93=E6=9E=9C=E5=8F=91=E5=B8=83=E6=97=A5=E6=9C=9F?= =?UTF-8?q?=E6=97=B6=E5=8C=BA=EF=BC=88=E6=98=BE=E7=A4=BA=EF=BC=89=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/run.py | 24 +++++++++++++---- app/sdk/cloudsaver.py | 40 +++++++++++++--------------- app/sdk/pansou.py | 52 ++++++++++++------------------------- app/templates/index.html | 56 ++++++++++++++++++++++++++++++---------- 4 files changed, 96 insertions(+), 76 deletions(-) diff --git a/app/run.py b/app/run.py index 9315fd4..d4f168f 100644 --- a/app/run.py +++ b/app/run.py @@ -1061,19 +1061,33 @@ def get_task_suggestions(): seen_fingerprints.add(fingerprint) dedup.append(item) - # 全局时间排序:所有来源的结果混合排序,按时间倒序(最新的在前) + # 仅在排序时对多种格式进行解析(优先解析 YYYY-MM-DD HH:mm:ss,其次 ISO) if dedup: def parse_datetime_for_sort(item): """解析时间字段,返回可比较的时间戳(统一以 publish_date 为准)""" datetime_str = item.get("publish_date") if not datetime_str: return 0 # 没有时间的排在最后 + from datetime import datetime + s = str(datetime_str).strip() + # 优先解析标准显示格式 try: - from datetime import datetime - # 尝试解析格式: 2025-01-01 12:00:00 - dt = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M:%S") + dt = datetime.strptime(s, "%Y-%m-%d %H:%M:%S") return dt.timestamp() - except: + except Exception: + pass + # 补充解析仅日期格式 + try: + dt = datetime.strptime(s, "%Y-%m-%d") + return dt.timestamp() + except Exception: + pass + # 其次尝试 ISO(支持 Z/偏移) + try: + s2 = s.replace('Z', '+00:00') + dt = datetime.fromisoformat(s2) + return dt.timestamp() + except Exception: return 0 # 解析失败排在最后 # 按时间倒序排序(最新的在前) diff --git a/app/sdk/cloudsaver.py b/app/sdk/cloudsaver.py index 8509118..802f185 100644 --- a/app/sdk/cloudsaver.py +++ b/app/sdk/cloudsaver.py @@ -106,6 +106,19 @@ class CloudSaver: pattern_title = r"(名称|标题)[::]?(.*)" pattern_content = r"(描述|简介)[::]?(.*)(链接|标签)" clean_results = [] + # 工具:移除标题中的链接(http/https 以及常见裸域名的夸克分享) + def strip_links(text: str) -> str: + if not isinstance(text, str): + return text + s = text + import re + # 去除 http/https 链接 + s = re.sub(r"https?://\S+", "", s) + # 去除裸域夸克分享链接(不带协议的 pan.quark.cn/...) + s = re.sub(r"\bpan\.quark\.cn/\S+", "", s) + # 收尾多余空白和分隔符 + s = re.sub(r"\s+", " ", s).strip(" -|·,,::;;" + " ") + return s.strip() link_array = [] for channel in search_results: for item in channel.get("list", []): @@ -117,6 +130,8 @@ class CloudSaver: if match := re.search(pattern_title, title, re.DOTALL): title = match.group(2) title = title.replace("&", "&").strip() + # 标题去除链接 + title = strip_links(title) # 清洗内容 content = item.get("content", "") if match := re.search(pattern_content, content, re.DOTALL): @@ -125,9 +140,8 @@ class CloudSaver: content = content.replace("", "") content = content.strip() # 获取发布时间 - 采用与原始实现一致的方式 - pubdate = item.get("pubDate", "") # 使用 pubDate 字段 - if pubdate: - pubdate = self._iso_to_cst(pubdate) # 转换为中国标准时间 + pubdate_iso = item.get("pubDate", "") # 原始时间字符串(可能为 ISO 或已是北京时间) + pubdate = pubdate_iso # 不做时区转换,保留来源原始时间 # 链接去重 if link.get("link") not in link_array: link_array.append(link.get("link")) @@ -136,7 +150,7 @@ class CloudSaver: "shareurl": link.get("link"), "taskname": title, "content": content, - "datetime": pubdate, # 使用 datetime 字段名,与原始实现一致 + "datetime": pubdate, # 显示用时间 "tags": item.get("tags", []), "channel": item.get("channelId", ""), "source": "CloudSaver" @@ -146,24 +160,6 @@ class CloudSaver: # 注意:排序逻辑已移至全局,这里不再进行内部排序 # 返回原始顺序的结果,由全局排序函数统一处理 return clean_results - - def _iso_to_cst(self, iso_time_str: str) -> str: - """将 ISO 格式的时间字符串转换为 CST(China Standard Time) 时间并格式化为 %Y-%m-%d %H:%M:%S 格式 - - Args: - iso_time_str (str): ISO 格式时间字符串 - - Returns: - str: CST(China Standard Time) 时间字符串 - """ - try: - from datetime import datetime, timezone, timedelta - dt = datetime.fromisoformat(iso_time_str) - dt_cst = dt.astimezone(timezone(timedelta(hours=8))) - return dt_cst.strftime("%Y-%m-%d %H:%M:%S") if dt_cst.year >= 1970 else "" - except: - return iso_time_str # 转换失败时返回原始字符串 - # 测试示例 if __name__ == "__main__": diff --git a/app/sdk/pansou.py b/app/sdk/pansou.py index 726c3f3..f5ffb5a 100644 --- a/app/sdk/pansou.py +++ b/app/sdk/pansou.py @@ -57,6 +57,16 @@ class PanSou: # 解析结果:优先 results,然后 merged_by_type cleaned = [] + # 工具:移除标题中的链接 + def strip_links(text: str) -> str: + if not isinstance(text, str): + return text + s = text + import re + s = re.sub(r"https?://\S+", "", s) + s = re.sub(r"\bpan\.quark\.cn/\S+", "", s) + s = re.sub(r"\s+", " ", s).strip(" -|·,,::;;" + " ") + return s.strip() try: # 1) results: 主要结果数组,每个结果包含 title 和 links @@ -68,6 +78,7 @@ class PanSou: # 从 result_item 获取标题、内容和发布日期 title = result_item.get("title", "") + title = strip_links(title) content = result_item.get("content", "") datetime_str = result_item.get("datetime", "") # 获取发布日期 @@ -84,7 +95,7 @@ class PanSou: "content": content, "shareurl": url, "tags": [link_type] if link_type else (result_item.get("tags", []) or []), - "publish_date": datetime_str, # 添加发布日期字段 + "publish_date": datetime_str, # 原始时间(可能是 ISO) "source": "PanSou" # 添加来源标识 }) @@ -99,6 +110,7 @@ class PanSou: # 从 merged_by_type 获取链接信息 url = link.get("url", "") note = link.get("note", "") # 使用 note 字段作为标题 + note = strip_links(note) datetime_str = link.get("datetime", "") # 获取发布日期 if url: cleaned.append({ @@ -106,7 +118,7 @@ class PanSou: "content": note, # 如果没有 content,使用 note "shareurl": url, "tags": [cloud_type] if cloud_type else [], - "publish_date": datetime_str, # 添加发布日期字段 + "publish_date": datetime_str, # 原始时间 "source": "PanSou" # 添加来源标识 }) @@ -119,7 +131,7 @@ class PanSou: "content": item.get("content", ""), "shareurl": item.get("url", ""), "tags": item.get("tags", []) or [], - "publish_date": item.get("datetime", ""), # 添加发布日期字段 + "publish_date": item.get("datetime", ""), # 原始时间 "source": "PanSou" # 添加来源标识 }) @@ -152,37 +164,5 @@ class PanSou: if url and url not in seen_urls: seen_urls.add(url) unique_results.append(item) - - # 按发布日期排序:最新的在前 - def parse_datetime(datetime_str): - """解析日期时间字符串,返回可比较的时间戳""" - if not datetime_str: - return 0 # 没有日期的排在最后 - try: - from datetime import datetime, timezone, timedelta - # 尝试解析 ISO 8601 格式: 2025-07-28T20:43:27Z - dt = datetime.fromisoformat(datetime_str.replace('Z', '+00:00')) - return dt.timestamp() - except: - return 0 # 解析失败排在最后 - - def convert_to_cst(datetime_str): - """将 ISO 时间转换为中国标准时间 (CST)""" - if not datetime_str: - return "" - try: - from datetime import datetime, timezone, timedelta - dt = datetime.fromisoformat(datetime_str.replace('Z', '+00:00')) - dt_cst = dt.astimezone(timezone(timedelta(hours=8))) - return dt_cst.strftime("%Y-%m-%d %H:%M:%S") - except: - return datetime_str # 转换失败时返回原始字符串 - - # 转换时间为中国标准时间格式 - for item in unique_results: - if item.get("publish_date"): - item["publish_date"] = convert_to_cst(item["publish_date"]) - - # 注意:排序逻辑已移至全局,这里不再进行内部排序 - # 返回原始顺序的结果,由全局排序函数统一处理 + return {"success": True, "data": unique_results} diff --git a/app/templates/index.html b/app/templates/index.html index e6fb489..35b9d38 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -587,7 +587,7 @@

搜索来源

- +
@@ -1026,7 +1026,7 @@ {{ suggestion.taskname }} · {{ suggestion.shareurl.replace(/^https?:\/\/pan\.quark\.cn\/s\//, '') }} - + @@ -1954,7 +1954,7 @@ {{ suggestion.taskname }} · {{ suggestion.shareurl.replace(/^https?:\/\/pan\.quark\.cn\/s\//, '') }} - + @@ -4329,11 +4329,7 @@ const batchSize = 5; // 解析时间用于排序(降序:最新在前) - const getItemTs = (item) => { - const raw = item.publish_date || ''; - const ts = Date.parse(raw); - return isNaN(ts) ? 0 : ts; - }; + const getItemTs = (item) => this.parsePublishTs(item && item.publish_date); // 处理单个链接的函数 const processLink = (link) => { @@ -4462,11 +4458,7 @@ this.smart_param._hasShownInterimResults = false; // 结束前做一次排序,确保最终顺序正确 - const getItemTs = (item) => { - const raw = item.publish_date || ''; - const ts = Date.parse(raw); - return isNaN(ts) ? 0 : ts; - }; + const getItemTs = (item) => this.parsePublishTs(item && item.publish_date); validResults.sort((a, b) => getItemTs(b) - getItemTs(a)); // 更新搜索结果 @@ -6535,6 +6527,44 @@ const seconds = String(d.getSeconds()).padStart(2, '0'); return `${year}-${month}-${day} ${hours}:${minutes}:${seconds}`; }, + // 统一解析资源发布日期为时间戳 + parsePublishTs(raw) { + if (!raw) return 0; + const s = String(raw).trim(); + // YYYY-MM-DD HH:mm:ss + let m = /^\s*(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})\s*$/.exec(s); + if (m) { + const [, y, mo, d, h, mi, se] = m; + return new Date(Number(y), Number(mo) - 1, Number(d), Number(h), Number(mi), Number(se)).getTime(); + } + // YYYY-MM-DD + m = /^\s*(\d{4})-(\d{2})-(\d{2})\s*$/.exec(s); + if (m) { + const [, y, mo, d] = m; + return new Date(Number(y), Number(mo) - 1, Number(d), 0, 0, 0).getTime(); + } + // ISO 回退 + const ts = Date.parse(s); + return isNaN(ts) ? 0 : ts; + }, + // 规范化资源发布日期展示:将 ISO 格式(含 T/Z/偏移)转为 "YYYY-MM-DD HH:mm:ss" + formatPublishDate(value) { + if (!value) return ''; + const s = String(value).trim(); + // 已是标准格式则直接返回 + if (/^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}$/.test(s)) return s; + // 优先匹配 ISO 主体部分 + const m = /^(\d{4})-(\d{2})-(\d{2})[T ](\d{2}):(\d{2}):(\d{2})/.exec(s); + if (m) { + const [, y, mo, d, h, mi, se] = m; + return `${y}-${mo}-${d} ${h}:${mi}:${se}`; + } + // 回退:简单替换T为空格并去除尾部Z/时区偏移 + let out = s.replace('T', ' '); + out = out.replace(/Z$/i, ''); + out = out.replace(/([+-]\d{2}:?\d{2})$/i, ''); + return out; + }, changeFolderPage(page) { if (page < 1) page = 1; if (page > this.fileManager.totalPages) page = this.fileManager.totalPages;