From 5c50453acd2d163c835b2391711539242e6284ee Mon Sep 17 00:00:00 2001
From: x1ao4 <kazaf_ken@163.com>
Date: Wed, 27 Aug 2025 01:05:19 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=B5=84=E6=BA=90=E6=90=9C?=
 =?UTF-8?q?=E7=B4=A2=E7=BB=93=E6=9E=9C=E5=8F=91=E5=B8=83=E6=97=A5=E6=9C=9F?=
 =?UTF-8?q?=E6=97=B6=E5=8C=BA=EF=BC=88=E6=98=BE=E7=A4=BA=EF=BC=89=E9=94=99?=
 =?UTF-8?q?=E8=AF=AF=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/run.py               | 24 +++++++++++++----
 app/sdk/cloudsaver.py    | 40 +++++++++++++---------------
 app/sdk/pansou.py        | 52 ++++++++++++-------------------------
 app/templates/index.html | 56 ++++++++++++++++++++++++++++++----------
 4 files changed, 96 insertions(+), 76 deletions(-)

diff --git a/app/run.py b/app/run.py
index 9315fd4..d4f168f 100644
--- a/app/run.py
+++ b/app/run.py
@@ -1061,19 +1061,33 @@ def get_task_suggestions():
             seen_fingerprints.add(fingerprint)
             dedup.append(item)
 
-        # 全局时间排序：所有来源的结果混合排序，按时间倒序（最新的在前）
+        # 仅在排序时对多种格式进行解析（优先解析 YYYY-MM-DD HH:mm:ss，其次 ISO）
         if dedup:
             def parse_datetime_for_sort(item):
                 """解析时间字段，返回可比较的时间戳（统一以 publish_date 为准）"""
                 datetime_str = item.get("publish_date")
                 if not datetime_str:
                     return 0  # 没有时间的排在最后
+                from datetime import datetime
+                s = str(datetime_str).strip()
+                # Try the canonical display format first ("YYYY-MM-DD HH:MM:SS")
                 try:
-                    from datetime import datetime
-                    # 尝试解析格式: 2025-01-01 12:00:00
-                    dt = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M:%S")
+                    dt = datetime.strptime(s, "%Y-%m-%d %H:%M:%S")
                     return dt.timestamp()
-                except:
+                except Exception:
+                    pass
+                # Then fall back to a date-only value ("YYYY-MM-DD")
+                try:
+                    dt = datetime.strptime(s, "%Y-%m-%d")
+                    return dt.timestamp()
+                except Exception:
+                    pass
+                # Finally try ISO 8601; a 'Z' suffix is rewritten to "+00:00" so fromisoformat() accepts it
+                try:
+                    s2 = s.replace('Z', '+00:00')
+                    dt = datetime.fromisoformat(s2)
+                    return dt.timestamp()
+                except Exception:
                     return 0  # 解析失败排在最后
             
             # 按时间倒序排序（最新的在前）
diff --git a/app/sdk/cloudsaver.py b/app/sdk/cloudsaver.py
index 8509118..802f185 100644
--- a/app/sdk/cloudsaver.py
+++ b/app/sdk/cloudsaver.py
@@ -106,6 +106,19 @@ class CloudSaver:
         pattern_title = r"(名称|标题)[：:]?(.*)"
         pattern_content = r"(描述|简介)[：:]?(.*)(链接|标签)"
         clean_results = []
+        # Helper: strip links from a title (http/https URLs and scheme-less pan.quark.cn share links); non-str input is returned unchanged
+        def strip_links(text: str) -> str:
+            if not isinstance(text, str):
+                return text
+            s = text
+            import re
+            # Drop http/https URLs
+            s = re.sub(r"https?://\S+", "", s)
+            # Drop scheme-less Quark share links (pan.quark.cn/... written without a protocol)
+            s = re.sub(r"\bpan\.quark\.cn/\S+", "", s)
+            # Collapse whitespace runs to a single space, then trim leading/trailing separator characters
+            s = re.sub(r"\s+", " ", s).strip(" -|·,，：:；;" + " ")
+            return s.strip()
         link_array = []
         for channel in search_results:
             for item in channel.get("list", []):
@@ -117,6 +130,8 @@ class CloudSaver:
                         if match := re.search(pattern_title, title, re.DOTALL):
                             title = match.group(2)
                         title = title.replace("&amp;", "&").strip()
+                        # 标题去除链接
+                        title = strip_links(title)
                         # 清洗内容
                         content = item.get("content", "")
                         if match := re.search(pattern_content, content, re.DOTALL):
@@ -125,9 +140,8 @@ class CloudSaver:
                         content = content.replace("</mark>", "")
                         content = content.strip()
                         # 获取发布时间 - 采用与原始实现一致的方式
-                        pubdate = item.get("pubDate", "")  # 使用 pubDate 字段
-                        if pubdate:
-                            pubdate = self._iso_to_cst(pubdate)  # 转换为中国标准时间
+                        pubdate_iso = item.get("pubDate", "")  # 原始时间字符串（可能为 ISO 或已是北京时间）
+                        pubdate = pubdate_iso  # 不做时区转换，保留来源原始时间
                         # 链接去重
                         if link.get("link") not in link_array:
                             link_array.append(link.get("link"))
@@ -136,7 +150,7 @@ class CloudSaver:
                                     "shareurl": link.get("link"),
                                     "taskname": title,
                                     "content": content,
-                                    "datetime": pubdate,  # 使用 datetime 字段名，与原始实现一致
+                                    "datetime": pubdate,  # 显示用时间
                                     "tags": item.get("tags", []),
                                     "channel": item.get("channelId", ""),
                                     "source": "CloudSaver"
@@ -146,24 +160,6 @@ class CloudSaver:
         # 注意：排序逻辑已移至全局，这里不再进行内部排序
         # 返回原始顺序的结果，由全局排序函数统一处理
         return clean_results
-    
-    def _iso_to_cst(self, iso_time_str: str) -> str:
-        """将 ISO 格式的时间字符串转换为 CST(China Standard Time) 时间并格式化为 %Y-%m-%d %H:%M:%S 格式
-        
-        Args:
-            iso_time_str (str): ISO 格式时间字符串
-            
-        Returns:
-            str: CST(China Standard Time) 时间字符串
-        """
-        try:
-            from datetime import datetime, timezone, timedelta
-            dt = datetime.fromisoformat(iso_time_str)
-            dt_cst = dt.astimezone(timezone(timedelta(hours=8)))
-            return dt_cst.strftime("%Y-%m-%d %H:%M:%S") if dt_cst.year >= 1970 else ""
-        except:
-            return iso_time_str  # 转换失败时返回原始字符串
-
 
 # 测试示例
 if __name__ == "__main__":
diff --git a/app/sdk/pansou.py b/app/sdk/pansou.py
index 726c3f3..f5ffb5a 100644
--- a/app/sdk/pansou.py
+++ b/app/sdk/pansou.py
@@ -57,6 +57,16 @@ class PanSou:
         
         # 解析结果：优先 results，然后 merged_by_type
         cleaned = []
+        # Helper: strip http/https URLs and scheme-less pan.quark.cn links from a title; non-str input is returned unchanged
+        def strip_links(text: str) -> str:
+            if not isinstance(text, str):
+                return text
+            s = text
+            import re
+            s = re.sub(r"https?://\S+", "", s)
+            s = re.sub(r"\bpan\.quark\.cn/\S+", "", s)
+            s = re.sub(r"\s+", " ", s).strip(" -|·,，：:；;" + " ")
+            return s.strip()
         
         try:
             # 1) results: 主要结果数组，每个结果包含 title 和 links
@@ -68,6 +78,7 @@ class PanSou:
                     
                     # 从 result_item 获取标题、内容和发布日期
                     title = result_item.get("title", "")
+                    title = strip_links(title)
                     content = result_item.get("content", "")
                     datetime_str = result_item.get("datetime", "")  # 获取发布日期
                     
@@ -84,7 +95,7 @@ class PanSou:
                                         "content": content,
                                         "shareurl": url,
                                         "tags": [link_type] if link_type else (result_item.get("tags", []) or []),
-                                        "publish_date": datetime_str,  # 添加发布日期字段
+                                        "publish_date": datetime_str,  # 原始时间（可能是 ISO）
                                         "source": "PanSou"  # 添加来源标识
                                     })
             
@@ -99,6 +110,7 @@ class PanSou:
                                     # 从 merged_by_type 获取链接信息
                                     url = link.get("url", "")
                                     note = link.get("note", "")  # 使用 note 字段作为标题
+                                    note = strip_links(note)
                                     datetime_str = link.get("datetime", "")  # 获取发布日期
                                     if url:
                                         cleaned.append({
@@ -106,7 +118,7 @@ class PanSou:
                                             "content": note,  # 如果没有 content，使用 note
                                             "shareurl": url,
                                             "tags": [cloud_type] if cloud_type else [],
-                                            "publish_date": datetime_str,  # 添加发布日期字段
+                                            "publish_date": datetime_str,  # 原始时间
                                             "source": "PanSou"  # 添加来源标识
                                         })
             
@@ -119,7 +131,7 @@ class PanSou:
                             "content": item.get("content", ""),
                             "shareurl": item.get("url", ""),
                             "tags": item.get("tags", []) or [],
-                            "publish_date": item.get("datetime", ""),  # 添加发布日期字段
+                            "publish_date": item.get("datetime", ""),  # 原始时间
                             "source": "PanSou"  # 添加来源标识
                         })
                         
@@ -152,37 +164,5 @@ class PanSou:
             if url and url not in seen_urls:
                 seen_urls.add(url)
                 unique_results.append(item)
-        
-        # 按发布日期排序：最新的在前
-        def parse_datetime(datetime_str):
-            """解析日期时间字符串，返回可比较的时间戳"""
-            if not datetime_str:
-                return 0  # 没有日期的排在最后
-            try:
-                from datetime import datetime, timezone, timedelta
-                # 尝试解析 ISO 8601 格式: 2025-07-28T20:43:27Z
-                dt = datetime.fromisoformat(datetime_str.replace('Z', '+00:00'))
-                return dt.timestamp()
-            except:
-                return 0  # 解析失败排在最后
-        
-        def convert_to_cst(datetime_str):
-            """将 ISO 时间转换为中国标准时间 (CST)"""
-            if not datetime_str:
-                return ""
-            try:
-                from datetime import datetime, timezone, timedelta
-                dt = datetime.fromisoformat(datetime_str.replace('Z', '+00:00'))
-                dt_cst = dt.astimezone(timezone(timedelta(hours=8)))
-                return dt_cst.strftime("%Y-%m-%d %H:%M:%S")
-            except:
-                return datetime_str  # 转换失败时返回原始字符串
-        
-        # 转换时间为中国标准时间格式
-        for item in unique_results:
-            if item.get("publish_date"):
-                item["publish_date"] = convert_to_cst(item["publish_date"])
-        
-        # 注意：排序逻辑已移至全局，这里不再进行内部排序
-        # 返回原始顺序的结果，由全局排序函数统一处理
+
         return {"success": True, "data": unique_results}
diff --git a/app/templates/index.html b/app/templates/index.html
index e6fb489..35b9d38 100644
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -587,7 +587,7 @@
               <div class="col">
                 <h2 style="display: inline-block; font-size: 1.5rem;">搜索来源</h2>
                 <span class="badge badge-pill badge-light">
-                  <a href="https://github.com/x1ao4/quark-auto-save-x/wiki/CloudSaver搜索源" target="_blank"><i class="bi bi-question-circle"></i></a>
+                  <a href="https://github.com/x1ao4/quark-auto-save-x/wiki/资源搜索" target="_blank"><i class="bi bi-question-circle"></i></a>
                 </span>
               </div>
             </div>
@@ -1026,7 +1026,7 @@
                             <span v-html="suggestion.verify ? '✅': ''"></span> {{ suggestion.taskname }}
                             <small class="text-muted">
                               <a :href="suggestion.shareurl" target="_blank" @click.stop> · {{ suggestion.shareurl.replace(/^https?:\/\/pan\.quark\.cn\/s\//, '') }}</a>
-                              <template v-if="suggestion.source"><span class="source-badge" :class="suggestion.source.toLowerCase()" :data-publish-date="suggestion.publish_date ? ' · ' + suggestion.publish_date : ''">{{ suggestion.source }}</span></template>
+                              <template v-if="suggestion.source"><span class="source-badge" :class="suggestion.source.toLowerCase()" :data-publish-date="suggestion.publish_date ? ' · ' + formatPublishDate(suggestion.publish_date) : ''">{{ suggestion.source }}</span></template>
                             </small>
                           </div>
                         </div>
@@ -1954,7 +1954,7 @@
                         <span v-html="suggestion.verify ? '✅': ''"></span> {{ suggestion.taskname }}
                         <small class="text-muted">
                           <a :href="suggestion.shareurl" target="_blank" @click.stop> · {{ suggestion.shareurl.replace(/^https?:\/\/pan\.quark\.cn\/s\//, '') }}</a>
-                          <template v-if="suggestion.source"><span class="source-badge" :class="suggestion.source.toLowerCase()" :data-publish-date="suggestion.publish_date ? ' · ' + suggestion.publish_date : ''">{{ suggestion.source }}</span></template>
+                          <template v-if="suggestion.source"><span class="source-badge" :class="suggestion.source.toLowerCase()" :data-publish-date="suggestion.publish_date ? ' · ' + formatPublishDate(suggestion.publish_date) : ''">{{ suggestion.source }}</span></template>
                         </small>
                       </div>
                     </div>
@@ -4329,11 +4329,7 @@
           const batchSize = 5;
           
           // 解析时间用于排序（降序：最新在前）
-          const getItemTs = (item) => {
-            const raw = item.publish_date || '';
-            const ts = Date.parse(raw);
-            return isNaN(ts) ? 0 : ts;
-          };
+          const getItemTs = (item) => this.parsePublishTs(item && item.publish_date);
           
           // 处理单个链接的函数
           const processLink = (link) => {
@@ -4462,11 +4458,7 @@
           this.smart_param._hasShownInterimResults = false;
           
           // 结束前做一次排序，确保最终顺序正确
-          const getItemTs = (item) => {
-            const raw = item.publish_date || '';
-            const ts = Date.parse(raw);
-            return isNaN(ts) ? 0 : ts;
-          };
+          const getItemTs = (item) => this.parsePublishTs(item && item.publish_date);
           validResults.sort((a, b) => getItemTs(b) - getItemTs(a));
           
           // 更新搜索结果
@@ -6535,6 +6527,44 @@
           const seconds = String(d.getSeconds()).padStart(2, '0');
           return `${year}-${month}-${day} ${hours}:${minutes}:${seconds}`;
         },
+        // Parse a resource publish date into a millisecond timestamp; returns 0 when the value is empty or unparseable
+        parsePublishTs(raw) {
+          if (!raw) return 0;
+          const s = String(raw).trim();
+          // "YYYY-MM-DD HH:mm:ss": built with new Date(y, m, d, ...), i.e. read as local time
+          let m = /^\s*(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})\s*$/.exec(s);
+          if (m) {
+            const [, y, mo, d, h, mi, se] = m;
+            return new Date(Number(y), Number(mo) - 1, Number(d), Number(h), Number(mi), Number(se)).getTime();
+          }
+          // "YYYY-MM-DD": date only, taken as local midnight
+          m = /^\s*(\d{4})-(\d{2})-(\d{2})\s*$/.exec(s);
+          if (m) {
+            const [, y, mo, d] = m;
+            return new Date(Number(y), Number(mo) - 1, Number(d), 0, 0, 0).getTime();
+          }
+          // Fallback: let Date.parse handle anything else (e.g. ISO strings)
+          const ts = Date.parse(s);
+          return isNaN(ts) ? 0 : ts;
+        },
+        // Normalize a publish date for display: ISO forms (with T / Z / offset) become "YYYY-MM-DD HH:mm:ss"; clock fields are copied as-is, no time-zone conversion
+        formatPublishDate(value) {
+          if (!value) return '';
+          const s = String(value).trim();
+          // Already in the canonical display format: return it unchanged
+          if (/^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}$/.test(s)) return s;
+          // Otherwise take the leading date-time portion of an ISO-like string
+          const m = /^(\d{4})-(\d{2})-(\d{2})[T ](\d{2}):(\d{2}):(\d{2})/.exec(s);
+          if (m) {
+            const [, y, mo, d, h, mi, se] = m;
+            return `${y}-${mo}-${d} ${h}:${mi}:${se}`;
+          }
+          // Fallback: replace the first 'T' with a space and drop a trailing Z / UTC offset
+          let out = s.replace('T', ' ');
+          out = out.replace(/Z$/i, '');
+          out = out.replace(/([+-]\d{2}:?\d{2})$/i, '');
+          return out;
+        },
         changeFolderPage(page) {
           if (page < 1) page = 1;
           if (page > this.fileManager.totalPages) page = this.fileManager.totalPages;