From d6390cde99da9081afe60c88ba90a4bf21a72e75 Mon Sep 17 00:00:00 2001 From: x1ao4 Date: Sat, 26 Apr 2025 21:11:16 +0800 Subject: [PATCH 1/5] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=9F=90=E4=BA=9B?= =?UTF-8?q?=E6=83=85=E5=86=B5=E4=B8=8B=E9=A1=BA=E5=BA=8F=E5=91=BD=E5=90=8D?= =?UTF-8?q?=E9=A2=84=E8=A7=88=E7=95=8C=E9=9D=A2=E6=98=BE=E7=A4=BA=E7=9A=84?= =?UTF-8?q?=E9=87=8D=E5=91=BD=E5=90=8D=E4=B8=8E=E5=AE=9E=E9=99=85=E6=93=8D?= =?UTF-8?q?=E4=BD=9C=E4=B8=8D=E4=B8=80=E8=87=B4=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/run.py | 174 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 136 insertions(+), 38 deletions(-) diff --git a/app/run.py b/app/run.py index 71fb2f6..049124c 100644 --- a/app/run.py +++ b/app/run.py @@ -331,56 +331,154 @@ def get_share_detail(): if file["dir"]: # 跳过文件夹 return float('inf') - file_name = file["file_name"] + filename = file["file_name"] - # 1. 首先尝试提取SxxExx格式(如S01E01) - match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', file_name) + # 提取文件名,不含扩展名 + file_name_without_ext = os.path.splitext(filename)[0] + + # 1. "第X期/集/话" 格式 - 保持最高优先级 + match_chinese = re.search(r'第(\d+)[期集话]', filename) + episode_num = int(match_chinese.group(1)) if match_chinese else 0 + + # 5. 文件名含"上中下"(优先处理,因为可能与其他格式同时存在) + if match_chinese: + # 如果同时存在集数和上中下,则按照集数*10+位置排序 + if '上' in filename: + return episode_num * 10 + 1 + elif '中' in filename: + return episode_num * 10 + 2 + elif '下' in filename: + return episode_num * 10 + 3 + return episode_num * 10 + elif '上' in filename: + return 1 + elif '中' in filename: + return 2 + elif '下' in filename: + return 3 + + # 1.2 "X集/期/话" 格式 - 与我们修改后的优先级一致 + match_chinese_simple = re.search(r'(\d+)[期集话]', filename) + if match_chinese_simple: + return int(match_chinese_simple.group(1)) + + # 2.1 S01E01 格式,提取季数和集数 + match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename) if match_s_e: season = int(match_s_e.group(1)) episode = int(match_s_e.group(2)) return season * 1000 + episode - # 2. 尝试提取E01/EP01格式 - match_e = re.search(r'[Ee][Pp]?(\d+)', file_name) + # 2.2 E01/EP01 格式,仅提取集数 + match_e = re.search(r'[Ee][Pp]?(\d+)', filename) if match_e: return int(match_e.group(1)) - # 3. 首先尝试提取期数(第X期) - period_match = re.search(r'第(\d+)期[上中下]', file_name) - if period_match: - period_num = int(period_match.group(1)) - # 根据上中下调整排序 - if '上' in file_name: - return period_num * 3 - 2 - elif '中' in file_name: - return period_num * 3 - 1 - elif '下' in file_name: - return period_num * 3 - return period_num * 3 + # 2.3 1x01 格式,提取季数和集数 + match_x = re.search(r'(\d+)[Xx](\d+)', filename) + if match_x: + season = int(match_x.group(1)) + episode = int(match_x.group(2)) + return season * 1000 + episode - # 4. 尝试提取日期格式(YYYY-MM-DD) - date_match = re.search(r'(\d{4})-(\d{2})-(\d{2})', file_name) - if date_match: - year = int(date_match.group(1)) - month = int(date_match.group(2)) - day = int(date_match.group(3)) - base_value = year * 10000 + month * 100 + day - # 如果同一天有多个文件,根据"上中下"或其他标识符进行排序 - if '上' in file_name: - return base_value * 10 + 1 - elif '中' in file_name: - return base_value * 10 + 2 - elif '下' in file_name: - return base_value * 10 + 3 - return base_value * 10 + # 2.4 数字后接4K格式 + match_4k = re.search(r'(\d+)[-_\s]*4[Kk]', filename) + if match_4k: + return int(match_4k.group(1)) - # 5. 尝试提取任何数字 - number_match = re.search(r'(\d+)', file_name) - if number_match: - return int(number_match.group(1)) + # 2.5 方括号包围的数字 + match_bracket = re.search(r'\[(\d+)\]', filename) + if match_bracket: + return int(match_bracket.group(1)) - # 6. 默认使用原文件名 - return float('inf') + # 2.6 中括号包围的数字 + match_cn_bracket = re.search(r'【(\d+)】', filename) + if match_cn_bracket: + return int(match_cn_bracket.group(1)) + + # 2.7 下划线包围的数字 + match_underscore = re.search(r'_?(\d+)_', filename) + if match_underscore: + return int(match_underscore.group(1)) + + # 3. 日期格式识别(支持多种格式) + + # 3.1 完整的YYYYMMDD格式 + match_date_compact = re.search(r'(20\d{2})(\d{2})(\d{2})', filename) + if match_date_compact: + year = int(match_date_compact.group(1)) + month = int(match_date_compact.group(2)) + day = int(match_date_compact.group(3)) + return year * 10000 + month * 100 + day + + # 3.2 YYYY-MM-DD 或 YYYY.MM.DD 或 YYYY/MM/DD 格式 + match_date_full = re.search(r'(20\d{2})[-./](\d{1,2})[-./](\d{1,2})', filename) + if match_date_full: + year = int(match_date_full.group(1)) + month = int(match_date_full.group(2)) + day = int(match_date_full.group(3)) + return year * 10000 + month * 100 + day + + # 3.3 MM/DD/YYYY 或 DD/MM/YYYY 格式 + match_date_alt = re.search(r'(\d{1,2})[-./](\d{1,2})[-./](20\d{2})', filename) + if match_date_alt: + # 假设第一个是月,第二个是日(美式日期) + month = int(match_date_alt.group(1)) + day = int(match_date_alt.group(2)) + year = int(match_date_alt.group(3)) + # 检查月份值,如果大于12可能是欧式日期格式(DD/MM/YYYY) + if month > 12: + month, day = day, month + return year * 10000 + month * 100 + day + + # 3.4 MM/DD 格式(无年份),假设为当前年 + match_date_short = re.search(r'(\d{1,2})[-./](\d{1,2})', filename) + if match_date_short: + # 假设第一个是月,第二个是日 + month = int(match_date_short.group(1)) + day = int(match_date_short.group(2)) + # 检查月份值,如果大于12可能是欧式日期格式(DD/MM) + if month > 12: + month, day = day, month + # 由于没有年份,使用一个较低的基数,确保任何有年份的日期都排在后面 + return month * 100 + day + + # 3.5 年期格式,如"2025年14期" + match_year_issue = re.search(r'(20\d{2})[年].*?(\d+)[期]', filename) + if match_year_issue: + year = int(match_year_issue.group(1)) + issue = int(match_year_issue.group(2)) + return year * 1000 + issue + + # 3.6 日期+期数的复合格式,例如:2025-04-18 第5期上 + match_date_episode = re.search(r'(20\d{2})[-./](\d{1,2})[-./](\d{1,2}).*?第(\d+)[期集话]', filename) + if match_date_episode: + year = int(match_date_episode.group(1)) + month = int(match_date_episode.group(2)) + day = int(match_date_episode.group(3)) + episode = int(match_date_episode.group(4)) + date_val = year * 10000 + month * 100 + day + # 将日期值作为主排序,期数为次要排序 + if '上' in filename: + return date_val * 100 + episode * 10 + 1 + elif '中' in filename: + return date_val * 100 + episode * 10 + 2 + elif '下' in filename: + return date_val * 100 + episode * 10 + 3 + return date_val * 100 + episode * 10 + + # 4. 纯数字格式(文件名开头是纯数字) + match_num = re.match(r'^(\d+)', file_name_without_ext) + if match_num: + return int(match_num.group(1)) + + # 5. 尝试匹配文件名中的任何数字 + any_num_match = re.search(r'(\d+)', filename) + if any_num_match: + return int(any_num_match.group(1)) + + # 6. 默认使用更新时间 + return file.get("created_at", file.get("updated_at", file.get("last_update_at", 0))) # 过滤出非目录文件,并且排除已经符合命名规则的文件 files_to_process = [] From 649169327bc4bb6935e787de459167fb763c38f9 Mon Sep 17 00:00:00 2001 From: x1ao4 Date: Sat, 26 Apr 2025 21:39:29 +0800 Subject: [PATCH 2/5] =?UTF-8?q?=E5=B0=86=E9=A1=BA=E5=BA=8F=E5=91=BD?= =?UTF-8?q?=E5=90=8D=E4=BD=BF=E7=94=A8=E7=9A=84=E6=96=87=E4=BB=B6=E6=8E=92?= =?UTF-8?q?=E5=BA=8F=E5=87=BD=E6=95=B0=E6=94=B9=E4=B8=BA=E5=85=A8=E5=B1=80?= =?UTF-8?q?=E5=87=BD=E6=95=B0=EF=BC=8C=E5=B9=B6=E4=BC=98=E5=8C=96=E6=8E=92?= =?UTF-8?q?=E5=BA=8F=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/run.py | 163 +------------ quark_auto_save.py | 566 +++++++++++++++++---------------------------- 2 files changed, 222 insertions(+), 507 deletions(-) diff --git a/app/run.py b/app/run.py index 049124c..f681b12 100644 --- a/app/run.py +++ b/app/run.py @@ -31,9 +31,9 @@ sys.path.insert(0, parent_dir) from quark_auto_save import Quark from quark_auto_save import Config -# 添加导入全局extract_episode_number函数 +# 添加导入全局extract_episode_number和sort_file_by_name函数 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from quark_auto_save import extract_episode_number +from quark_auto_save import extract_episode_number, sort_file_by_name def get_app_ver(): @@ -328,157 +328,7 @@ def get_share_detail(): # 实现与实际重命名相同的排序算法 def extract_sort_value(file): - if file["dir"]: # 跳过文件夹 - return float('inf') - - filename = file["file_name"] - - # 提取文件名,不含扩展名 - file_name_without_ext = os.path.splitext(filename)[0] - - # 1. "第X期/集/话" 格式 - 保持最高优先级 - match_chinese = re.search(r'第(\d+)[期集话]', filename) - episode_num = int(match_chinese.group(1)) if match_chinese else 0 - - # 5. 文件名含"上中下"(优先处理,因为可能与其他格式同时存在) - if match_chinese: - # 如果同时存在集数和上中下,则按照集数*10+位置排序 - if '上' in filename: - return episode_num * 10 + 1 - elif '中' in filename: - return episode_num * 10 + 2 - elif '下' in filename: - return episode_num * 10 + 3 - return episode_num * 10 - elif '上' in filename: - return 1 - elif '中' in filename: - return 2 - elif '下' in filename: - return 3 - - # 1.2 "X集/期/话" 格式 - 与我们修改后的优先级一致 - match_chinese_simple = re.search(r'(\d+)[期集话]', filename) - if match_chinese_simple: - return int(match_chinese_simple.group(1)) - - # 2.1 S01E01 格式,提取季数和集数 - match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename) - if match_s_e: - season = int(match_s_e.group(1)) - episode = int(match_s_e.group(2)) - return season * 1000 + episode - - # 2.2 E01/EP01 格式,仅提取集数 - match_e = re.search(r'[Ee][Pp]?(\d+)', filename) - if match_e: - return int(match_e.group(1)) - - # 2.3 1x01 格式,提取季数和集数 - match_x = re.search(r'(\d+)[Xx](\d+)', filename) - if match_x: - season = int(match_x.group(1)) - episode = int(match_x.group(2)) - return season * 1000 + episode - - # 2.4 数字后接4K格式 - match_4k = re.search(r'(\d+)[-_\s]*4[Kk]', filename) - if match_4k: - return int(match_4k.group(1)) - - # 2.5 方括号包围的数字 - match_bracket = re.search(r'\[(\d+)\]', filename) - if match_bracket: - return int(match_bracket.group(1)) - - # 2.6 中括号包围的数字 - match_cn_bracket = re.search(r'【(\d+)】', filename) - if match_cn_bracket: - return int(match_cn_bracket.group(1)) - - # 2.7 下划线包围的数字 - match_underscore = re.search(r'_?(\d+)_', filename) - if match_underscore: - return int(match_underscore.group(1)) - - # 3. 日期格式识别(支持多种格式) - - # 3.1 完整的YYYYMMDD格式 - match_date_compact = re.search(r'(20\d{2})(\d{2})(\d{2})', filename) - if match_date_compact: - year = int(match_date_compact.group(1)) - month = int(match_date_compact.group(2)) - day = int(match_date_compact.group(3)) - return year * 10000 + month * 100 + day - - # 3.2 YYYY-MM-DD 或 YYYY.MM.DD 或 YYYY/MM/DD 格式 - match_date_full = re.search(r'(20\d{2})[-./](\d{1,2})[-./](\d{1,2})', filename) - if match_date_full: - year = int(match_date_full.group(1)) - month = int(match_date_full.group(2)) - day = int(match_date_full.group(3)) - return year * 10000 + month * 100 + day - - # 3.3 MM/DD/YYYY 或 DD/MM/YYYY 格式 - match_date_alt = re.search(r'(\d{1,2})[-./](\d{1,2})[-./](20\d{2})', filename) - if match_date_alt: - # 假设第一个是月,第二个是日(美式日期) - month = int(match_date_alt.group(1)) - day = int(match_date_alt.group(2)) - year = int(match_date_alt.group(3)) - # 检查月份值,如果大于12可能是欧式日期格式(DD/MM/YYYY) - if month > 12: - month, day = day, month - return year * 10000 + month * 100 + day - - # 3.4 MM/DD 格式(无年份),假设为当前年 - match_date_short = re.search(r'(\d{1,2})[-./](\d{1,2})', filename) - if match_date_short: - # 假设第一个是月,第二个是日 - month = int(match_date_short.group(1)) - day = int(match_date_short.group(2)) - # 检查月份值,如果大于12可能是欧式日期格式(DD/MM) - if month > 12: - month, day = day, month - # 由于没有年份,使用一个较低的基数,确保任何有年份的日期都排在后面 - return month * 100 + day - - # 3.5 年期格式,如"2025年14期" - match_year_issue = re.search(r'(20\d{2})[年].*?(\d+)[期]', filename) - if match_year_issue: - year = int(match_year_issue.group(1)) - issue = int(match_year_issue.group(2)) - return year * 1000 + issue - - # 3.6 日期+期数的复合格式,例如:2025-04-18 第5期上 - match_date_episode = re.search(r'(20\d{2})[-./](\d{1,2})[-./](\d{1,2}).*?第(\d+)[期集话]', filename) - if match_date_episode: - year = int(match_date_episode.group(1)) - month = int(match_date_episode.group(2)) - day = int(match_date_episode.group(3)) - episode = int(match_date_episode.group(4)) - date_val = year * 10000 + month * 100 + day - # 将日期值作为主排序,期数为次要排序 - if '上' in filename: - return date_val * 100 + episode * 10 + 1 - elif '中' in filename: - return date_val * 100 + episode * 10 + 2 - elif '下' in filename: - return date_val * 100 + episode * 10 + 3 - return date_val * 100 + episode * 10 - - # 4. 纯数字格式(文件名开头是纯数字) - match_num = re.match(r'^(\d+)', file_name_without_ext) - if match_num: - return int(match_num.group(1)) - - # 5. 尝试匹配文件名中的任何数字 - any_num_match = re.search(r'(\d+)', filename) - if any_num_match: - return int(any_num_match.group(1)) - - # 6. 默认使用更新时间 - return file.get("created_at", file.get("updated_at", file.get("last_update_at", 0))) + return sort_file_by_name(file) # 过滤出非目录文件,并且排除已经符合命名规则的文件 files_to_process = [] @@ -570,11 +420,8 @@ def get_share_detail(): if episode_num is not None: return episode_num - # 如果无法提取序号,则使用更新时间 - try: - return file.get("last_update_at", 0) - except: - return 0 + # 如果无法提取剧集号,则使用通用的排序函数 + return sort_file_by_name(file) # 过滤出非目录文件,并且排除已经符合命名规则的文件 files_to_process = [] diff --git a/quark_auto_save.py b/quark_auto_save.py index f35e99b..87a69cb 100644 --- a/quark_auto_save.py +++ b/quark_auto_save.py @@ -18,7 +18,213 @@ import importlib import urllib.parse from datetime import datetime -# 统一的剧集编号提取函数 +# 全局的文件排序函数 +def sort_file_by_name(file): + """ + 通用的文件排序函数,用于根据文件名智能排序 + 支持多种格式的日期、期数、集数等提取和排序 + """ + if isinstance(file, dict) and file.get("dir", False): # 跳过文件夹 + return float('inf') + + # 获取文件名,支持字符串或文件对象 + if isinstance(file, dict): + filename = file.get("file_name", "") + else: + filename = file + + # 提取文件名,不含扩展名 + file_name_without_ext = os.path.splitext(filename)[0] + + # 1. 日期格式识别(支持多种格式)- 最高优先级 + + # 1.1 日期+期数的复合格式,例如:2025-04-18 第5期上 + match_date_episode = re.search(r'(20\d{2})[-./\s](\d{1,2})[-./\s](\d{1,2}).*?第(\d+)[期集话]', filename) + if match_date_episode: + year = int(match_date_episode.group(1)) + month = int(match_date_episode.group(2)) + day = int(match_date_episode.group(3)) + episode = int(match_date_episode.group(4)) + date_val = year * 10000 + month * 100 + day + # 将日期值作为主排序,期数为次要排序 + if '上' in filename: + return date_val * 100 + episode * 10 + 1 + elif '中' in filename: + return date_val * 100 + episode * 10 + 2 + elif '下' in filename: + return date_val * 100 + episode * 10 + 3 + return date_val * 100 + episode * 10 + + # 1.2 使用两位年份的日期+期数格式,如:23-04-18 第5期 + match_yy_date_episode = re.search(r'((?:19|20)?\d{2})[-./\s](\d{1,2})[-./\s](\d{1,2}).*?第(\d+)[期集话]', filename) + if match_yy_date_episode and len(match_yy_date_episode.group(1)) == 2: + year_str = match_yy_date_episode.group(1) + # 如果是两位年份,假设20xx年 + year = int("20" + year_str) + month = int(match_yy_date_episode.group(2)) + day = int(match_yy_date_episode.group(3)) + episode = int(match_yy_date_episode.group(4)) + date_val = year * 10000 + month * 100 + day + if '上' in filename: + return date_val * 100 + episode * 10 + 1 + elif '中' in filename: + return date_val * 100 + episode * 10 + 2 + elif '下' in filename: + return date_val * 100 + episode * 10 + 3 + return date_val * 100 + episode * 10 + + # 1.3 完整的YYYYMMDD格式(无分隔符) + match_date_compact = re.search(r'((?:19|20)\d{2})(\d{2})(\d{2})', filename) + if match_date_compact: + year = int(match_date_compact.group(1)) + month = int(match_date_compact.group(2)) + day = int(match_date_compact.group(3)) + return year * 10000 + month * 100 + day + + # 1.4 YYYY-MM-DD 或 YYYY.MM.DD 或 YYYY/MM/DD 或 YYYY MM DD格式 + match_date_full = re.search(r'((?:19|20)\d{2})[-./\s](\d{1,2})[-./\s](\d{1,2})', filename) + if match_date_full: + year = int(match_date_full.group(1)) + month = int(match_date_full.group(2)) + day = int(match_date_full.group(3)) + return year * 10000 + month * 100 + day + + # 1.5 YY-MM-DD 或 YY.MM.DD 或 YY/MM/DD 或 YY MM DD格式(两位年份) + match_yy_date = re.search(r'((?:19|20)?\d{2})[-./\s](\d{1,2})[-./\s](\d{1,2})', filename) + if match_yy_date and len(match_yy_date.group(1)) == 2: + year_str = match_yy_date.group(1) + # 如果是两位年份,假设20xx年 + year = int("20" + year_str) + month = int(match_yy_date.group(2)) + day = int(match_yy_date.group(3)) + return year * 10000 + month * 100 + day + + # 1.6 YYMMDD格式(两位年份,无分隔符) + match_yy_compact = re.search(r'(? 12: + month, day = day, month + return year * 10000 + month * 100 + day + + # 1.8 年期格式,如"2025年14期" + match_year_issue = re.search(r'((?:19|20)\d{2})[年].*?(\d+)[期]', filename) + if match_year_issue: + year = int(match_year_issue.group(1)) + issue = int(match_year_issue.group(2)) + return year * 1000 + issue + + # 1.9 MM-DD 或 MM.DD 或 MM/DD 或 MM DD格式(无年份),假设为当前年 + match_date_short = re.search(r'(? 12: + month, day = day, month + # 由于没有年份,使用一个较低的基数,确保任何有年份的日期都排在后面 + return month * 100 + day + + # 2. "第X期/集/话" 格式 + match_chinese = re.search(r'第(\d+)[期集话]', filename) + episode_num = int(match_chinese.group(1)) if match_chinese else 0 + + # 文件名含"上中下"(优先处理,因为可能与其他格式同时存在) + if match_chinese: + # 如果同时存在集数和上中下,则按照集数*10+位置排序 + if '上' in filename: + return episode_num * 10 + 1 + elif '中' in filename: + return episode_num * 10 + 2 + elif '下' in filename: + return episode_num * 10 + 3 + return episode_num * 10 + elif '上' in filename: + return 1 + elif '中' in filename: + return 2 + elif '下' in filename: + return 3 + + # 2.1 "X集/期/话" 格式 + match_chinese_simple = re.search(r'(\d+)[期集话]', filename) + if match_chinese_simple: + return int(match_chinese_simple.group(1)) + + # 3.1 S01E01 格式,提取季数和集数 + match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename) + if match_s_e: + season = int(match_s_e.group(1)) + episode = int(match_s_e.group(2)) + return season * 1000 + episode + + # 3.2 E01/EP01 格式,仅提取集数 + match_e = re.search(r'[Ee][Pp]?(\d+)', filename) + if match_e: + return int(match_e.group(1)) + + # 3.3 1x01 格式,提取季数和集数 + match_x = re.search(r'(\d+)[Xx](\d+)', filename) + if match_x: + season = int(match_x.group(1)) + episode = int(match_x.group(2)) + return season * 1000 + episode + + # 3.4 数字后接4K格式 + match_4k = re.search(r'(\d+)[-_\s]*4[Kk]', filename) + if match_4k: + return int(match_4k.group(1)) + + # 3.5 方括号包围的数字 + match_bracket = re.search(r'\[(\d+)\]', filename) + if match_bracket: + return int(match_bracket.group(1)) + + # 3.6 中括号包围的数字 + match_cn_bracket = re.search(r'【(\d+)】', filename) + if match_cn_bracket: + return int(match_cn_bracket.group(1)) + + # 3.7 下划线包围的数字 + match_underscore = re.search(r'_?(\d+)_', filename) + if match_underscore: + return int(match_underscore.group(1)) + + # 4. 纯数字格式(文件名开头是纯数字) + match_num = re.match(r'^(\d+)', file_name_without_ext) + if match_num: + return int(match_num.group(1)) + + # 5. 尝试匹配文件名中的任何数字 + any_num_match = re.search(r'(\d+)', filename) + if any_num_match: + return int(any_num_match.group(1)) + + # 6. 默认使用更新时间 + if isinstance(file, dict): + return file.get("created_at", file.get("updated_at", file.get("last_update_at", 0))) + + return float('inf') + + +# 全局的剧集编号提取函数 def extract_episode_number(filename, episode_patterns=None, config_data=None): """ 从文件名中提取剧集编号 @@ -1099,186 +1305,16 @@ class Quark: # 实现高级排序算法 def extract_sorting_value(file): - if file.get("dir", False): # 跳过文件夹 - return float('inf') - - filename = file["file_name"] - - # 提取文件名,不含扩展名 - file_name_without_ext = os.path.splitext(filename)[0] - - # 1. "第X期/集/话" 格式 - 保持最高优先级 - match_chinese = re.search(r'第(\d+)[期集话]', filename) - episode_num = int(match_chinese.group(1)) if match_chinese else 0 - - # 5. 文件名含"上中下"(优先处理,因为可能与其他格式同时存在) - if match_chinese: - # 如果同时存在集数和上中下,则按照集数*10+位置排序 - if '上' in filename: - return episode_num * 10 + 1 - elif '中' in filename: - return episode_num * 10 + 2 - elif '下' in filename: - return episode_num * 10 + 3 - return episode_num * 10 - elif '上' in filename: - return 1 - elif '中' in filename: - return 2 - elif '下' in filename: - return 3 - - # 1.2 "X集/期/话" 格式 - 与我们修改后的优先级一致 - match_chinese_simple = re.search(r'(\d+)[期集话]', filename) - if match_chinese_simple: - return int(match_chinese_simple.group(1)) - - # 2.1 S01E01 格式,提取季数和集数 - match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename) - if match_s_e: - season = int(match_s_e.group(1)) - episode = int(match_s_e.group(2)) - return season * 1000 + episode - - # 2.2 E01/EP01 格式,仅提取集数 - match_e = re.search(r'[Ee][Pp]?(\d+)', filename) - if match_e: - return int(match_e.group(1)) - - # 2.3 1x01 格式,提取季数和集数 - match_x = re.search(r'(\d+)[Xx](\d+)', filename) - if match_x: - season = int(match_x.group(1)) - episode = int(match_x.group(2)) - return season * 1000 + episode - - # 2.4 数字后接4K格式 - match_4k = re.search(r'(\d+)[-_\s]*4[Kk]', filename) - if match_4k: - return int(match_4k.group(1)) - - # 2.5 方括号包围的数字 - match_bracket = re.search(r'\[(\d+)\]', filename) - if match_bracket: - return int(match_bracket.group(1)) - - # 2.6 中括号包围的数字 - match_cn_bracket = re.search(r'【(\d+)】', filename) - if match_cn_bracket: - return int(match_cn_bracket.group(1)) - - # 2.7 下划线包围的数字 - match_underscore = re.search(r'_?(\d+)_', filename) - if match_underscore: - return int(match_underscore.group(1)) - - # 3. 日期格式识别(支持多种格式) - - # 3.1 完整的YYYYMMDD格式 - match_date_compact = re.search(r'(20\d{2})(\d{2})(\d{2})', filename) - if match_date_compact: - year = int(match_date_compact.group(1)) - month = int(match_date_compact.group(2)) - day = int(match_date_compact.group(3)) - return year * 10000 + month * 100 + day - - # 3.2 YYYY-MM-DD 或 YYYY.MM.DD 或 YYYY/MM/DD 格式 - match_date_full = re.search(r'(20\d{2})[-./](\d{1,2})[-./](\d{1,2})', filename) - if match_date_full: - year = int(match_date_full.group(1)) - month = int(match_date_full.group(2)) - day = int(match_date_full.group(3)) - return year * 10000 + month * 100 + day - - # 3.3 MM/DD/YYYY 或 DD/MM/YYYY 格式 - match_date_alt = re.search(r'(\d{1,2})[-./](\d{1,2})[-./](20\d{2})', filename) - if match_date_alt: - # 假设第一个是月,第二个是日(美式日期) - month = int(match_date_alt.group(1)) - day = int(match_date_alt.group(2)) - year = int(match_date_alt.group(3)) - # 检查月份值,如果大于12可能是欧式日期格式(DD/MM/YYYY) - if month > 12: - month, day = day, month - return year * 10000 + month * 100 + day - - # 3.4 MM/DD 格式(无年份),假设为当前年 - match_date_short = re.search(r'(\d{1,2})[-./](\d{1,2})', filename) - if match_date_short: - # 假设第一个是月,第二个是日 - month = int(match_date_short.group(1)) - day = int(match_date_short.group(2)) - # 检查月份值,如果大于12可能是欧式日期格式(DD/MM) - if month > 12: - month, day = day, month - # 由于没有年份,使用一个较低的基数,确保任何有年份的日期都排在后面 - return month * 100 + day - - # 3.5 年期格式,如"2025年14期" - match_year_issue = re.search(r'(20\d{2})[年].*?(\d+)[期]', filename) - if match_year_issue: - year = int(match_year_issue.group(1)) - issue = int(match_year_issue.group(2)) - return year * 1000 + issue - - # 3.6 日期+期数的复合格式,例如:2025-04-18 第5期上 - match_date_episode = re.search(r'(20\d{2})[-./](\d{1,2})[-./](\d{1,2}).*?第(\d+)[期集话]', filename) - if match_date_episode: - year = int(match_date_episode.group(1)) - month = int(match_date_episode.group(2)) - day = int(match_date_episode.group(3)) - episode = int(match_date_episode.group(4)) - date_val = year * 10000 + month * 100 + day - # 将日期值作为主排序,期数为次要排序 - if '上' in filename: - return date_val * 100 + episode * 10 + 1 - elif '中' in filename: - return date_val * 100 + episode * 10 + 2 - elif '下' in filename: - return date_val * 100 + episode * 10 + 3 - return date_val * 100 + episode * 10 - - # 4. 纯数字格式(文件名开头是纯数字) - match_num = re.match(r'^(\d+)', file_name_without_ext) - if match_num: - return int(match_num.group(1)) - - # 5. 尝试匹配文件名中的任何数字 - any_num_match = re.search(r'(\d+)', filename) - if any_num_match: - return int(any_num_match.group(1)) - - # 6. 默认使用更新时间 - return file.get("created_at", file.get("updated_at", file.get("last_update_at", 0))) + # 使用全局排序函数 + return sort_file_by_name(file) - # 过滤出非目录文件,排除已经排除掉的重复文件,然后排序 - files_to_process = [] - for f in filtered_share_files: - if f["dir"]: - continue # 跳过文件夹 - - # 检查文件是否已符合命名规则 - if sequence_pattern == "{}": - # 对于单独的{},检查文件名是否为纯数字 - file_name_without_ext = os.path.splitext(f["file_name"])[0] - if file_name_without_ext.isdigit(): - # 增加判断:如果是日期格式的纯数字,不视为已命名 - if not is_date_format(file_name_without_ext): - continue # 跳过已符合命名规则的文件 - elif re.match(regex_pattern, f["file_name"]): - continue # 跳过已符合命名规则的文件 - - # 添加到待处理文件列表 - files_to_process.append(f) - - # 根据提取的排序值进行排序 - sorted_files = sorted(files_to_process, key=extract_sorting_value) + # 判断是否使用单独的{}模式 # 需保存的文件清单 need_save_list = [] # 为每个文件分配序号 - for share_file in sorted_files: + for share_file in filtered_share_files: # 获取文件扩展名 file_ext = os.path.splitext(share_file["file_name"])[1] # 生成新文件名 @@ -1691,157 +1727,10 @@ class Quark: # 实现高级排序算法 def extract_sorting_value(file): - if file.get("dir", False): # 跳过文件夹 - return float('inf') - - filename = file["file_name"] - - # 提取文件名,不含扩展名 - file_name_without_ext = os.path.splitext(filename)[0] - - # 1. "第X期/集/话" 格式 - 保持最高优先级 - match_chinese = re.search(r'第(\d+)[期集话]', filename) - episode_num = int(match_chinese.group(1)) if match_chinese else 0 - - # 5. 文件名含"上中下"(优先处理,因为可能与其他格式同时存在) - if match_chinese: - # 如果同时存在集数和上中下,则按照集数*10+位置排序 - if '上' in filename: - return episode_num * 10 + 1 - elif '中' in filename: - return episode_num * 10 + 2 - elif '下' in filename: - return episode_num * 10 + 3 - return episode_num * 10 - elif '上' in filename: - return 1 - elif '中' in filename: - return 2 - elif '下' in filename: - return 3 - - # 1.2 "X集/期/话" 格式 - 与我们修改后的优先级一致 - match_chinese_simple = re.search(r'(\d+)[期集话]', filename) - if match_chinese_simple: - return int(match_chinese_simple.group(1)) - - # 2.1 S01E01 格式,提取季数和集数 - match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename) - if match_s_e: - season = int(match_s_e.group(1)) - episode = int(match_s_e.group(2)) - return season * 1000 + episode - - # 2.2 E01/EP01 格式,仅提取集数 - match_e = re.search(r'[Ee][Pp]?(\d+)', filename) - if match_e: - return int(match_e.group(1)) - - # 2.3 1x01 格式,提取季数和集数 - match_x = re.search(r'(\d+)[Xx](\d+)', filename) - if match_x: - season = int(match_x.group(1)) - episode = int(match_x.group(2)) - return season * 1000 + episode - - # 2.4 数字后接4K格式 - match_4k = re.search(r'(\d+)[-_\s]*4[Kk]', filename) - if match_4k: - return int(match_4k.group(1)) - - # 2.5 方括号包围的数字 - match_bracket = re.search(r'\[(\d+)\]', filename) - if match_bracket: - return int(match_bracket.group(1)) - - # 2.6 中括号包围的数字 - match_cn_bracket = re.search(r'【(\d+)】', filename) - if match_cn_bracket: - return int(match_cn_bracket.group(1)) - - # 2.7 下划线包围的数字 - match_underscore = re.search(r'_?(\d+)_', filename) - if match_underscore: - return int(match_underscore.group(1)) - - # 3. 日期格式识别(支持多种格式) - - # 3.1 完整的YYYYMMDD格式 - match_date_compact = re.search(r'(20\d{2})(\d{2})(\d{2})', filename) - if match_date_compact: - year = int(match_date_compact.group(1)) - month = int(match_date_compact.group(2)) - day = int(match_date_compact.group(3)) - return year * 10000 + month * 100 + day - - # 3.2 YYYY-MM-DD 或 YYYY.MM.DD 或 YYYY/MM/DD 格式 - match_date_full = re.search(r'(20\d{2})[-./](\d{1,2})[-./](\d{1,2})', filename) - if match_date_full: - year = int(match_date_full.group(1)) - month = int(match_date_full.group(2)) - day = int(match_date_full.group(3)) - return year * 10000 + month * 100 + day - - # 3.3 MM/DD/YYYY 或 DD/MM/YYYY 格式 - match_date_alt = re.search(r'(\d{1,2})[-./](\d{1,2})[-./](20\d{2})', filename) - if match_date_alt: - # 假设第一个是月,第二个是日(美式日期) - month = int(match_date_alt.group(1)) - day = int(match_date_alt.group(2)) - year = int(match_date_alt.group(3)) - # 检查月份值,如果大于12可能是欧式日期格式(DD/MM/YYYY) - if month > 12: - month, day = day, month - return year * 10000 + month * 100 + day - - # 3.4 MM/DD 格式(无年份),假设为当前年 - match_date_short = re.search(r'(\d{1,2})[-./](\d{1,2})', filename) - if match_date_short: - # 假设第一个是月,第二个是日 - month = int(match_date_short.group(1)) - day = int(match_date_short.group(2)) - # 检查月份值,如果大于12可能是欧式日期格式(DD/MM) - if month > 12: - month, day = day, month - # 由于没有年份,使用一个较低的基数,确保任何有年份的日期都排在后面 - return month * 100 + day - - # 3.5 年期格式,如"2025年14期" - match_year_issue = re.search(r'(20\d{2})[年].*?(\d+)[期]', filename) - if match_year_issue: - year = int(match_year_issue.group(1)) - issue = int(match_year_issue.group(2)) - return year * 1000 + issue - - # 3.6 日期+期数的复合格式,例如:2025-04-18 第5期上 - match_date_episode = re.search(r'(20\d{2})[-./](\d{1,2})[-./](\d{1,2}).*?第(\d+)[期集话]', filename) - if match_date_episode: - year = int(match_date_episode.group(1)) - month = int(match_date_episode.group(2)) - day = int(match_date_episode.group(3)) - episode = int(match_date_episode.group(4)) - date_val = year * 10000 + month * 100 + day - # 将日期值作为主排序,期数为次要排序 - if '上' in filename: - return date_val * 100 + episode * 10 + 1 - elif '中' in filename: - return date_val * 100 + episode * 10 + 2 - elif '下' in filename: - return date_val * 100 + episode * 10 + 3 - return date_val * 100 + episode * 10 - - # 4. 纯数字格式(文件名开头是纯数字) - match_num = re.match(r'^(\d+)', file_name_without_ext) - if match_num: - return int(match_num.group(1)) - - # 5. 尝试匹配文件名中的任何数字 - any_num_match = re.search(r'(\d+)', filename) - if any_num_match: - return int(any_num_match.group(1)) - - # 6. 默认使用更新时间 - return file.get("created_at", file.get("updated_at", file.get("last_update_at", 0))) + # 使用全局排序函数 + return sort_file_by_name(file) + + # 判断是否使用单独的{}模式 # 初始化sorted_files列表,用于收集需要重命名的文件 sorted_files = [] @@ -2492,29 +2381,8 @@ class Quark: # 修改为按日期或数字排序(复用与文件树相同的排序逻辑) def extract_sort_value(file_name): - # 尝试提取日期格式(优先YYYY-MM-DD格式) - date_match = re.search(r'(\d{4})[-./](\d{1,2})[-./](\d{1,2})', file_name) - if date_match: - year = int(date_match.group(1)) - month = int(date_match.group(2)) - day = int(date_match.group(3)) - return year * 10000 + month * 100 + day - - # 尝试提取紧凑日期格式(YYYYMMDD) - compact_date_match = re.search(r'(\d{4})(\d{2})(\d{2})', file_name) - if compact_date_match: - year = int(compact_date_match.group(1)) - month = int(compact_date_match.group(2)) - day = int(compact_date_match.group(3)) - return year * 10000 + month * 100 + day - - # 尝试提取任何数字 - number_match = re.search(r'(\d+)', file_name) - if number_match: - return int(number_match.group(1)) - - # 默认使用原文件名 - return float('inf') + # 使用全局排序函数 + return sort_file_by_name(file_name) # 按目标文件名中的日期或数字进行排序,与顺序命名和剧集命名模式保持一致 rename_operations.sort(key=lambda x: extract_sort_value(x[1])) From bb7748161df165d15a628b1ae2f2388532228c1d Mon Sep 17 00:00:00 2001 From: x1ao4 Date: Sat, 26 Apr 2025 22:08:40 +0800 Subject: [PATCH 3/5] =?UTF-8?q?=E5=B0=86=E5=85=A8=E5=B1=80=E7=9A=84?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=8E=92=E5=BA=8F=E5=87=BD=E6=95=B0=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E4=B8=BA=E5=A4=9A=E7=BA=A7=E6=8E=92=E5=BA=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- quark_auto_save.py | 292 +++++++++++++++++++-------------------------- 1 file changed, 126 insertions(+), 166 deletions(-) diff --git a/quark_auto_save.py b/quark_auto_save.py index 87a69cb..7678c6d 100644 --- a/quark_auto_save.py +++ b/quark_auto_save.py @@ -23,205 +23,161 @@ def sort_file_by_name(file): """ 通用的文件排序函数,用于根据文件名智能排序 支持多种格式的日期、期数、集数等提取和排序 + 使用多级排序键,按日期、期数、上中下顺序排序 + 如果以上均无法提取,则使用文件更新时间作为最后排序依据 """ if isinstance(file, dict) and file.get("dir", False): # 跳过文件夹 - return float('inf') + return (float('inf'), float('inf'), float('inf'), 0) # 获取文件名,支持字符串或文件对象 if isinstance(file, dict): filename = file.get("file_name", "") + # 获取更新时间作为最后排序依据 + update_time = file.get("updated_at", 0) else: filename = file + update_time = 0 # 提取文件名,不含扩展名 file_name_without_ext = os.path.splitext(filename)[0] - # 1. 日期格式识别(支持多种格式)- 最高优先级 + # 初始化排序值 + date_value = float('inf') # 日期键(第一级) + episode_value = float('inf') # 期数/集数键(第二级) + segment_value = 0 # 上中下/其他细分键(第三级) - # 1.1 日期+期数的复合格式,例如:2025-04-18 第5期上 - match_date_episode = re.search(r'(20\d{2})[-./\s](\d{1,2})[-./\s](\d{1,2}).*?第(\d+)[期集话]', filename) - if match_date_episode: - year = int(match_date_episode.group(1)) - month = int(match_date_episode.group(2)) - day = int(match_date_episode.group(3)) - episode = int(match_date_episode.group(4)) - date_val = year * 10000 + month * 100 + day - # 将日期值作为主排序,期数为次要排序 - if '上' in filename: - return date_val * 100 + episode * 10 + 1 - elif '中' in filename: - return date_val * 100 + episode * 10 + 2 - elif '下' in filename: - return date_val * 100 + episode * 10 + 3 - return date_val * 100 + episode * 10 + # 1. 提取日期 - 第一级排序键 - # 1.2 使用两位年份的日期+期数格式,如:23-04-18 第5期 - match_yy_date_episode = re.search(r'((?:19|20)?\d{2})[-./\s](\d{1,2})[-./\s](\d{1,2}).*?第(\d+)[期集话]', filename) - if match_yy_date_episode and len(match_yy_date_episode.group(1)) == 2: - year_str = match_yy_date_episode.group(1) - # 如果是两位年份,假设20xx年 - year = int("20" + year_str) - month = int(match_yy_date_episode.group(2)) - day = int(match_yy_date_episode.group(3)) - episode = int(match_yy_date_episode.group(4)) - date_val = year * 10000 + month * 100 + day - if '上' in filename: - return date_val * 100 + episode * 10 + 1 - elif '中' in filename: - return date_val * 100 + episode * 10 + 2 - elif '下' in filename: - return date_val * 100 + episode * 10 + 3 - return date_val * 100 + episode * 10 - - # 1.3 完整的YYYYMMDD格式(无分隔符) - match_date_compact = re.search(r'((?:19|20)\d{2})(\d{2})(\d{2})', filename) - if match_date_compact: - year = int(match_date_compact.group(1)) - month = int(match_date_compact.group(2)) - day = int(match_date_compact.group(3)) - return year * 10000 + month * 100 + day - - # 1.4 YYYY-MM-DD 或 YYYY.MM.DD 或 YYYY/MM/DD 或 YYYY MM DD格式 + # 1.1 YYYY-MM-DD 或 YYYY.MM.DD 或 YYYY/MM/DD 或 YYYY MM DD格式(四位年份) match_date_full = re.search(r'((?:19|20)\d{2})[-./\s](\d{1,2})[-./\s](\d{1,2})', filename) if match_date_full: year = int(match_date_full.group(1)) month = int(match_date_full.group(2)) day = int(match_date_full.group(3)) - return year * 10000 + month * 100 + day + date_value = year * 10000 + month * 100 + day - # 1.5 YY-MM-DD 或 YY.MM.DD 或 YY/MM/DD 或 YY MM DD格式(两位年份) - match_yy_date = re.search(r'((?:19|20)?\d{2})[-./\s](\d{1,2})[-./\s](\d{1,2})', filename) - if match_yy_date and len(match_yy_date.group(1)) == 2: - year_str = match_yy_date.group(1) - # 如果是两位年份,假设20xx年 - year = int("20" + year_str) - month = int(match_yy_date.group(2)) - day = int(match_yy_date.group(3)) - return year * 10000 + month * 100 + day - - # 1.6 YYMMDD格式(两位年份,无分隔符) - match_yy_compact = re.search(r'(? 12: - month, day = day, month - return year * 10000 + month * 100 + day + # 1.3 完整的YYYYMMDD格式(无分隔符) + if date_value == float('inf'): + match_date_compact = re.search(r'((?:19|20)\d{2})(\d{2})(\d{2})', filename) + if match_date_compact: + year = int(match_date_compact.group(1)) + month = int(match_date_compact.group(2)) + day = int(match_date_compact.group(3)) + date_value = year * 10000 + month * 100 + day - # 1.8 年期格式,如"2025年14期" - match_year_issue = re.search(r'((?:19|20)\d{2})[年].*?(\d+)[期]', filename) - if match_year_issue: - year = int(match_year_issue.group(1)) - issue = int(match_year_issue.group(2)) - return year * 1000 + issue + # 1.4 YYMMDD格式(两位年份,无分隔符) + if date_value == float('inf'): + match_yy_compact = re.search(r'(? 12: - month, day = day, month - # 由于没有年份,使用一个较低的基数,确保任何有年份的日期都排在后面 - return month * 100 + day + # 1.5 MM/DD/YYYY 或 DD/MM/YYYY 格式 + if date_value == float('inf'): + match_date_alt = re.search(r'(\d{1,2})[-./\s](\d{1,2})[-./\s]((?:19|20)\d{2})', filename) + if match_date_alt: + # 假设第一个是月,第二个是日(美式日期) + month = int(match_date_alt.group(1)) + day = int(match_date_alt.group(2)) + year = int(match_date_alt.group(3)) + # 检查月份值,如果大于12可能是欧式日期格式(DD/MM/YYYY) + if month > 12: + month, day = day, month + date_value = year * 10000 + month * 100 + day - # 2. "第X期/集/话" 格式 + # 1.6 MM-DD 或 MM.DD 或 MM/DD 或 MM DD格式(无年份) + if date_value == float('inf'): + match_date_short = re.search(r'(? 12: + month, day = day, month + # 由于没有年份,使用一个较低的基数,确保任何有年份的日期都排在前面 + # 使用20000000作为基准,所以无年份日期都会排在有年份日期之后 + date_value = 20000000 + month * 100 + day + + # 2. 提取期数/集数 - 第二级排序键 + + # 2.1 "第X期/集/话" 格式 match_chinese = re.search(r'第(\d+)[期集话]', filename) - episode_num = int(match_chinese.group(1)) if match_chinese else 0 - - # 文件名含"上中下"(优先处理,因为可能与其他格式同时存在) if match_chinese: - # 如果同时存在集数和上中下,则按照集数*10+位置排序 - if '上' in filename: - return episode_num * 10 + 1 - elif '中' in filename: - return episode_num * 10 + 2 - elif '下' in filename: - return episode_num * 10 + 3 - return episode_num * 10 - elif '上' in filename: - return 1 - elif '中' in filename: - return 2 - elif '下' in filename: - return 3 + episode_value = int(match_chinese.group(1)) - # 2.1 "X集/期/话" 格式 - match_chinese_simple = re.search(r'(\d+)[期集话]', filename) - if match_chinese_simple: - return int(match_chinese_simple.group(1)) + # 2.2 "X集/期/话" 格式 + if episode_value == float('inf'): + match_chinese_simple = re.search(r'(\d+)[期集话]', filename) + if match_chinese_simple: + episode_value = int(match_chinese_simple.group(1)) - # 3.1 S01E01 格式,提取季数和集数 - match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename) - if match_s_e: - season = int(match_s_e.group(1)) - episode = int(match_s_e.group(2)) - return season * 1000 + episode + # 2.3 S01E01格式 + if episode_value == float('inf'): + match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename) + if match_s_e: + season = int(match_s_e.group(1)) + episode = int(match_s_e.group(2)) + # 使用季*1000+集作为期数值 + episode_value = episode # 只用集数作为排序键 - # 3.2 E01/EP01 格式,仅提取集数 - match_e = re.search(r'[Ee][Pp]?(\d+)', filename) - if match_e: - return int(match_e.group(1)) + # 2.4 E01/EP01格式 + if episode_value == float('inf'): + match_e = re.search(r'[Ee][Pp]?(\d+)', filename) + if match_e: + episode_value = int(match_e.group(1)) - # 3.3 1x01 格式,提取季数和集数 - match_x = re.search(r'(\d+)[Xx](\d+)', filename) - if match_x: - season = int(match_x.group(1)) - episode = int(match_x.group(2)) - return season * 1000 + episode + # 2.5 1x01格式 + if episode_value == float('inf'): + match_x = re.search(r'(\d+)[Xx](\d+)', filename) + if match_x: + episode = int(match_x.group(2)) + episode_value = episode - # 3.4 数字后接4K格式 - match_4k = re.search(r'(\d+)[-_\s]*4[Kk]', filename) - if match_4k: - return int(match_4k.group(1)) + # 2.6 方括号/中括号包围的数字 + if episode_value == float('inf'): + match_bracket = re.search(r'\[(\d+)\]|【(\d+)】', filename) + if match_bracket: + episode_value = int(match_bracket.group(1) if match_bracket.group(1) else match_bracket.group(2)) - # 3.5 方括号包围的数字 - match_bracket = re.search(r'\[(\d+)\]', filename) - if match_bracket: - return int(match_bracket.group(1)) + # 2.7 其他数字格式(如果没有明确的期数) + if episode_value == float('inf'): + # 优先尝试纯数字文件名 + if file_name_without_ext.isdigit(): + episode_value = int(file_name_without_ext) + else: + # 否则尝试提取任何数字 + any_num_match = re.search(r'(\d+)', filename) + if any_num_match: + episode_value = int(any_num_match.group(1)) - # 3.6 中括号包围的数字 - match_cn_bracket = re.search(r'【(\d+)】', filename) - if match_cn_bracket: - return int(match_cn_bracket.group(1)) + # 3. 提取上中下标记或其他细分 - 第三级排序键 + if re.search(r'上[集期话部篇]?|[集期话部篇]上', filename): + segment_value = 1 + elif re.search(r'中[集期话部篇]?|[集期话部篇]中', filename): + segment_value = 2 + elif re.search(r'下[集期话部篇]?|[集期话部篇]下', filename): + segment_value = 3 - # 3.7 下划线包围的数字 - match_underscore = re.search(r'_?(\d+)_', filename) - if match_underscore: - return int(match_underscore.group(1)) - - # 4. 纯数字格式(文件名开头是纯数字) - match_num = re.match(r'^(\d+)', file_name_without_ext) - if match_num: - return int(match_num.group(1)) - - # 5. 尝试匹配文件名中的任何数字 - any_num_match = re.search(r'(\d+)', filename) - if any_num_match: - return int(any_num_match.group(1)) - - # 6. 默认使用更新时间 - if isinstance(file, dict): - return file.get("created_at", file.get("updated_at", file.get("last_update_at", 0))) - - return float('inf') + # 返回多级排序元组,加入更新时间作为第四级排序键 + return (date_value, episode_value, segment_value, update_time) # 全局的剧集编号提取函数 @@ -1306,7 +1262,9 @@ class Quark: # 实现高级排序算法 def extract_sorting_value(file): # 使用全局排序函数 - return sort_file_by_name(file) + sort_tuple = sort_file_by_name(file) + # 返回排序元组,实现多级排序 + return sort_tuple # 判断是否使用单独的{}模式 @@ -1728,7 +1686,9 @@ class Quark: # 实现高级排序算法 def extract_sorting_value(file): # 使用全局排序函数 - return sort_file_by_name(file) + sort_tuple = sort_file_by_name(file) + # 返回排序元组,实现多级排序 + return sort_tuple # 判断是否使用单独的{}模式 From 55c515db4ed4a489604b0a4c5421daf008192dad Mon Sep 17 00:00:00 2001 From: x1ao4 Date: Sat, 26 Apr 2025 22:48:48 +0800 Subject: [PATCH 4/5] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=89=A7=E9=9B=86?= =?UTF-8?q?=E7=BC=96=E5=8F=B7=E6=8F=90=E5=8F=96=E5=87=BD=E6=95=B0=E5=8F=AF?= =?UTF-8?q?=E8=83=BD=E4=BC=9A=E6=8A=8A=E6=97=A5=E6=9C=9F=E6=8F=90=E5=8F=96?= =?UTF-8?q?=E4=B8=BA=E5=89=A7=E9=9B=86=E7=BC=96=E5=8F=B7=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- quark_auto_save.py | 111 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 101 insertions(+), 10 deletions(-) diff --git a/quark_auto_save.py b/quark_auto_save.py index 7678c6d..2beb709 100644 --- a/quark_auto_save.py +++ b/quark_auto_save.py @@ -193,14 +193,76 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None): Returns: int: 提取到的剧集号,如果无法提取则返回None """ + # 预处理:排除文件名中可能是日期的部分,避免误识别 + date_patterns = [ + # YYYY-MM-DD 或 YYYY.MM.DD 或 YYYY/MM/DD 或 YYYY MM DD格式(四位年份) + r'((?:19|20)\d{2})[-./\s](\d{1,2})[-./\s](\d{1,2})', + # YY-MM-DD 或 YY.MM.DD 或 YY/MM/DD 或 YY MM DD格式(两位年份) + r'((?:19|20)?\d{2})[-./\s](\d{1,2})[-./\s](\d{1,2})', + # 完整的YYYYMMDD格式(无分隔符) + r'((?:19|20)\d{2})(\d{2})(\d{2})', + # YYMMDD格式(两位年份,无分隔符) + r'(?= 3: + if re.match(r'(?:19|20)\d{2}', match.group(1)): # 首个分组是年份 + month = int(match.group(2)) + day = int(match.group(3)) + elif re.match(r'(?:19|20)\d{2}', match.group(3)): # 末尾分组是年份 + month = int(match.group(1)) + day = int(match.group(2)) + else: + # 处理两位数年份的情况(如25.03.21) + try: + # 假设第一个是年份,第二个是月,第三个是日 + year = int(match.group(1)) + month = int(match.group(2)) + day = int(match.group(3)) + + # 如果月和日在有效范围内,则这可能是一个日期 + if 1 <= month <= 12 and 1 <= day <= 31: + pass # 保持month和day的值 + else: + # 尝试另一种解释:月.日.年 + month = int(match.group(1)) + day = int(match.group(2)) + # 检查月和日的有效性 + if not (1 <= month <= 12 and 1 <= day <= 31): + # 仍然无效,重置month和day + month = None + day = None + except ValueError: + # 转换失败,保持month和day为None + pass + + # 如果能确定月日且是有效的日期,则从文件名中删除该日期 + if month and day and 1 <= month <= 12 and 1 <= day <= 31: + filename_without_dates = filename_without_dates.replace(date_str, " ") + # 优先匹配SxxExx格式 - match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename) + match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename_without_dates) if match_s_e: # 直接返回E后面的集数 return int(match_s_e.group(2)) # 其次匹配E01格式 - match_e = re.search(r'[Ee][Pp]?(\d+)', filename) + match_e = re.search(r'[Ee][Pp]?(\d+)', filename_without_dates) if match_e: return int(match_e.group(1)) @@ -233,19 +295,37 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None): else: patterns = default_patterns - # 尝试使用每个正则表达式匹配文件名 + # 尝试使用每个正则表达式匹配文件名(使用不含日期的文件名) for pattern_regex in patterns: try: - match = re.search(pattern_regex, filename) + match = re.search(pattern_regex, filename_without_dates) if match: - return int(match.group(1)) + episode_num = int(match.group(1)) + + # 检查提取的数字是否可能是日期的一部分 + # 如果是纯数字并且可能是日期格式,则跳过 + if str(episode_num).isdigit() and is_date_format(str(episode_num)): + continue + + return episode_num except: continue - # 尝试其他通用提取方法 - 提取任何数字 - num_match = re.search(r'(\d+)', filename) + # 如果从不含日期的文件名中没有找到剧集号,尝试从原始文件名中提取 + # 这是为了兼容某些特殊情况,但要检查提取的数字不是日期 + file_name_without_ext = os.path.splitext(filename)[0] + + # 如果文件名是纯数字,且不是日期格式,则可能是剧集号 + if file_name_without_ext.isdigit() and not is_date_format(file_name_without_ext): + return int(file_name_without_ext) + + # 最后尝试提取任何数字,但要排除日期可能性 + num_match = re.search(r'(\d+)', filename_without_dates) if num_match: - return int(num_match.group(1)) + episode_num = int(num_match.group(1)) + # 检查提取的数字是否可能是日期 + if not is_date_format(str(episode_num)): + return episode_num return None @@ -260,7 +340,7 @@ NOTIFYS = [] def is_date_format(number_str): """ 判断一个纯数字字符串是否可能是日期格式 - 支持的格式:YYYYMMDD, MMDD + 支持的格式:YYYYMMDD, MMDD, YYMMDD """ # 判断YYYYMMDD格式 (8位数字) if len(number_str) == 8 and number_str.startswith('20'): @@ -273,6 +353,17 @@ def is_date_format(number_str): # 可能是日期格式 return True + # 判断YYMMDD格式 (6位数字) + elif len(number_str) == 6: + year_str = number_str[:2] + month = int(number_str[2:4]) + day = int(number_str[4:6]) + + # 检查月份和日期是否有效 + if 1 <= month <= 12 and 1 <= day <= 31: + # 可能是日期格式 + return True + # 判断MMDD格式 (4位数字) elif len(number_str) == 4: month = int(number_str[:2]) @@ -283,7 +374,7 @@ def is_date_format(number_str): # 可能是日期格式 return True - # 其他长度的纯数字不视为日期格式 + # 其他格式不视为日期格式 return False # 兼容青龙 From 60740487c0246db54036c9abad3bac33bb254f30 Mon Sep 17 00:00:00 2001 From: x1ao4 Date: Sun, 27 Apr 2025 01:29:12 +0800 Subject: [PATCH 5/5] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E9=A2=84=E7=BD=AE?= =?UTF-8?q?=E7=9A=84=E6=B5=8B=E8=AF=95=E4=BB=BB=E5=8A=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- quark_config.json | 62 +++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/quark_config.json b/quark_config.json index 5a83ec8..beb41b9 100644 --- a/quark_config.json +++ b/quark_config.json @@ -24,34 +24,50 @@ }, "tasklist": [ { - "taskname": "测试-魔法匹配剧集(这是一组有效分享,配置CK后可测试任务是否正常)", - "shareurl": "https://pan.quark.cn/s/d07a34a9c695#/list/share/7e25ddd87cf64443b637125478733295-夸克自动转存测试", - "savepath": "/夸克自动转存测试", - "pattern": "$TV", - "replace": "", - "enddate": "2099-01-30", - "update_subdir": "4k|1080p" - }, + "taskname": "测试 - 正则命名", + "shareurl": "https://pan.quark.cn/s/ebd8a90f30c0#/list/share/ffccd89838a24d5daf76f0249d40e980-NHXQL", + "savepath": "测试/正则命名 - 你好,星期六", + "pattern": "(\\d{4})(-?)(\\d{2})(-?)(\\d{2})(期?)", + "replace": "你好,星期六 - \\1-\\3-\\5", + "filterwords": "加更,企划", + "enddate": "2099-01-30" + }, { - "taskname": "测试-广告过滤", - "shareurl": "https://pan.quark.cn/s/d07a34a9c695#/list/share/7e25ddd87cf64443b637125478733295-夸克自动转存测试/680d91e490814da0927c38b432f88edc-带广告文件夹", - "savepath": "/夸克自动转存测试/带广告文件夹", - "pattern": "【XX电影网】(.*)\\.(mp4|mkv)", - "replace": "\\1.\\2", + "taskname": "测试 - 顺序命名", + "shareurl": "https://pan.quark.cn/s/59169f522931#/list/share/364e88a7ef6449beb07120fb65695e8c-CF2025", + "savepath": "测试/顺序命名 - 乘风2025", + "pattern": "乘风2025 - S06E{}", + "filterwords": "超前,加更,训练,蒸蒸,纯享,团播,抢先", + "use_sequence_naming": true, + "use_episode_naming": false, + "sequence_naming": "乘风2025 - S06E{}", "enddate": "2099-01-30" }, { - "taskname": "测试-超期任务", - "shareurl": "https://pan.quark.cn/s/d07a34a9c695#/list/share/7e25ddd87cf64443b637125478733295-夸克自动转存测试", - "savepath": "/夸克自动转存测试", + "taskname": "测试 - 剧集命名", + "shareurl": "https://pan.quark.cn/s/100c8d659137#/list/share/05e7fe9ee511437eb8c06ef7b978df06-HJ", + "savepath": "测试/剧集命名 - 黑镜", + "pattern": "黑镜 - S01E[]", + "filterwords": "", + "use_sequence_naming": false, + "use_episode_naming": true, + "episode_naming": "黑镜 - S01E[]", + "enddate": "2099-01-30" + }, + { + "taskname": "测试 - 直接转存", + "shareurl": "https://pan.quark.cn/s/100c8d659137#/list/share/05e7fe9ee511437eb8c06ef7b978df06-HJ", + "savepath": "测试/直接转存 - 黑镜", "pattern": "", "replace": "", - "enddate": "2000-01-30", - "runweek": [ - 2, - 4, - 6 - ] + "filterwords": "", + "update_subdir": "S02", + "enddate": "2099-01-30" + } + ], + "episode_patterns": [ + { + "regex": "第(\\d+)集|第(\\d+)期|第(\\d+)话|(\\d+)集|(\\d+)期|(\\d+)话|[Ee][Pp]?(\\d+)|(\\d+)[-_\\\\s]*4[Kk]|\\[(\\d+)\\]|【(\\d+)】|_?(\\d+)_?" } ] -} \ No newline at end of file +}