优化了剧集编号提取和文件排序的逻辑

This commit is contained in:
x1ao4 2025-04-22 23:19:39 +08:00
parent afdb534a7b
commit 2463e214dc
2 changed files with 128 additions and 58 deletions

View File

@ -176,7 +176,7 @@
<div class="col-10">
<h2 style="display: inline-block;"><i class="bi bi-list-ol"></i> 剧集识别</h2>
<span class="badge badge-pill badge-light">
<a href="#" target="_blank" @click.prevent="alert('剧集识别说明:\n在任务配置中填写包含E[]的匹配表达式,如「剧名 - S01E[]」,将自动切换至剧集命名模式,自动识别文件名中的编号作为集编号,并替换[]部分。\n\n集编号匹配规则\n用于识别集编号的正则表达式多个表达式之间用竖线「|」分隔,每个表达式必须包含一个捕获组,用于提取剧集编号')">?</a>
<a href="https://github.com/x1ao4/quark-auto-save-x/wiki/正则处理教程#25-剧集命名" target="_blank">?</a>
</span>
</div>
</div>
@ -185,7 +185,7 @@
<div class="input-group-prepend">
<span class="input-group-text">集编号识别规则</span>
</div>
<input type="text" class="form-control" v-model="episodePatternsText" placeholder="多个正则表达式用竖线「|」分隔,如:(\\d+)|[Ee](\\d+)|第(\\d+)集">
<input type="text" class="form-control" v-model="episodePatternsText" placeholder="输入用于识别集编号的正则表达式,多个表达式用竖线「|」分隔,如:第(\d+)集|[Ee][Pp]?(\d+)|(\d+)[-_\\s]*4[Kk]">
</div>
</div>
@ -677,19 +677,17 @@
// 如果没有剧集识别模式,添加默认模式
if (!this.formData.episode_patterns || this.formData.episode_patterns.length === 0) {
this.formData.episode_patterns = [
{ name: 'EP_BASIC', description: '[]', regex: '(\\d+)' },
{ name: 'EP_4K', description: '[]-4K', regex: '(\\d+)[-_\\s]*4[Kk]' },
{ name: 'EP_HUA', description: '[]话', regex: '(\\d+)话' },
{ name: 'EP_E', description: 'E[]', regex: '[Ee](\\d+)' },
{ name: 'EP_EP', description: 'EP[]', regex: '[Ee][Pp](\\d+)' },
{ name: 'EP_DIHUA', description: '第[]话', regex: '第(\\d+)话' },
{ name: 'EP_DIJI', description: '第[]集', regex: '第(\\d+)集' },
{ name: 'EP_DIQI', description: '第[]期', regex: '第(\\d+)期' },
{ name: 'EP_4KSPACE', description: '[] 4K', regex: '(\\d+)\\s+4[Kk]' },
{ name: 'EP_4KUNDER', description: '[]_4K', regex: '(\\d+)[_\\s]4[Kk]' },
{ name: 'EP_BRACKET', description: '【[]】', regex: '【(\\d+)】' },
{ name: 'EP_DIHUA', description: '第[]话', regex: '第(\\d+)话' },
{ name: 'EP_JI', description: '[]集', regex: '(\\d+)集' },
{ name: 'EP_QI', description: '[]期', regex: '(\\d+)期' },
{ name: 'EP_HUA', description: '[]话', regex: '(\\d+)话' },
{ name: 'EP_E_EP', description: 'E/EP[]', regex: '[Ee][Pp]?(\\d+)' },
{ name: 'EP_4K', description: '[]-4K', regex: '(\\d+)[-_\\s]*4[Kk]' },
{ name: 'EP_SQUAREBRACKET', description: '方括号数字', regex: '\\[(\\d+)\\]' },
{ name: 'EP_UNDERSCORE', description: '_[]_', regex: '_?(\\d+)_' }
{ name: 'EP_BRACKET', description: '【[]】', regex: '【(\\d+)】' },
{ name: 'EP_UNDERSCORE', description: '_[]_', regex: '_?(\\d+)_?' }
];
}
}, 500);
@ -1220,8 +1218,8 @@
},
detectNamingMode(task) {
// 检测是否为顺序命名模式或剧集命名模式
const sequencePatterns = ['{}', 'E{}', 'EP{}', 'S\\d+E{}', '第{}集', '第{}话', '第{}期'];
const episodePatterns = ['[]', 'E[]', 'EP[]', 'S\\d+E[]', '第[]集', '第[]话', '第[]期'];
const sequencePatterns = ['{}集', '第{}期', '第{}话', '{}集', '{}期', '{}话', 'E{}', 'EP{}', 'S\\d+E{}', '[{}]', '【{}】', '_{}_'];
const episodePatterns = ['第{% raw %}[]{% endraw %}集', '第{% raw %}[]{% endraw %}期', '第{% raw %}[]{% endraw %}话', '{% raw %}[]{% endraw %}集', '{% raw %}[]{% endraw %}期', '{% raw %}[]{% endraw %}话', 'E{% raw %}[]{% endraw %}', 'EP{% raw %}[]{% endraw %}', 'S\\d+E{% raw %}[]{% endraw %}', '[{% raw %}[]{% endraw %}', '【{% raw %}[]{% endraw %}】', '_{% raw %}[]{% endraw %}_'];
let isSequenceNaming = false;
let isEpisodeNaming = false;

View File

@ -244,19 +244,17 @@ class Config:
if not config_data.get("episode_patterns"):
print("🔼 添加剧集识别模式配置")
config_data["episode_patterns"] = [
{"description": "[]", "regex": "(\\d+)"},
{"description": "[]-4K", "regex": "(\\d+)[-_\\s]*4[Kk]"},
{"description": "[]话", "regex": "(\\d+)话"},
{"description": "E[]", "regex": "[Ee](\\d+)"},
{"description": "EP[]", "regex": "[Ee][Pp](\\d+)"},
{"description": "第[]话", "regex": "第(\\d+)话"},
{"description": "第[]集", "regex": "第(\\d+)集"},
{"description": "第[]期", "regex": "第(\\d+)期"},
{"description": "[] 4K", "regex": "(\\d+)\\s+4[Kk]"},
{"description": "[]_4K", "regex": "(\\d+)[_\\s]4[Kk]"},
{"description": "【[]】", "regex": "【(\\d+)】"},
{"description": "第[]话", "regex": "第(\\d+)话"},
{"description": "[]集", "regex": "(\\d+)集"},
{"description": "[]期", "regex": "(\\d+)期"},
{"description": "[]话", "regex": "(\\d+)话"},
{"description": "E/EP[]", "regex": "[Ee][Pp]?(\\d+)"},
{"description": "[]-4K", "regex": "(\\d+)[-_\\s]*4[Kk]"},
{"description": "[[]", "regex": "\\[(\\d+)\\]"},
{"description": "_[]_", "regex": "_?(\\d+)_"}
{"description": "【[]】", "regex": "【(\\d+)】"},
{"description": "_[]_", "regex": "_?(\\d+)_?"}
]
@ -986,7 +984,7 @@ class Quark:
# 提取文件名,不含扩展名
file_name_without_ext = os.path.splitext(filename)[0]
# 1. "第X期/集/话" 格式
# 1. "第X期/集/话" 格式 - 保持最高优先级
match_chinese = re.search(r'第(\d+)[期集话]', filename)
episode_num = int(match_chinese.group(1)) if match_chinese else 0
@ -1007,6 +1005,11 @@ class Quark:
elif '' in filename:
return 3
# 1.2 "X集/期/话" 格式(不带“第”前缀)- 优先级与默认集编号识别规则列表中的顺序保持一致
match_chinese_simple = re.search(r'(\d+)[期集话]', filename)
if match_chinese_simple:
return int(match_chinese_simple.group(1))
# 2.1 S01E01 格式,提取季数和集数
match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename)
if match_s_e:
@ -1014,7 +1017,7 @@ class Quark:
episode = int(match_s_e.group(2))
return season * 1000 + episode
# 2.2 E01 格式,仅提取集数
# 2.2 E01/EP01 格式,仅提取集数
match_e = re.search(r'[Ee][Pp]?(\d+)', filename)
if match_e:
return int(match_e.group(1))
@ -1026,6 +1029,26 @@ class Quark:
episode = int(match_x.group(2))
return season * 1000 + episode
# 2.4 数字后接4K格式
match_4k = re.search(r'(\d+)[-_\s]*4[Kk]', filename)
if match_4k:
return int(match_4k.group(1))
# 2.5 方括号包围的数字
match_bracket = re.search(r'\[(\d+)\]', filename)
if match_bracket:
return int(match_bracket.group(1))
# 2.6 方头括号【】包围的数字
match_cn_bracket = re.search(r'【(\d+)】', filename)
if match_cn_bracket:
return int(match_cn_bracket.group(1))
# 2.7 下划线包围的数字
match_underscore = re.search(r'_?(\d+)_', filename)
if match_underscore:
return int(match_underscore.group(1))
# 3. 日期格式识别(支持多种格式)
# 3.1 完整的YYYYMMDD格式
@ -1374,7 +1397,7 @@ class Quark:
# 提取文件名,不含扩展名
file_name_without_ext = os.path.splitext(filename)[0]
# 1. "第X期/集/话" 格式
# 1. "第X期/集/话" 格式 - 保持最高优先级
match_chinese = re.search(r'第(\d+)[期集话]', filename)
episode_num = int(match_chinese.group(1)) if match_chinese else 0
@ -1395,6 +1418,11 @@ class Quark:
elif '' in filename:
return 3
# 1.2 "X集/期/话" 格式(不带“第”前缀)- 优先级与默认集编号识别规则列表中的顺序保持一致
match_chinese_simple = re.search(r'(\d+)[期集话]', filename)
if match_chinese_simple:
return int(match_chinese_simple.group(1))
# 2.1 S01E01 格式,提取季数和集数
match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename)
if match_s_e:
@ -1402,7 +1430,7 @@ class Quark:
episode = int(match_s_e.group(2))
return season * 1000 + episode
# 2.2 E01 格式,仅提取集数
# 2.2 E01/EP01 格式,仅提取集数
match_e = re.search(r'[Ee][Pp]?(\d+)', filename)
if match_e:
return int(match_e.group(1))
@ -1414,6 +1442,26 @@ class Quark:
episode = int(match_x.group(2))
return season * 1000 + episode
# 2.4 数字后接4K格式
match_4k = re.search(r'(\d+)[-_\s]*4[Kk]', filename)
if match_4k:
return int(match_4k.group(1))
# 2.5 方括号包围的数字
match_bracket = re.search(r'\[(\d+)\]', filename)
if match_bracket:
return int(match_bracket.group(1))
# 2.6 方头括号【】包围的数字
match_cn_bracket = re.search(r'【(\d+)】', filename)
if match_cn_bracket:
return int(match_cn_bracket.group(1))
# 2.7 下划线包围的数字
match_underscore = re.search(r'_?(\d+)_', filename)
if match_underscore:
return int(match_underscore.group(1))
# 3. 日期格式识别(支持多种格式)
# 3.1 完整的YYYYMMDD格式
@ -1642,17 +1690,17 @@ class Quark:
# 尝试匹配更多格式
default_patterns = [
r'(\d+)',
r'(\d+)[-_\s]*4[Kk]',
r'(\d+)话',
r'第(\d+)话',
r'第(\d+)集',
r'第(\d+)期',
r'(\d+)\s+4[Kk]',
r'(\d+)[_\s]4[Kk]',
r'【(\d+)】',
r'第(\d+)话',
r'(\d+)集',
r'(\d+)期',
r'(\d+)话',
r'[Ee][Pp]?(\d+)',
r'(\d+)[-_\s]*4[Kk]',
r'\[(\d+)\]',
r'_?(\d+)_'
r'【(\d+)】',
r'_?(\d+)_?'
]
# 如果配置了自定义规则,优先使用
@ -1770,13 +1818,13 @@ class Quark:
season = int(match_s_e.group(1))
episode = int(match_s_e.group(2))
return (season * 1000 + episode, 0)
# 其他匹配方式
# 使用统一的剧集提取函数
episode_num = extract_episode_number(filename)
if episode_num is not None:
return (episode_num, 0)
# 无法识别,使用修改时间
# 无法识别,回退到修改时间排序
return (float('inf'), file.get("last_update_at", 0))
# 过滤出文件并排序
@ -2299,20 +2347,32 @@ def do_save(account, tasklist=[]):
return int(match_e.group(1))
# 尝试匹配更多格式
patterns = [
r'(\d+)',
r'(\d+)[-_\s]*4[Kk]',
r'(\d+)话',
r'第(\d+)话',
default_patterns = [
r'第(\d+)集',
r'第(\d+)期',
r'(\d+)\s+4[Kk]',
r'(\d+)[_\s]4[Kk]',
r'【(\d+)】',
r'第(\d+)话',
r'(\d+)集',
r'(\d+)期',
r'(\d+)话',
r'[Ee][Pp]?(\d+)',
r'(\d+)[-_\s]*4[Kk]',
r'\[(\d+)\]',
r'_?(\d+)_'
r'【(\d+)】',
r'_?(\d+)_?'
]
# 如果配置了自定义规则,优先使用
if "config_data" in task and isinstance(task["config_data"].get("episode_patterns"), list) and task["config_data"]["episode_patterns"]:
patterns = [p.get("regex", "(\\d+)") for p in task["config_data"]["episode_patterns"]]
else:
# 尝试从全局配置获取
global CONFIG_DATA
if isinstance(CONFIG_DATA.get("episode_patterns"), list) and CONFIG_DATA["episode_patterns"]:
patterns = [p.get("regex", "(\\d+)") for p in CONFIG_DATA["episode_patterns"]]
else:
patterns = default_patterns
# 尝试使用每个正则表达式匹配文件名
for pattern_regex in patterns:
try:
match = re.search(pattern_regex, filename)
@ -2414,20 +2474,32 @@ def do_save(account, tasklist=[]):
return int(match_e.group(1))
# 尝试匹配更多格式
patterns = [
r'(\d+)',
r'(\d+)[-_\s]*4[Kk]',
r'(\d+)话',
r'第(\d+)话',
default_patterns = [
r'第(\d+)集',
r'第(\d+)期',
r'(\d+)\s+4[Kk]',
r'(\d+)[_\s]4[Kk]',
r'【(\d+)】',
r'第(\d+)话',
r'(\d+)集',
r'(\d+)期',
r'(\d+)话',
r'[Ee][Pp]?(\d+)',
r'(\d+)[-_\s]*4[Kk]',
r'\[(\d+)\]',
r'_?(\d+)_'
r'【(\d+)】',
r'_?(\d+)_?'
]
# 如果配置了自定义规则,优先使用
if "config_data" in task and isinstance(task["config_data"].get("episode_patterns"), list) and task["config_data"]["episode_patterns"]:
patterns = [p.get("regex", "(\\d+)") for p in task["config_data"]["episode_patterns"]]
else:
# 尝试从全局配置获取
global CONFIG_DATA
if isinstance(CONFIG_DATA.get("episode_patterns"), list) and CONFIG_DATA["episode_patterns"]:
patterns = [p.get("regex", "(\\d+)") for p in CONFIG_DATA["episode_patterns"]]
else:
patterns = default_patterns
# 尝试使用每个正则表达式匹配文件名
for pattern_regex in patterns:
try:
match = re.search(pattern_regex, filename)