mirror of
https://github.com/Cp0204/quark-auto-save.git
synced 2026-01-12 15:20:44 +08:00
修复剧集编号提取时季数和集数混淆的问题
- 修复 "第X季 第Y集/期/话" 格式文件名错误提取季数为集数的问题 - 优化复合正则表达式的匹配优先级,优先匹配集/期/话模式 - 保持所有现有剧集格式的完全兼容性 - 无需用户修改配置即可生效
This commit is contained in:
parent
e45a57aed1
commit
2d32cd2019
@ -358,14 +358,14 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
||||
if match_e:
|
||||
return int(match_e.group(1))
|
||||
|
||||
# 尝试匹配更多格式
|
||||
# 尝试匹配更多格式(注意:避免匹配季数)
|
||||
default_patterns = [
|
||||
r'第(\d+)集',
|
||||
r'第(\d+)期',
|
||||
r'第(\d+)话',
|
||||
r'(\d+)集',
|
||||
r'(\d+)期',
|
||||
r'(\d+)话',
|
||||
r'(?<!第\d+季\s*)(\d+)集', # 避免匹配"第X季 Y集"中的季数
|
||||
r'(?<!第\d+季\s*)(\d+)期', # 避免匹配"第X季 Y期"中的季数
|
||||
r'(?<!第\d+季\s*)(\d+)话', # 避免匹配"第X季 Y话"中的季数
|
||||
r'[Ee][Pp]?(\d+)',
|
||||
r'(\d+)[-_\s]*4[Kk]',
|
||||
r'\[(\d+)\]',
|
||||
@ -400,16 +400,67 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
||||
# 尝试使用每个正则表达式匹配文件名(使用不含日期的文件名)
|
||||
for pattern_regex in patterns:
|
||||
try:
|
||||
match = re.search(pattern_regex, filename_without_dates)
|
||||
if match:
|
||||
episode_num = int(match.group(1))
|
||||
# 特殊处理:如果是包含多个捕获组的复合正则表达式
|
||||
if '|' in pattern_regex and '(' in pattern_regex:
|
||||
# 先尝试匹配集/期/话相关的模式,避免误匹配季数
|
||||
episode_specific_patterns = [
|
||||
r'第(\d+)集', r'第(\d+)期', r'第(\d+)话',
|
||||
r'(\d+)集', r'(\d+)期', r'(\d+)话'
|
||||
]
|
||||
|
||||
# 检查提取的数字是否可能是日期的一部分
|
||||
# 如果是纯数字并且可能是日期格式,则跳过
|
||||
if str(episode_num).isdigit() and is_date_format(str(episode_num)):
|
||||
continue
|
||||
for ep_pattern in episode_specific_patterns:
|
||||
ep_match = re.search(ep_pattern, filename_without_dates)
|
||||
if ep_match:
|
||||
# 检查这个匹配是否紧跟在"第X季"后面,如果是则跳过
|
||||
match_start = ep_match.start()
|
||||
prefix = filename_without_dates[:match_start]
|
||||
if re.search(r'第\d+季\s*$', prefix):
|
||||
continue # 跳过紧跟在季数后的匹配
|
||||
|
||||
return episode_num
|
||||
episode_num = int(ep_match.group(1))
|
||||
|
||||
# 检查提取的数字是否可能是日期的一部分
|
||||
if str(episode_num).isdigit() and is_date_format(str(episode_num)):
|
||||
continue
|
||||
|
||||
return episode_num
|
||||
|
||||
# 如果集/期/话模式都没匹配到,再尝试原始的复合正则表达式
|
||||
match = re.search(pattern_regex, filename_without_dates)
|
||||
if match:
|
||||
# 遍历所有捕获组,找到第一个非空的
|
||||
for group_num in range(1, len(match.groups()) + 1):
|
||||
if match.group(group_num):
|
||||
episode_num = int(match.group(group_num))
|
||||
|
||||
# 检查提取的数字是否可能是日期的一部分
|
||||
if str(episode_num).isdigit() and is_date_format(str(episode_num)):
|
||||
continue
|
||||
|
||||
# 额外检查:如果匹配的数字来自"第X季"格式,跳过
|
||||
match_start = match.start()
|
||||
match_end = match.end()
|
||||
|
||||
# 检查匹配前后的上下文,看是否是"第X季"格式
|
||||
context_start = max(0, match_start - 2)
|
||||
context_end = min(len(filename_without_dates), match_end + 1)
|
||||
context = filename_without_dates[context_start:context_end]
|
||||
|
||||
if re.search(r'第\d+季', context):
|
||||
continue
|
||||
|
||||
return episode_num
|
||||
else:
|
||||
# 单一模式的正则表达式
|
||||
match = re.search(pattern_regex, filename_without_dates)
|
||||
if match:
|
||||
episode_num = int(match.group(1))
|
||||
|
||||
# 检查提取的数字是否可能是日期的一部分
|
||||
if str(episode_num).isdigit() and is_date_format(str(episode_num)):
|
||||
continue
|
||||
|
||||
return episode_num
|
||||
except:
|
||||
continue
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user