mirror of
https://github.com/Cp0204/quark-auto-save.git
synced 2026-01-16 17:30:43 +08:00
优化集编号识别规则与逻辑,修复文件整理页面命名预览模态框重命名列的排序问题
This commit is contained in:
parent
f7371b660b
commit
27ee3948b8
177
app/run.py
177
app/run.py
@ -144,7 +144,7 @@ def enrich_tasks_with_calendar_meta(tasks_info: list) -> list:
|
|||||||
except Exception:
|
except Exception:
|
||||||
transferred_by_task = {}
|
transferred_by_task = {}
|
||||||
|
|
||||||
# 统计“已播出集数”:读取本地 episodes 表中有 air_date 且 <= 今天的集数
|
# 统计"已播出集数":读取本地 episodes 表中有 air_date 且 <= 今天的集数
|
||||||
from datetime import datetime as _dt
|
from datetime import datetime as _dt
|
||||||
today = _dt.now().strftime('%Y-%m-%d')
|
today = _dt.now().strftime('%Y-%m-%d')
|
||||||
aired_by_show_season = {}
|
aired_by_show_season = {}
|
||||||
@ -586,7 +586,7 @@ logging.basicConfig(
|
|||||||
format="[%(asctime)s][%(levelname)s] %(message)s",
|
format="[%(asctime)s][%(levelname)s] %(message)s",
|
||||||
datefmt="%m-%d %H:%M:%S",
|
datefmt="%m-%d %H:%M:%S",
|
||||||
)
|
)
|
||||||
# 降低第三方网络库的重试噪音:将 urllib3/requests 的日志调为 ERROR,并把“Retrying ...”消息降级为 DEBUG
|
# 降低第三方网络库的重试噪音:将 urllib3/requests 的日志调为 ERROR,并把"Retrying ..."消息降级为 DEBUG
|
||||||
try:
|
try:
|
||||||
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
||||||
logging.getLogger("requests").setLevel(logging.ERROR)
|
logging.getLogger("requests").setLevel(logging.ERROR)
|
||||||
@ -2113,32 +2113,15 @@ def get_share_detail():
|
|||||||
episode_pattern = regex.get("episode_naming")
|
episode_pattern = regex.get("episode_naming")
|
||||||
episode_patterns = regex.get("episode_patterns", [])
|
episode_patterns = regex.get("episode_patterns", [])
|
||||||
|
|
||||||
# 获取默认的剧集模式
|
# 获取用户补充的剧集模式(默认模式由后端内部提供,这里只处理用户补充)
|
||||||
default_episode_pattern = {"regex": '第(\\d+)集|第(\\d+)期|第(\\d+)话|(\\d+)集|(\\d+)期|(\\d+)话|[Ee][Pp]?(\\d+)|(\\d+)[-_\\s]*4[Kk]|\\[(\\d+)\\]|【(\\d+)】|_?(\\d+)_?'}
|
|
||||||
|
|
||||||
# 获取配置的剧集模式,确保每个模式都是字典格式
|
|
||||||
episode_patterns = []
|
episode_patterns = []
|
||||||
raw_patterns = config_data.get("episode_patterns", [default_episode_pattern])
|
raw_patterns = config_data.get("episode_patterns", [])
|
||||||
for p in raw_patterns:
|
for p in raw_patterns:
|
||||||
if isinstance(p, dict) and p.get("regex"):
|
if isinstance(p, dict) and p.get("regex"):
|
||||||
episode_patterns.append(p)
|
episode_patterns.append(p)
|
||||||
elif isinstance(p, str):
|
elif isinstance(p, str):
|
||||||
episode_patterns.append({"regex": p})
|
episode_patterns.append({"regex": p})
|
||||||
|
|
||||||
# 如果没有有效的模式,使用默认模式
|
|
||||||
if not episode_patterns:
|
|
||||||
episode_patterns = [default_episode_pattern]
|
|
||||||
|
|
||||||
# 添加中文数字匹配模式
|
|
||||||
chinese_patterns = [
|
|
||||||
{"regex": r'第([一二三四五六七八九十百千万零两]+)集'},
|
|
||||||
{"regex": r'第([一二三四五六七八九十百千万零两]+)期'},
|
|
||||||
{"regex": r'第([一二三四五六七八九十百千万零两]+)话'},
|
|
||||||
{"regex": r'([一二三四五六七八九十百千万零两]+)集'},
|
|
||||||
{"regex": r'([一二三四五六七八九十百千万零两]+)期'},
|
|
||||||
{"regex": r'([一二三四五六七八九十百千万零两]+)话'}
|
|
||||||
]
|
|
||||||
episode_patterns.extend(chinese_patterns)
|
|
||||||
|
|
||||||
# 应用高级过滤词过滤
|
# 应用高级过滤词过滤
|
||||||
filterwords = regex.get("filterwords", "")
|
filterwords = regex.get("filterwords", "")
|
||||||
@ -2558,6 +2541,9 @@ def init():
|
|||||||
# 读取配置
|
# 读取配置
|
||||||
config_data = Config.read_json(CONFIG_PATH)
|
config_data = Config.read_json(CONFIG_PATH)
|
||||||
Config.breaking_change_update(config_data)
|
Config.breaking_change_update(config_data)
|
||||||
|
|
||||||
|
# 自动清理剧集识别规则配置
|
||||||
|
cleanup_episode_patterns_config(config_data)
|
||||||
|
|
||||||
# 默认管理账号
|
# 默认管理账号
|
||||||
config_data["webui"] = {
|
config_data["webui"] = {
|
||||||
@ -3400,55 +3386,46 @@ def preview_rename():
|
|||||||
|
|
||||||
elif naming_mode == "episode":
|
elif naming_mode == "episode":
|
||||||
# 剧集命名模式
|
# 剧集命名模式
|
||||||
# 获取默认的剧集模式
|
# 获取用户补充的剧集模式(默认模式由后端内部提供,这里只处理用户补充)
|
||||||
default_episode_pattern = {"regex": '第(\\d+)集|第(\\d+)期|第(\\d+)话|(\\d+)集|(\\d+)期|(\\d+)话|[Ee][Pp]?(\\d+)|(\\d+)[-_\\s]*4[Kk]|\\[(\\d+)\\]|【(\\d+)】|_?(\\d+)_?'}
|
|
||||||
|
|
||||||
# 获取配置的剧集模式,确保每个模式都是字典格式
|
|
||||||
episode_patterns = []
|
episode_patterns = []
|
||||||
raw_patterns = config_data.get("episode_patterns", [default_episode_pattern])
|
raw_patterns = config_data.get("episode_patterns", [])
|
||||||
for p in raw_patterns:
|
for p in raw_patterns:
|
||||||
if isinstance(p, dict) and p.get("regex"):
|
if isinstance(p, dict) and p.get("regex"):
|
||||||
episode_patterns.append(p)
|
episode_patterns.append(p)
|
||||||
elif isinstance(p, str):
|
elif isinstance(p, str):
|
||||||
episode_patterns.append({"regex": p})
|
episode_patterns.append({"regex": p})
|
||||||
|
|
||||||
# 如果没有有效的模式,使用默认模式
|
|
||||||
if not episode_patterns:
|
|
||||||
episode_patterns = [default_episode_pattern]
|
|
||||||
|
|
||||||
# 添加中文数字匹配模式
|
|
||||||
chinese_patterns = [
|
|
||||||
{"regex": r'第([一二三四五六七八九十百千万零两]+)集'},
|
|
||||||
{"regex": r'第([一二三四五六七八九十百千万零两]+)期'},
|
|
||||||
{"regex": r'第([一二三四五六七八九十百千万零两]+)话'},
|
|
||||||
{"regex": r'([一二三四五六七八九十百千万零两]+)集'},
|
|
||||||
{"regex": r'([一二三四五六七八九十百千万零两]+)期'},
|
|
||||||
{"regex": r'([一二三四五六七八九十百千万零两]+)话'}
|
|
||||||
]
|
|
||||||
episode_patterns.extend(chinese_patterns)
|
|
||||||
|
|
||||||
# 处理每个文件
|
# 应用高级过滤词过滤(filterwords 已在函数开头获取)
|
||||||
|
if filterwords:
|
||||||
|
# 使用高级过滤函数
|
||||||
|
filtered_files = advanced_filter_files(filtered_files, filterwords)
|
||||||
|
# 标记被过滤的文件
|
||||||
|
for item in filtered_files:
|
||||||
|
if item not in filtered_files:
|
||||||
|
item["filtered"] = True
|
||||||
|
|
||||||
|
# 处理未被过滤的文件
|
||||||
for file in filtered_files:
|
for file in filtered_files:
|
||||||
extension = os.path.splitext(file["file_name"])[1] if not file["dir"] else ""
|
if not file["dir"] and not file.get("filtered"): # 只处理未被过滤的非目录文件
|
||||||
# 从文件名中提取集号
|
extension = os.path.splitext(file["file_name"])[1]
|
||||||
episode_num = extract_episode_number(file["file_name"], episode_patterns=episode_patterns)
|
# 从文件名中提取集号
|
||||||
|
episode_num = extract_episode_number(file["file_name"], episode_patterns=episode_patterns)
|
||||||
if episode_num is not None:
|
|
||||||
new_name = pattern.replace("[]", f"{episode_num:02d}") + extension
|
if episode_num is not None:
|
||||||
preview_results.append({
|
new_name = pattern.replace("[]", f"{episode_num:02d}") + extension
|
||||||
"original_name": file["file_name"],
|
preview_results.append({
|
||||||
"new_name": new_name,
|
"original_name": file["file_name"],
|
||||||
"file_id": file["fid"],
|
"new_name": new_name,
|
||||||
"episode_number": episode_num # 添加集数字段用于前端排序
|
"file_id": file["fid"]
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
# 没有提取到集号,显示无法识别的提示
|
# 没有提取到集号,显示无法识别的提示
|
||||||
preview_results.append({
|
preview_results.append({
|
||||||
"original_name": file["file_name"],
|
"original_name": file["file_name"],
|
||||||
"new_name": "× 无法识别剧集编号",
|
"new_name": "× 无法识别剧集编号",
|
||||||
"file_id": file["fid"]
|
"file_id": file["fid"]
|
||||||
})
|
})
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# 正则命名模式
|
# 正则命名模式
|
||||||
for file in filtered_files:
|
for file in filtered_files:
|
||||||
@ -5682,6 +5659,84 @@ def get_content_types():
|
|||||||
'message': f'获取节目内容类型失败: {str(e)}'
|
'message': f'获取节目内容类型失败: {str(e)}'
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def cleanup_episode_patterns_config(config_data):
    """Remove built-in episode-number regexes from a loaded configuration.

    Only the alternatives a user added on top of the built-in defaults are
    kept. Two places are cleaned, both in place:

    1. Every dict in ``config_data['tasklist']`` loses the
       ``episode_patterns`` key of its nested ``config_data`` dict; a nested
       dict left empty by that removal is deleted as well.
    2. Each entry of the top-level ``episode_patterns`` list (a
       ``{'regex': ...}`` dict or a bare string) is split on ``|``, the
       alternatives equal to a known default are dropped, and the remainder
       is re-joined. Entries with nothing left, and entries that are neither
       a string nor a dict holding a string ``regex``, are removed.
       Surviving dict entries are rebuilt with only the ``regex`` key.

    Args:
        config_data: The configuration mapping to clean (mutated in place).

    Returns:
        True when the cleanup ran to completion, False when an unexpected
        error interrupted it (the error is logged, never raised).
    """
    # Default alternatives to strip. They are compared as plain text, not
    # as regexes; a set keeps each membership test constant-time.
    default_pattern_parts = frozenset((
        "第(\\d+)集",
        "第(\\d+)期",
        "第(\\d+)话",
        "(\\d+)集",
        "(\\d+)期",
        "(\\d+)话",
        "[Ee][Pp]?(\\d+)",
        "(\\d+)[-_\\s]*4[Kk]",
        "(\\d+)[-_\\\\s]*4[Kk]",
        "\\[(\\d+)\\]",
        "【(\\d+)】",
        "_?(\\d+)_?",
    ))

    def _strip_defaults(regex_text):
        """Return ``regex_text`` without default or empty alternatives."""
        # NOTE: the split is purely textual, so a "|" inside a group or a
        # character class is treated as a separator too.
        kept = []
        for part in regex_text.split('|'):
            part = part.strip()
            if part and part not in default_pattern_parts:
                kept.append(part)
        return '|'.join(kept)

    try:
        # 1. Task-level overrides are dropped entirely.
        tasklist = config_data.get('tasklist')
        if isinstance(tasklist, (list, tuple)):
            for task in tasklist:
                if not isinstance(task, dict):
                    continue
                task_config = task.get('config_data')
                # A null or malformed nested config must not abort the
                # whole cleanup, so it is simply left alone.
                if not isinstance(task_config, dict):
                    continue
                if 'episode_patterns' in task_config:
                    del task_config['episode_patterns']
                    # Drop the nested dict once nothing else lives in it.
                    if not task_config:
                        del task['config_data']

        # 2. The global list keeps only user-defined alternatives.
        current_patterns = config_data.get('episode_patterns')
        if isinstance(current_patterns, list):
            filtered_patterns = []
            for pattern in current_patterns:
                if isinstance(pattern, dict):
                    regex_text = pattern.get('regex')
                    if isinstance(regex_text, str):
                        custom = _strip_defaults(regex_text)
                        if custom:
                            filtered_patterns.append({'regex': custom})
                elif isinstance(pattern, str):
                    custom = _strip_defaults(pattern)
                    if custom:
                        filtered_patterns.append(custom)
            config_data['episode_patterns'] = filtered_patterns

        # The cleanup is intentionally silent on success.
        return True

    except Exception as e:
        # Best-effort step: report the failure instead of propagating it.
        logging.error(f"清理剧集识别规则配置失败: {str(e)}")
        return False
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
init()
|
init()
|
||||||
reload_tasks()
|
reload_tasks()
|
||||||
|
|||||||
@ -692,7 +692,7 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- 剧集识别模块 -->
|
<!-- 剧集识别模块 -->
|
||||||
<div class="row title" title="识别文件名中的剧集编号,用于自动重命名,查阅Wiki了解详情">
|
<div class="row title" title="识别文件名中的剧集编号,用于自动重命名,留空时使用内置默认规则,支持输入自定义规则作为补充,查阅Wiki了解详情">
|
||||||
<div class="col">
|
<div class="col">
|
||||||
<h2 style="display: inline-block; font-size: 1.5rem;">剧集识别</h2>
|
<h2 style="display: inline-block; font-size: 1.5rem;">剧集识别</h2>
|
||||||
<span class="badge badge-pill badge-light">
|
<span class="badge badge-pill badge-light">
|
||||||
@ -705,7 +705,7 @@
|
|||||||
<div class="input-group-prepend">
|
<div class="input-group-prepend">
|
||||||
<span class="input-group-text">集编号识别规则</span>
|
<span class="input-group-text">集编号识别规则</span>
|
||||||
</div>
|
</div>
|
||||||
<input type="text" class="form-control" v-model="episodePatternsText" placeholder="输入用于识别集编号的正则表达式,多个表达式用竖线分隔,特殊符号需要转义">
|
<input type="text" class="form-control" v-model="episodePatternsText" placeholder="留空使用内置默认规则,或输入自定义正则表达式作为补充规则,多个表达式用竖线分隔,特殊符号需要转义">
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -3256,11 +3256,15 @@
|
|||||||
return this.formData.episode_patterns.map(p => p.regex || '').join('|');
|
return this.formData.episode_patterns.map(p => p.regex || '').join('|');
|
||||||
},
|
},
|
||||||
set(value) {
|
set(value) {
|
||||||
// 允许直接输入正则表达式,当用户按下Enter键或失焦时再处理
|
// 支持竖线分割的多个正则表达式
|
||||||
// 这里我们创建一个单一的正则表达式对象,而不是拆分
|
if (!value || value.trim() === '') {
|
||||||
this.formData.episode_patterns = [{
|
this.formData.episode_patterns = [];
|
||||||
regex: value.trim()
|
return;
|
||||||
}];
|
}
|
||||||
|
|
||||||
|
// 按竖线分割并创建多个正则表达式对象
|
||||||
|
const patterns = value.split('|').map(p => p.trim()).filter(p => p !== '');
|
||||||
|
this.formData.episode_patterns = patterns.map(regex => ({ regex }));
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
// 管理视图:按任务名(拼音)排序并应用顶部筛选
|
// 管理视图:按任务名(拼音)排序并应用顶部筛选
|
||||||
@ -9689,9 +9693,23 @@
|
|||||||
bValue = String(b.episode_number);
|
bValue = String(b.episode_number);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// 否则使用重命名后的文件名进行拼音排序
|
// 否则尝试从重命名结果中提取数字进行数值排序
|
||||||
aValue = pinyinPro.pinyin(a.file_name_re || '', { toneType: 'none', type: 'string' }).toLowerCase();
|
const aRename = a.file_name_re || '';
|
||||||
bValue = pinyinPro.pinyin(b.file_name_re || '', { toneType: 'none', type: 'string' }).toLowerCase();
|
const bRename = b.file_name_re || '';
|
||||||
|
|
||||||
|
// 尝试提取数字(包括小数)
|
||||||
|
const aMatch = aRename.match(/(\d+(?:\.\d+)?)/);
|
||||||
|
const bMatch = bRename.match(/(\d+(?:\.\d+)?)/);
|
||||||
|
|
||||||
|
if (aMatch && bMatch) {
|
||||||
|
// 如果都能提取到数字,进行数值比较
|
||||||
|
aValue = parseFloat(aMatch[1]);
|
||||||
|
bValue = parseFloat(bMatch[1]);
|
||||||
|
} else {
|
||||||
|
// 否则使用重命名后的文件名进行拼音排序
|
||||||
|
aValue = pinyinPro.pinyin(aRename, { toneType: 'none', type: 'string' }).toLowerCase();
|
||||||
|
bValue = pinyinPro.pinyin(bRename, { toneType: 'none', type: 'string' }).toLowerCase();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.fileSelect.sortOrder === 'asc') {
|
if (this.fileSelect.sortOrder === 'asc') {
|
||||||
@ -9820,9 +9838,23 @@
|
|||||||
bValue = String(b.episode_number);
|
bValue = String(b.episode_number);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// 否则使用重命名后的文件名进行拼音排序
|
// 否则尝试从重命名结果中提取数字进行数值排序
|
||||||
aValue = pinyinPro.pinyin(a.file_name_re || '', { toneType: 'none', type: 'string' }).toLowerCase();
|
const aRename = a.file_name_re || '';
|
||||||
bValue = pinyinPro.pinyin(b.file_name_re || '', { toneType: 'none', type: 'string' }).toLowerCase();
|
const bRename = b.file_name_re || '';
|
||||||
|
|
||||||
|
// 尝试提取数字(包括小数)
|
||||||
|
const aMatch = aRename.match(/(\d+(?:\.\d+)?)/);
|
||||||
|
const bMatch = bRename.match(/(\d+(?:\.\d+)?)/);
|
||||||
|
|
||||||
|
if (aMatch && bMatch) {
|
||||||
|
// 如果都能提取到数字,进行数值比较
|
||||||
|
aValue = parseFloat(aMatch[1]);
|
||||||
|
bValue = parseFloat(bMatch[1]);
|
||||||
|
} else {
|
||||||
|
// 否则使用重命名后的文件名进行拼音排序
|
||||||
|
aValue = pinyinPro.pinyin(aRename, { toneType: 'none', type: 'string' }).toLowerCase();
|
||||||
|
bValue = pinyinPro.pinyin(bRename, { toneType: 'none', type: 'string' }).toLowerCase();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (order === 'asc') {
|
if (order === 'asc') {
|
||||||
|
|||||||
@ -289,7 +289,9 @@ def sort_file_by_name(file):
|
|||||||
if episode_value == float('inf'):
|
if episode_value == float('inf'):
|
||||||
match_e = re.search(r'[Ee][Pp]?(\d+)', filename)
|
match_e = re.search(r'[Ee][Pp]?(\d+)', filename)
|
||||||
if match_e:
|
if match_e:
|
||||||
episode_value = int(match_e.group(1))
|
# 若数字位于含字母的中括号内部,跳过该匹配
|
||||||
|
if not _in_alpha_brackets(filename, match_e.start(1), match_e.end(1)):
|
||||||
|
episode_value = int(match_e.group(1))
|
||||||
|
|
||||||
# 2.5 1x01格式
|
# 2.5 1x01格式
|
||||||
if episode_value == float('inf'):
|
if episode_value == float('inf'):
|
||||||
@ -315,16 +317,33 @@ def sort_file_by_name(file):
|
|||||||
resolution_patterns = [
|
resolution_patterns = [
|
||||||
r'\b\d+[pP]\b', # 匹配 720p, 1080P, 2160p 等
|
r'\b\d+[pP]\b', # 匹配 720p, 1080P, 2160p 等
|
||||||
r'\b\d+x\d+\b', # 匹配 1920x1080 等
|
r'\b\d+x\d+\b', # 匹配 1920x1080 等
|
||||||
# 注意:不移除4K/8K,避免误删文件名中的4K标识
|
r'(?<!\d)[248]\s*[Kk](?!\d)', # 匹配 2K/4K/8K
|
||||||
]
|
]
|
||||||
|
|
||||||
for pattern in resolution_patterns:
|
for pattern in resolution_patterns:
|
||||||
filename_without_resolution = re.sub(pattern, ' ', filename_without_resolution)
|
filename_without_resolution = re.sub(pattern, ' ', filename_without_resolution)
|
||||||
|
|
||||||
# 否则尝试提取任何数字
|
# 否则尝试提取任何数字
|
||||||
any_num_match = re.search(r'(\d+)', filename_without_resolution)
|
candidates = []
|
||||||
if any_num_match:
|
for m in re.finditer(r'\\d+', filename_without_resolution):
|
||||||
episode_value = int(any_num_match.group(1))
|
num_str = m.group(0)
|
||||||
|
# 过滤日期模式
|
||||||
|
if is_date_format(num_str):
|
||||||
|
continue
|
||||||
|
# 过滤中括号内且含字母的片段
|
||||||
|
span_l, span_r = m.start(), m.end()
|
||||||
|
if _in_alpha_brackets(filename_without_resolution, span_l, span_r):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
value = int(num_str)
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
if value > 9999:
|
||||||
|
continue
|
||||||
|
candidates.append((m.start(), value))
|
||||||
|
if candidates:
|
||||||
|
candidates.sort(key=lambda x: x[0])
|
||||||
|
episode_value = candidates[0][1]
|
||||||
|
|
||||||
# 3. 提取上中下标记或其他细分 - 第三级排序键
|
# 3. 提取上中下标记或其他细分 - 第三级排序键
|
||||||
segment_base = 0 # 基础值:上=1, 中=2, 下=3
|
segment_base = 0 # 基础值:上=1, 中=2, 下=3
|
||||||
@ -415,6 +434,54 @@ def sort_file_by_name(file):
|
|||||||
|
|
||||||
|
|
||||||
# 全局的剧集编号提取函数
|
# 全局的剧集编号提取函数
|
||||||
|
def _in_alpha_brackets(text, start, end):
|
||||||
|
"""
|
||||||
|
判断 [start,end) 范围内的数字是否位于"含字母的中括号对"内部。
|
||||||
|
支持英文方括号 [] 和中文方括号 【】。
|
||||||
|
要求:数字左侧最近的未闭合括号与右侧最近的对应闭合括号形成对,且括号内容包含字母。
|
||||||
|
但是允许 E/e 和 EP/ep/Ep 这样的集数格式。
|
||||||
|
"""
|
||||||
|
if start < 0 or end > len(text):
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 检查英文方括号 []
|
||||||
|
last_open_en = text.rfind('[', 0, start)
|
||||||
|
if last_open_en != -1:
|
||||||
|
close_before_en = text.rfind(']', 0, start)
|
||||||
|
if close_before_en == -1 or close_before_en < last_open_en:
|
||||||
|
close_after_en = text.find(']', end)
|
||||||
|
if close_after_en != -1:
|
||||||
|
content = text[last_open_en + 1:close_after_en]
|
||||||
|
if _check_bracket_content(content):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# 检查中文方括号 【】
|
||||||
|
last_open_cn = text.rfind('【', 0, start)
|
||||||
|
if last_open_cn != -1:
|
||||||
|
close_before_cn = text.rfind('】', 0, start)
|
||||||
|
if close_before_cn == -1 or close_before_cn < last_open_cn:
|
||||||
|
close_after_cn = text.find('】', end)
|
||||||
|
if close_after_cn != -1:
|
||||||
|
content = text[last_open_cn + 1:close_after_cn]
|
||||||
|
if _check_bracket_content(content):
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _check_bracket_content(content):
|
||||||
|
"""
|
||||||
|
检查括号内容是否应该被排除
|
||||||
|
"""
|
||||||
|
# 检查是否包含字母
|
||||||
|
has_letters = bool(re.search(r'[A-Za-z]', content))
|
||||||
|
if not has_letters:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 如果是 E/e 或 EP/ep/Ep 格式,则允许通过
|
||||||
|
if re.match(r'^[Ee][Pp]?\d+$', content):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
||||||
"""
|
"""
|
||||||
从文件名中提取剧集编号
|
从文件名中提取剧集编号
|
||||||
@ -430,6 +497,11 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
|||||||
# 首先去除文件扩展名
|
# 首先去除文件扩展名
|
||||||
file_name_without_ext = os.path.splitext(filename)[0]
|
file_name_without_ext = os.path.splitext(filename)[0]
|
||||||
|
|
||||||
|
# 特判:SxxEyy.zz 模式(例如 S01E11.11),在日期清洗前优先识别
|
||||||
|
m_spec = re.search(r'[Ss](\d+)[Ee](\d{1,2})[._\-/]\d{1,2}', file_name_without_ext)
|
||||||
|
if m_spec:
|
||||||
|
return int(m_spec.group(2))
|
||||||
|
|
||||||
# 预处理:排除文件名中可能是日期的部分,避免误识别
|
# 预处理:排除文件名中可能是日期的部分,避免误识别
|
||||||
date_patterns = [
|
date_patterns = [
|
||||||
# YYYY-MM-DD 或 YYYY.MM.DD 或 YYYY/MM/DD 或 YYYY MM DD格式(四位年份)
|
# YYYY-MM-DD 或 YYYY.MM.DD 或 YYYY/MM/DD 或 YYYY MM DD格式(四位年份)
|
||||||
@ -453,6 +525,11 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
|||||||
for match in matches:
|
for match in matches:
|
||||||
# 检查匹配的内容是否确实是日期
|
# 检查匹配的内容是否确实是日期
|
||||||
date_str = match.group(0)
|
date_str = match.group(0)
|
||||||
|
# 针对短日期 x.x 或 xx.xx:前一字符为 E/e 时不清洗(保护 E11.11 场景)
|
||||||
|
if re.match(r'^\d{1,2}[./-]\d{1,2}$', date_str):
|
||||||
|
prev_char = filename_without_dates[match.start()-1] if match.start() > 0 else ''
|
||||||
|
if prev_char in 'Ee':
|
||||||
|
continue
|
||||||
month = None
|
month = None
|
||||||
day = None
|
day = None
|
||||||
|
|
||||||
@ -496,7 +573,7 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
|||||||
resolution_patterns = [
|
resolution_patterns = [
|
||||||
r'\b\d+[pP]\b', # 匹配 720p, 1080P, 2160p 等
|
r'\b\d+[pP]\b', # 匹配 720p, 1080P, 2160p 等
|
||||||
r'\b\d+x\d+\b', # 匹配 1920x1080 等
|
r'\b\d+x\d+\b', # 匹配 1920x1080 等
|
||||||
# 注意:不移除4K/8K,避免误删文件名中的4K标识
|
r'(?<!\d)[248]\s*[Kk](?!\d)', # 匹配 2K/4K/8K
|
||||||
]
|
]
|
||||||
|
|
||||||
for pattern in resolution_patterns:
|
for pattern in resolution_patterns:
|
||||||
@ -523,7 +600,9 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
|||||||
# 其次匹配E01格式
|
# 其次匹配E01格式
|
||||||
match_e = re.search(r'[Ee][Pp]?(\d+)', filename_without_dates)
|
match_e = re.search(r'[Ee][Pp]?(\d+)', filename_without_dates)
|
||||||
if match_e:
|
if match_e:
|
||||||
return int(match_e.group(1))
|
# 若数字位于含字母的中括号内部,跳过该匹配
|
||||||
|
if not _in_alpha_brackets(filename_without_dates, match_e.start(1), match_e.end(1)):
|
||||||
|
return int(match_e.group(1))
|
||||||
|
|
||||||
# 添加中文数字匹配模式(优先匹配)
|
# 添加中文数字匹配模式(优先匹配)
|
||||||
chinese_patterns = [
|
chinese_patterns = [
|
||||||
@ -547,19 +626,6 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
|||||||
except:
|
except:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 智能4K匹配:检查是否匹配到4K模式,但要验证这个匹配是否合理
|
|
||||||
match_4k = re.search(r'(\d+)[-_\s]*4[Kk]', filename_without_dates)
|
|
||||||
if match_4k:
|
|
||||||
episode_num = int(match_4k.group(1))
|
|
||||||
# 检查文件名中是否已经有明确的剧集标识(中文数字或阿拉伯数字)
|
|
||||||
has_episode_indicator = re.search(r'第[一二三四五六七八九十百千万零两]+[期集话]|第\d+[期集话]', filename_without_dates)
|
|
||||||
if has_episode_indicator:
|
|
||||||
# 如果已经有明确的剧集标识,跳过4K匹配,避免冲突
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
# 没有明确的剧集标识,4K匹配有效
|
|
||||||
return episode_num
|
|
||||||
|
|
||||||
# 尝试匹配更多格式(注意:避免匹配季数)
|
# 尝试匹配更多格式(注意:避免匹配季数)
|
||||||
default_patterns = [
|
default_patterns = [
|
||||||
r'第(\d+)集',
|
r'第(\d+)集',
|
||||||
@ -569,25 +635,46 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
|||||||
r'(?<!第\d+季\s*)(\d+)期', # 避免匹配"第X季 Y期"中的季数
|
r'(?<!第\d+季\s*)(\d+)期', # 避免匹配"第X季 Y期"中的季数
|
||||||
r'(?<!第\d+季\s*)(\d+)话', # 避免匹配"第X季 Y话"中的季数
|
r'(?<!第\d+季\s*)(\d+)话', # 避免匹配"第X季 Y话"中的季数
|
||||||
r'[Ee][Pp]?(\d+)',
|
r'[Ee][Pp]?(\d+)',
|
||||||
r'(\d+)[-_\s]*4[Kk]',
|
|
||||||
r'\[(\d+)\]',
|
r'\[(\d+)\]',
|
||||||
r'【(\d+)】',
|
r'【(\d+)】',
|
||||||
r'_?(\d+)_?'
|
# 中文数字匹配模式
|
||||||
|
r'第([一二三四五六七八九十百千万零两]+)集',
|
||||||
|
r'第([一二三四五六七八九十百千万零两]+)期',
|
||||||
|
r'第([一二三四五六七八九十百千万零两]+)话',
|
||||||
|
r'([一二三四五六七八九十百千万零两]+)集',
|
||||||
|
r'([一二三四五六七八九十百千万零两]+)期',
|
||||||
|
r'([一二三四五六七八九十百千万零两]+)话',
|
||||||
|
# 先匹配"前方有分隔符"的数字,避免后一个规则优先命中单字符
|
||||||
|
r'[- _\s\.]([0-9]+)(?:[^0-9]|$)',
|
||||||
|
r'(?:^|[^0-9])(\d+)(?=[- _\s\.][^0-9])'
|
||||||
]
|
]
|
||||||
|
|
||||||
patterns = None
|
# 构建最终的patterns:默认模式 + 用户补充模式
|
||||||
|
patterns = []
|
||||||
|
|
||||||
|
# 1. 首先添加默认模式(除了最后的纯数字模式)
|
||||||
|
default_non_numeric = [p for p in default_patterns if not re.match(r'^[- _\\s\\.]\([0-9]+\)', p) and not re.match(r'^\([^)]*\)\([0-9]+\)', p)]
|
||||||
|
patterns.extend(default_non_numeric)
|
||||||
|
|
||||||
|
# 2. 添加用户补充的模式
|
||||||
|
user_patterns = []
|
||||||
|
|
||||||
# 检查传入的episode_patterns参数
|
# 检查传入的episode_patterns参数
|
||||||
if episode_patterns:
|
if episode_patterns:
|
||||||
patterns = [p.get("regex", "(\\d+)") for p in episode_patterns]
|
user_patterns = [p.get("regex", "(\\d+)") for p in episode_patterns if p.get("regex", "").strip()]
|
||||||
# 如果配置了task的自定义规则,优先使用
|
# 如果配置了task的自定义规则
|
||||||
elif config_data and isinstance(config_data.get("episode_patterns"), list) and config_data["episode_patterns"]:
|
elif config_data and isinstance(config_data.get("episode_patterns"), list) and config_data["episode_patterns"]:
|
||||||
patterns = [p.get("regex", "(\\d+)") for p in config_data["episode_patterns"]]
|
user_patterns = [p.get("regex", "(\\d+)") for p in config_data["episode_patterns"] if p.get("regex", "").strip()]
|
||||||
# 尝试从全局配置获取
|
# 尝试从全局配置获取
|
||||||
elif 'CONFIG_DATA' in globals() and isinstance(globals()['CONFIG_DATA'].get("episode_patterns"), list) and globals()['CONFIG_DATA']["episode_patterns"]:
|
elif 'CONFIG_DATA' in globals() and isinstance(globals()['CONFIG_DATA'].get("episode_patterns"), list) and globals()['CONFIG_DATA']["episode_patterns"]:
|
||||||
patterns = [p.get("regex", "(\\d+)") for p in globals()['CONFIG_DATA']["episode_patterns"]]
|
user_patterns = [p.get("regex", "(\\d+)") for p in globals()['CONFIG_DATA']["episode_patterns"] if p.get("regex", "").strip()]
|
||||||
else:
|
|
||||||
patterns = default_patterns
|
# 添加用户补充的模式
|
||||||
|
patterns.extend(user_patterns)
|
||||||
|
|
||||||
|
# 3. 最后添加默认的纯数字模式
|
||||||
|
default_numeric = [p for p in default_patterns if re.match(r'^[- _\\s\\.]\([0-9]+\)', p) or re.match(r'^\([^)]*\)\([0-9]+\)', p)]
|
||||||
|
patterns.extend(default_numeric)
|
||||||
|
|
||||||
# 尝试使用每个正则表达式匹配文件名(使用不含日期的文件名)
|
# 尝试使用每个正则表达式匹配文件名(使用不含日期的文件名)
|
||||||
for pattern_regex in patterns:
|
for pattern_regex in patterns:
|
||||||
@ -623,6 +710,10 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
|||||||
# 遍历所有捕获组,找到第一个非空的
|
# 遍历所有捕获组,找到第一个非空的
|
||||||
for group_num in range(1, len(match.groups()) + 1):
|
for group_num in range(1, len(match.groups()) + 1):
|
||||||
if match.group(group_num):
|
if match.group(group_num):
|
||||||
|
# 若数字位于含字母的中括号内部,跳过
|
||||||
|
span_l, span_r = match.start(group_num), match.end(group_num)
|
||||||
|
if _in_alpha_brackets(filename_without_dates, span_l, span_r):
|
||||||
|
continue
|
||||||
episode_num = int(match.group(group_num))
|
episode_num = int(match.group(group_num))
|
||||||
|
|
||||||
# 检查提取的数字是否可能是日期的一部分
|
# 检查提取的数字是否可能是日期的一部分
|
||||||
@ -646,6 +737,10 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
|||||||
# 单一模式的正则表达式
|
# 单一模式的正则表达式
|
||||||
match = re.search(pattern_regex, filename_without_dates)
|
match = re.search(pattern_regex, filename_without_dates)
|
||||||
if match:
|
if match:
|
||||||
|
# 若数字位于含字母的中括号内部,跳过
|
||||||
|
span_l, span_r = match.start(1), match.end(1)
|
||||||
|
if _in_alpha_brackets(filename_without_dates, span_l, span_r):
|
||||||
|
continue
|
||||||
episode_num = int(match.group(1))
|
episode_num = int(match.group(1))
|
||||||
|
|
||||||
# 检查提取的数字是否可能是日期的一部分
|
# 检查提取的数字是否可能是日期的一部分
|
||||||
@ -661,16 +756,27 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
|
|||||||
return int(filename_without_dates)
|
return int(filename_without_dates)
|
||||||
|
|
||||||
# 最后尝试提取任何数字,但要排除日期可能性
|
# 最后尝试提取任何数字,但要排除日期可能性
|
||||||
num_match = re.search(r'(\d+)', filename_without_dates)
|
candidates = []
|
||||||
if num_match:
|
for m in re.finditer(r'\\d+', filename_without_dates):
|
||||||
episode_num = int(num_match.group(1))
|
num_str = m.group(0)
|
||||||
# 检查提取的数字是否可能是日期
|
# 过滤日期模式
|
||||||
if not is_date_format(str(episode_num)):
|
if is_date_format(num_str):
|
||||||
# 检查是否是过大的数字(可能是时间戳、文件大小等)
|
continue
|
||||||
if episode_num > 9999:
|
# 过滤中括号内且含字母的片段
|
||||||
return None # 跳过过大的数字
|
span_l, span_r = m.start(), m.end()
|
||||||
return episode_num
|
if _in_alpha_brackets(filename_without_dates, span_l, span_r):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
value = int(num_str)
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
if value > 9999:
|
||||||
|
continue
|
||||||
|
candidates.append((m.start(), value))
|
||||||
|
if candidates:
|
||||||
|
candidates.sort(key=lambda x: x[0])
|
||||||
|
return candidates[0][1]
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# 全局变量
|
# 全局变量
|
||||||
@ -684,7 +790,7 @@ NOTIFYS = []
|
|||||||
def is_date_format(number_str):
|
def is_date_format(number_str):
|
||||||
"""
|
"""
|
||||||
判断一个纯数字字符串是否可能是日期格式
|
判断一个纯数字字符串是否可能是日期格式
|
||||||
支持的格式:YYYYMMDD, MMDD, YYMMDD
|
支持的格式:YYYYMMDD, YYMMDD
|
||||||
"""
|
"""
|
||||||
# 判断YYYYMMDD格式 (8位数字)
|
# 判断YYYYMMDD格式 (8位数字)
|
||||||
if len(number_str) == 8 and number_str.startswith('20'):
|
if len(number_str) == 8 and number_str.startswith('20'):
|
||||||
@ -708,16 +814,8 @@ def is_date_format(number_str):
|
|||||||
# 可能是日期格式
|
# 可能是日期格式
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# 判断MMDD格式 (4位数字)
|
# 不再将 4 位纯数字按 MMDD 视为日期,避免误伤集号(如 1124)
|
||||||
elif len(number_str) == 4:
|
|
||||||
month = int(number_str[:2])
|
|
||||||
day = int(number_str[2:4])
|
|
||||||
|
|
||||||
# 简单检查月份和日期是否有效
|
|
||||||
if 1 <= month <= 12 and 1 <= day <= 31:
|
|
||||||
# 可能是日期格式
|
|
||||||
return True
|
|
||||||
|
|
||||||
# 其他格式不视为日期格式
|
# 其他格式不视为日期格式
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -1038,22 +1136,6 @@ class Config:
|
|||||||
if task.get("media_id"):
|
if task.get("media_id"):
|
||||||
del task["media_id"]
|
del task["media_id"]
|
||||||
|
|
||||||
# 添加剧集识别模式配置
|
|
||||||
if not config_data.get("episode_patterns"):
|
|
||||||
print("🔼 添加剧集识别模式配置")
|
|
||||||
config_data["episode_patterns"] = [
|
|
||||||
{"description": "第[]集", "regex": "第(\\d+)集"},
|
|
||||||
{"description": "第[]期", "regex": "第(\\d+)期"},
|
|
||||||
{"description": "第[]话", "regex": "第(\\d+)话"},
|
|
||||||
{"description": "[]集", "regex": "(\\d+)集"},
|
|
||||||
{"description": "[]期", "regex": "(\\d+)期"},
|
|
||||||
{"description": "[]话", "regex": "(\\d+)话"},
|
|
||||||
{"description": "E/EP[]", "regex": "[Ee][Pp]?(\\d+)"},
|
|
||||||
{"description": "[]-4K", "regex": "(\\d+)[-_\\s]*4[Kk]"},
|
|
||||||
{"description": "[[]", "regex": "\\[(\\d+)\\]"},
|
|
||||||
{"description": "【[]】", "regex": "【(\\d+)】"},
|
|
||||||
{"description": "_[]_", "regex": "_?(\\d+)_?"}
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class Quark:
|
class Quark:
|
||||||
@ -3417,6 +3499,9 @@ class Quark:
|
|||||||
episode_pattern = task["episode_naming"]
|
episode_pattern = task["episode_naming"]
|
||||||
regex_pattern = task.get("regex_pattern")
|
regex_pattern = task.get("regex_pattern")
|
||||||
|
|
||||||
|
# 初始化变量
|
||||||
|
already_renamed_files = set() # 用于防止重复重命名
|
||||||
|
|
||||||
# 获取目录文件列表 - 添加这行代码初始化dir_file_list
|
# 获取目录文件列表 - 添加这行代码初始化dir_file_list
|
||||||
savepath = re.sub(r"/{2,}", "/", f"/{task['savepath']}{subdir_path}")
|
savepath = re.sub(r"/{2,}", "/", f"/{task['savepath']}{subdir_path}")
|
||||||
if not self.savepath_fid.get(savepath):
|
if not self.savepath_fid.get(savepath):
|
||||||
@ -3455,8 +3540,10 @@ class Quark:
|
|||||||
|
|
||||||
# 实现序号提取函数
|
# 实现序号提取函数
|
||||||
def extract_episode_number_local(filename):
|
def extract_episode_number_local(filename):
|
||||||
# 使用全局的统一提取函数
|
# 使用全局的统一提取函数,直接使用全局CONFIG_DATA
|
||||||
return extract_episode_number(filename, config_data=task.get("config_data"))
|
if 'CONFIG_DATA' not in globals() or not CONFIG_DATA:
|
||||||
|
return extract_episode_number(filename)
|
||||||
|
return extract_episode_number(filename, config_data=CONFIG_DATA)
|
||||||
|
|
||||||
# 找出已命名的文件列表,避免重复转存
|
# 找出已命名的文件列表,避免重复转存
|
||||||
existing_episode_numbers = set()
|
existing_episode_numbers = set()
|
||||||
|
|||||||
@ -65,9 +65,5 @@
|
|||||||
"enddate": "2099-01-30"
|
"enddate": "2099-01-30"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"episode_patterns": [
|
"episode_patterns": []
|
||||||
{
|
|
||||||
"regex": "第(\\d+)集|第(\\d+)期|第(\\d+)话|(\\d+)集|(\\d+)期|(\\d+)话|[Ee][Pp]?(\\d+)|(\\d+)[-_\\s]*4[Kk]|\\[(\\d+)\\]|【(\\d+)】|_?(\\d+)_?"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user