扩展对中文数字的支持情况

This commit is contained in:
x1ao4 2025-06-19 21:07:47 +08:00
parent 2ff4071a35
commit fea41e76fd
2 changed files with 160 additions and 41 deletions

View File

@ -27,6 +27,7 @@ import os
import re
import random
import time
import treelib
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, parent_dir)
@ -35,19 +36,41 @@ from quark_auto_save import Config, format_bytes
# 添加导入全局extract_episode_number和sort_file_by_name函数
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from quark_auto_save import extract_episode_number, sort_file_by_name
from quark_auto_save import extract_episode_number, sort_file_by_name, chinese_to_arabic, is_date_format
# 导入数据库模块
try:
from app.sdk.db import RecordDB
# 先尝试相对导入
from sdk.db import RecordDB
except ImportError:
# 如果没有数据库模块,定义一个空类
class RecordDB:
def __init__(self, *args, **kwargs):
pass
def get_records(self, *args, **kwargs):
return {"records": [], "pagination": {"total_records": 0, "total_pages": 0, "current_page": 1, "page_size": 20}}
try:
# 如果相对导入失败尝试从app包导入
from app.sdk.db import RecordDB
except ImportError:
# 如果没有数据库模块,定义一个空类
class RecordDB:
def __init__(self, *args, **kwargs):
pass
def get_records(self, *args, **kwargs):
return {"records": [], "pagination": {"total_records": 0, "total_pages": 0, "current_page": 1, "page_size": 20}}
# Import the helper utilities, falling back step by step when a module is missing.
try:
    # First try importing from the top-level `sdk` package
    from sdk.utils import format_bytes, get_file_icon, format_file_display
except ImportError:
    try:
        # If that import fails, try again through the `app` package
        from app.sdk.utils import format_bytes, get_file_icon, format_file_display
    except ImportError:
        # Neither module could be imported: define minimal local fallbacks.
        # format_bytes is not redefined here because it has already been
        # imported from quark_auto_save.
        def get_file_icon(file_name, is_dir=False):
            # Folder icon for directories, generic document icon otherwise;
            # file_name is accepted but not inspected by this fallback.
            return "📄" if not is_dir else "📁"
        def format_file_display(prefix, icon, name):
            # Join prefix, icon and name into a single display line.
            return f"{prefix}{icon} {name}"
def get_app_ver():
@ -463,37 +486,6 @@ def get_task_suggestions():
return jsonify({"success": True, "message": f"error: {str(e)}"})
# 添加函数,与主程序保持一致
def is_date_format(number_str):
    """Return True when a digit-only string looks like a calendar date.

    Two layouts are recognised:

    * ``YYYYMMDD`` - eight digits that start with ``20``
    * ``MMDD`` - four digits

    Only a coarse range check is applied (month 1-12, day 1-31); the day is
    not validated against the month, so "0231" still counts as a date.

    Args:
        number_str: Candidate string, expected to consist of digits only.

    Returns:
        bool: True if the string may be a date, False otherwise. Non-string
        or non-digit input yields False instead of raising ValueError.
    """
    # Guard first: int() below would raise on anything that is not all digits.
    if not isinstance(number_str, str) or not number_str.isdecimal():
        return False

    if len(number_str) == 8 and number_str.startswith('20'):
        # YYYYMMDD: the year itself is not range-checked beyond the "20" prefix.
        month_day = number_str[4:]
    elif len(number_str) == 4:
        # MMDD
        month_day = number_str
    else:
        # Digit strings of any other length are not treated as dates.
        return False

    month = int(month_day[:2])
    day = int(month_day[2:])
    return 1 <= month <= 12 and 1 <= day <= 31
# 获取分享详情接口
@app.route("/get_share_detail", methods=["GET", "POST"])
def get_share_detail():
@ -599,6 +591,22 @@ def get_share_detail():
episode_pattern = regex.get("episode_naming")
episode_patterns = regex.get("episode_patterns", [])
# 添加中文数字匹配模式
chinese_patterns = [
{"regex": r'第([一二三四五六七八九十百千万零两]+)集'},
{"regex": r'第([一二三四五六七八九十百千万零两]+)期'},
{"regex": r'第([一二三四五六七八九十百千万零两]+)话'},
{"regex": r'([一二三四五六七八九十百千万零两]+)集'},
{"regex": r'([一二三四五六七八九十百千万零两]+)期'},
{"regex": r'([一二三四五六七八九十百千万零两]+)话'}
]
# 合并中文模式到episode_patterns
if episode_patterns:
episode_patterns.extend(chinese_patterns)
else:
episode_patterns = chinese_patterns
# 调用全局的集编号提取函数
def extract_episode_number_local(filename):
return extract_episode_number(filename, episode_patterns=episode_patterns)

View File

@ -141,12 +141,30 @@ def sort_file_by_name(file):
if match_chinese:
episode_value = int(match_chinese.group(1))
# 2.1.1 "第[中文数字]期/集/话" 格式
if episode_value == float('inf'):
match_chinese_num = re.search(r'第([一二三四五六七八九十百千万零两]+)[期集话]', filename)
if match_chinese_num:
chinese_num = match_chinese_num.group(1)
arabic_num = chinese_to_arabic(chinese_num)
if arabic_num is not None:
episode_value = arabic_num
# 2.2 "X集/期/话" 格式
if episode_value == float('inf'):
match_chinese_simple = re.search(r'(\d+)[期集话]', filename)
if match_chinese_simple:
episode_value = int(match_chinese_simple.group(1))
# 2.2.1 "[中文数字]集/期/话" 格式
if episode_value == float('inf'):
match_chinese_simple_num = re.search(r'([一二三四五六七八九十百千万零两]+)[期集话]', filename)
if match_chinese_simple_num:
chinese_num = match_chinese_simple_num.group(1)
arabic_num = chinese_to_arabic(chinese_num)
if arabic_num is not None:
episode_value = arabic_num
# 2.3 S01E01格式
if episode_value == float('inf'):
match_s_e = re.search(r'[Ss](\d+)[Ee](\d+)', filename)
@ -299,6 +317,16 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
r'_?(\d+)_?'
]
# 添加中文数字匹配模式
chinese_patterns = [
r'第([一二三四五六七八九十百千万零两]+)集',
r'第([一二三四五六七八九十百千万零两]+)期',
r'第([一二三四五六七八九十百千万零两]+)话',
r'([一二三四五六七八九十百千万零两]+)集',
r'([一二三四五六七八九十百千万零两]+)期',
r'([一二三四五六七八九十百千万零两]+)话'
]
patterns = None
# 检查传入的episode_patterns参数
@ -328,7 +356,19 @@ def extract_episode_number(filename, episode_patterns=None, config_data=None):
return episode_num
except:
continue
# 尝试匹配中文数字模式
for pattern_regex in chinese_patterns:
try:
match = re.search(pattern_regex, filename_without_dates)
if match:
chinese_num = match.group(1)
arabic_num = chinese_to_arabic(chinese_num)
if arabic_num is not None:
return arabic_num
except:
continue
# 如果从不含日期的文件名中没有找到剧集号,尝试从原始文件名中提取
# 这是为了兼容某些特殊情况,但要检查提取的数字不是日期
file_name_without_ext = os.path.splitext(filename)[0]
@ -395,6 +435,72 @@ def is_date_format(number_str):
# 其他格式不视为日期格式
return False
def chinese_to_arabic(chinese):
    """Convert a Chinese numeral string to an integer.

    Supported forms:

    * single characters: "零" to "九", "两" (treated as 2) and "十" (10)
    * unit expressions: "十三" (13), "二十" (20), "一百零五" (105),
      "二百十五" (215), "一万二千" (12000)
    * positional digit runs without any unit: "二零二四" (2024)

    Args:
        chinese: Chinese numeral string.

    Returns:
        int: The converted value, or None if the input is empty, is not a
        string, contains an unsupported character, or is malformed (two
        non-zero digits in a row inside a unit expression).
    """
    if not isinstance(chinese, str) or not chinese:
        return None

    # Digit characters. "十" is deliberately NOT listed here: it is a unit,
    # and treating it as both digit and unit is what made "十三" come out as 16.
    digit_map = {
        '零': 0, '一': 1, '二': 2, '三': 3, '四': 4,
        '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
        '两': 2,
    }
    # Multipliers that apply inside one "万" section.
    unit_map = {
        '十': 10,
        '百': 100,
        '千': 1000,
    }
    section_unit = '万'

    # Single character: a digit or "十". A lone "百"/"千"/"万" is not
    # accepted as a number on its own (returns None).
    if len(chinese) == 1:
        if chinese == '十':
            return 10
        return digit_map.get(chinese)

    # No unit at all: read the digits positionally ("二零二四" -> 2024).
    if all(char in digit_map for char in chinese):
        return int(''.join(str(digit_map[char]) for char in chinese))

    total = 0       # value of the completed "万" sections
    section = 0     # value accumulated in the current section
    pending = None  # digit waiting for its unit (or the final ones digit)

    # Left to right, so every digit is multiplied by the unit that follows it.
    for char in chinese:
        if char in digit_map:
            if pending:
                # Two non-zero digits in a row next to units is malformed.
                return None
            pending = digit_map[char]
        elif char in unit_map:
            # A bare unit implies a leading one: "十三" -> 13, "二百十五" -> 215.
            # "零" is only a placeholder, so it does not cancel the unit.
            section += (pending or 1) * unit_map[char]
            pending = None
        elif char == section_unit:
            group = section + (pending or 0)
            if not group and not total:
                group = 1  # a bare leading "万" means "一万"
            total = (total + group) * 10000
            section = 0
            pending = None
        else:
            # Unsupported character
            return None

    # A trailing digit counts as the ones place ("一百零五" -> 105).
    return total + section + (pending or 0)
# 兼容青龙
try:
from treelib import Tree
@ -4258,6 +4364,11 @@ def do_save(account, tasklist=[]):
number_part = filename[len(prefix):].split(suffix)[0] if suffix else filename[len(prefix):]
if number_part.isdigit():
return int(number_part)
# 尝试转换中文数字
else:
arabic_num = chinese_to_arabic(number_part)
if arabic_num is not None:
return arabic_num
# 如果所有方法都失败返回float('inf')
return float('inf')