#!/usr/bin/env python3
"""
章节标题质量检查与修复系统
"""
|
|||
|
|
import os
import re
from datetime import datetime
|
|||
|
|
|
|||
|
|
# Directory whose ``.md`` chapter files are listed and analysed by main();
# the QC report is written next to it (one level up). Hard-coded absolute path.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
|
|||
|
|
|
|||
|
|
class ChapterTitleQC:
    """Rule-based quality checker for novel chapter titles.

    ``analyze_title`` scores a title out of 100 by subtracting fixed
    penalties; ``suggest_improvement`` proposes a replacement from a curated
    lookup table or, failing that, from simple clean-up / keyword-appending
    heuristics; ``generate_alternative_titles`` builds extra candidates from
    themes detected in the chapter text.
    """

    # A number wrapped in ASCII or full-width parentheses, e.g. "(2)".
    # (The previous pattern ``(\d+)`` was only a capture group, so it matched
    # *any* digits and left "()" behind when used for substitution.)
    _BRACKETED_NUMBER = re.compile(r'[(\uff08]\s*\d+\s*[)\uff09]')

    # Tooling/placeholder words, longest first so the most specific word is
    # the one reported ('修复' would otherwise shadow '手动修复' and '修复版').
    _TECH_WORDS = ('手动修复', '修复版', '修复', 'fixed', '备份', '版本')

    def __init__(self):
        """Build the pattern lists, keyword vocabulary and curated fixes."""
        # Patterns describing a well-formed title (kept for external use;
        # not consulted by the methods of this class).
        self.good_title_patterns = [
            r'^[\u4e00-\u9fa5]{2,8}$',  # 2-8 Chinese characters
            r'.*[?!,。]?$',  # optional trailing punctuation
        ]

        # (pattern, message) pairs describing titles that need fixing (kept
        # for external use; not consulted by the methods of this class).
        self.bad_title_patterns = [
            (r'_', '标题中不应有下划线'),  # underscore
            (self._BRACKETED_NUMBER.pattern, '不应有括号数字'),  # bracketed number
            (r'\d+', '标题中不应有数字(除章节号外)'),  # any digits
            (r'修复|修复版|手动修复|fixed', '不应有技术性词汇'),  # tooling words
            (r'^[\u4e00-\u9fa5]{1,2}$', '标题太短(1-2字)'),  # too short
            (r'^[\u4e00-\u9fa5]{9,}$', '标题太长(9字以上)'),  # too long
        ]

        # Genre vocabulary, grouped by category. Some words belong to more
        # than one category, so the combined list is de-duplicated (first
        # occurrence wins) to stop them counting twice when suggestions are
        # ranked by the number of keywords they contain.
        keyword_groups = [
            # actions
            '觉醒', '重生', '囤货', '末日', '降临', '爆发', '危机', '求生',
            '逃亡', '生存', '挣扎', '抉择', '博弈', '对峙', '谈判', '交易',
            '契约', '联盟', '背叛', '复仇', '救赎', '重生', '逆袭',

            # emotions
            '绝望', '希望', '恐惧', '勇气', '疯狂', '冷静', '煎熬', '挣扎',
            '痛苦', '解脱', '抉择', '迷茫', '坚定', '愤怒', '悲伤', '喜悦',

            # scenes
            '暗流', '冰点', '暗影', '铁锈', '钢渣', '焊火', '铁门', '电网',
            '仓库', '基地', '避难所', '安全屋', '堡垒', '防线', '围城',

            # time
            '倒计时', '最后时刻', '黎明前', '黄昏后', '末日钟', '生死线',

            # conflict
            '对决', '交锋', '冲突', '对抗', '挑战', '考验', '试炼', '陷阱',
        ]
        self.good_keywords = list(dict.fromkeys(keyword_groups))

        # Curated replacements, consulted before any heuristic.
        # NOTE: '钢渣' used to be listed twice ('钢铁意志' and '熔炉考验'); a
        # dict literal keeps only the last value, so that one is retained.
        self.title_improvements = {
            '筹码_手动修复': '致命筹码',
            '对峙(2)': '生死对峙',
            '焊火': '烈焰焊火',
            '铁锈': '锈蚀阴谋',
            '铁门': '钢铁之门',
            '充电': '能量重启',
            '倒计时': '末日倒计时',
            '质询': '致命质询',
            '断水': '水源危机',
            '昏沉': '意识迷途',
            '电话': '致命来电',
            '煎熬': '生死煎熬',
            '钢渣': '熔炉考验',
            '赴约': '死亡之约',
            '抉择': '命运抉择',
            '交付': '生死交付',
            '暗影': '暗影重重',
            '决断': '生死决断',
            '博弈': '末日博弈',
            '修复': '心灵修复',
        }

    def analyze_title(self, title):
        """Score *title* and list the problems found.

        Penalties: shorter than 2 characters -30, longer than 8 -20,
        underscore -25, bracketed number or any other digit -25, tooling
        word -30, no genre keyword -15 (only for titles of 2+ characters),
        plus -5 for every issue beyond the third. The score is clamped to
        0..100.

        Returns:
            dict with keys ``title``, ``score``, ``issues`` (list of
            messages) and ``grade`` (see ``get_grade``).
        """
        issues = []
        score = 100

        # Length
        if len(title) < 2:
            issues.append("标题太短(少于2字)")
            score -= 30
        elif len(title) > 8:
            issues.append("标题太长(超过8字)")
            score -= 20

        # Underscore
        if '_' in title:
            issues.append("标题包含下划线")
            score -= 25

        # Digits: the penalty is the same either way, but the message now
        # says what was actually found.
        if self._BRACKETED_NUMBER.search(title):
            issues.append("标题包含括号数字")
            score -= 25
        elif re.search(r'\d', title):
            issues.append("标题包含数字")
            score -= 25

        # Tooling words: only the first (most specific) hit is penalised.
        tech_word = next((w for w in self._TECH_WORDS if w in title), None)
        if tech_word is not None:
            issues.append(f"标题包含技术词汇'{tech_word}'")
            score -= 30

        # Attractiveness: at least one genre keyword expected.
        if len(title) >= 2 and not any(k in title for k in self.good_keywords):
            issues.append("标题缺乏吸引力关键词")
            score -= 15

        # Extra deduction when many problems pile up.
        if len(issues) > 3:
            score -= (len(issues) - 3) * 5

        score = max(0, min(100, score))

        return {
            'title': title,
            'score': score,
            'issues': issues,
            'grade': self.get_grade(score),
        }

    def get_grade(self, score):
        """Map a numeric score to its grade label (90/80/70/60 cut-offs)."""
        if score >= 90:
            return "A+ (优秀)"
        elif score >= 80:
            return "A (良好)"
        elif score >= 70:
            return "B+ (一般)"
        elif score >= 60:
            return "B (及格)"
        else:
            return "C (需要改进)"

    def suggest_improvement(self, original_title):
        """Return a suggested replacement for *original_title*.

        Order of precedence:
        1. an exact match in ``title_improvements``;
        2. the title with underscores and bracketed numbers removed (both
           clean-ups applied together), if that changes anything;
        3. for titles shorter than 4 characters, the title with one genre
           keyword appended (at most 6 characters in total), preferring the
           shortest result and then the one containing the most keywords;
        4. otherwise the original title unchanged.
        """
        curated = self.title_improvements.get(original_title)
        if curated is not None:
            return curated

        cleaned = self._BRACKETED_NUMBER.sub('', original_title.replace('_', ''))
        if cleaned != original_title:
            # A cleaned title is always shorter than any keyword-extended
            # candidate, so it is the preferred suggestion. Never suggest an
            # empty title.
            return cleaned.strip() or original_title

        if len(original_title) >= 4:
            return original_title

        candidates = [
            original_title + keyword
            for keyword in self.good_keywords
            if keyword not in original_title
            and len(original_title) + len(keyword) <= 6
        ]
        if not candidates:
            return original_title

        # min() returns the first minimal element, i.e. ties are broken by
        # keyword order.
        return min(
            candidates,
            key=lambda t: (len(t), -sum(1 for k in self.good_keywords if k in t)),
        )

    def generate_alternative_titles(self, chapter_num, content_preview=""):
        """Return up to five alternative titles for a chapter.

        Candidates are "<theme><keyword>" combinations (at most 6 characters)
        for the first three themes found in *content_preview*, followed by a
        few generic templates built from *chapter_num*. Duplicates are
        removed while keeping the generation order, so the result is the
        same on every run.
        """
        alternatives = []

        themes = self.analyze_content_themes(content_preview)

        for theme in themes[:3]:
            for keyword in self.good_keywords:
                if keyword not in theme and len(theme + keyword) <= 6:
                    alternatives.append(f"{theme}{keyword}")

        alternatives.extend([
            f"第{chapter_num}次抉择",
            f"生死第{chapter_num}关",
            f"末日倒计时{chapter_num}",
            f"重生第{chapter_num}步",
            f"危机第{chapter_num}重",
        ])

        # dict.fromkeys de-duplicates in insertion order; a set would make
        # the selection depend on string hash randomisation.
        return list(dict.fromkeys(alternatives))[:5]

    def analyze_content_themes(self, content):
        """Return the themes whose trigger words occur in *content*.

        Only the first 500 characters are inspected. Themes are returned in
        the order they are declared below; ``['未知']`` is returned when
        nothing matches (including empty or ``None`` content).
        """
        theme_keywords = {
            '谈判': ['谈判', '对话', '商议', '讨价还价'],
            '冲突': ['冲突', '对抗', '战斗', '争执'],
            '危机': ['危机', '危险', '威胁', '困境'],
            '生存': ['生存', '活下去', '求生', '保命'],
            '物资': ['物资', '食物', '水', '药品', '装备'],
            '阴谋': ['阴谋', '算计', '陷阱', '诡计'],
            '合作': ['合作', '联盟', '联手', '结盟'],
            '背叛': ['背叛', '出卖', '反目'],
        }

        preview = (content or "")[:500]
        themes = [
            theme
            for theme, keywords in theme_keywords.items()
            if any(keyword in preview for keyword in keywords)
        ]

        return themes if themes else ['未知']
|
|||
|
|
|
|||
|
|
def main():
    """Analyse every chapter file in CHAPTERS_DIR and print a title report.

    For each ``.md`` file whose name matches ``ch<N>-第<M>章 <title>.md`` the
    title is scored, a replacement is suggested and alternative titles are
    generated from the first 500 characters of the file. Results are printed
    to stdout and then handed to ``save_report``.

    Raises:
        SystemExit: if the chapters directory cannot be listed (non-zero
            exit status with a short message instead of a traceback).
    """
    print("📚 章节标题质量检查系统")
    print("=" * 50)

    qc = ChapterTitleQC()

    # Collect all chapter files.
    try:
        chapter_files = [f for f in os.listdir(CHAPTERS_DIR) if f.endswith('.md')]
    except OSError as err:
        raise SystemExit(f"无法读取章节目录 {CHAPTERS_DIR}: {err}") from err

    def chapter_order(name):
        # Sort by the number after "ch"; files without one sort first (0).
        found = re.search(r'ch(\d+)', name)
        return int(found.group(1)) if found else 0

    chapter_files.sort(key=chapter_order)

    print(f"共发现 {len(chapter_files)} 个章节")
    print()

    # Analyse each chapter title.
    results = []
    for filename in chapter_files:
        filepath = os.path.join(CHAPTERS_DIR, filename)

        # Extract chapter number and title from the file name; files that
        # do not follow the naming scheme are skipped.
        match = re.search(r'ch(\d+)-第\d+章\s+(.+)\.md', filename)
        if not match:
            continue

        chapter_num = match.group(1)
        original_title = match.group(2)

        # Only the first 500 characters are needed for theme detection.
        with open(filepath, 'r', encoding='utf-8') as f:
            content_preview = f.read(500)

        analysis = qc.analyze_title(original_title)
        suggested_title = qc.suggest_improvement(original_title)
        alternatives = qc.generate_alternative_titles(chapter_num, content_preview)

        results.append({
            'chapter_num': chapter_num,
            'filename': filename,
            'original_title': original_title,
            'analysis': analysis,
            'suggested_title': suggested_title,
            'alternatives': alternatives,
        })

    # Per-chapter report.
    print("📊 标题质量报告:")
    print("-" * 80)

    poor_titles = []
    good_titles = []

    for result in results:
        print(f"第{result['chapter_num']}章: {result['original_title']}")
        print(f" 评分: {result['analysis']['score']}/100 ({result['analysis']['grade']})")

        # A title counts as "poor" as soon as it has at least one issue.
        if result['analysis']['issues']:
            print(f" 问题: {', '.join(result['analysis']['issues'])}")
            poor_titles.append(result)
        else:
            print(" 状态: ✅ 良好")
            good_titles.append(result)

        if result['original_title'] != result['suggested_title']:
            print(f" 建议: {result['suggested_title']}")

        print()

    # Summary statistics.
    print("📈 统计信息:")
    print(f" 优秀标题: {len([r for r in results if r['analysis']['score'] >= 80])}")
    print(f" 需要改进: {len(poor_titles)}")
    print(f" 良好标题: {len(good_titles)}")

    # List the titles that need fixing together with the follow-up command.
    if poor_titles:
        print("\n🔧 需要修复的标题:")
        for i, result in enumerate(poor_titles, 1):
            print(f" {i}. 第{result['chapter_num']}章: {result['original_title']} → {result['suggested_title']}")

        print("\n💡 运行修复命令:")
        print(" python3 tools/apply_title_fixes.py")

    # Persist the report.
    save_report(results)
|
|||
|
|
|
|||
|
|
def save_report(results):
    """Write the title QC results as a Markdown report.

    The report is written to ``chapter_title_qc_report.md`` in the parent
    directory of CHAPTERS_DIR and contains a generation timestamp, the
    number of analysed chapters, one table row per chapter and a fixed list
    of improvement tips.

    Args:
        results: list of dicts as built by ``main`` (keys used here:
            ``chapter_num``, ``original_title``, ``suggested_title`` and
            ``analysis`` with ``score``, ``grade`` and ``issues``).
    """
    report_path = os.path.join(CHAPTERS_DIR, "../chapter_title_qc_report.md")

    def cell(value):
        # A literal "|" would end the table cell early; escape it.
        return str(value).replace('|', '\\|')

    # Taken from the interpreter instead of spawning the external `date`
    # command (not portable, and the pipe was never closed).
    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    with open(report_path, 'w', encoding='utf-8') as f:
        f.write("# 章节标题质量检查报告\n\n")
        f.write(f"生成时间: {generated_at}\n")
        f.write(f"总章节数: {len(results)}\n\n")

        f.write("## 标题质量分析\n\n")
        f.write("| 章节 | 原标题 | 评分 | 等级 | 问题 | 建议标题 |\n")
        f.write("|------|--------|------|------|------|----------|\n")

        for result in results:
            analysis = result['analysis']
            issues = '; '.join(analysis['issues']) if analysis['issues'] else '无'
            # Show a suggestion only when it differs from the current title.
            if result['original_title'] != result['suggested_title']:
                suggested = result['suggested_title']
            else:
                suggested = '无'

            f.write(
                f"| {cell(result['chapter_num'])} | {cell(result['original_title'])} "
                f"| {analysis['score']} | {cell(analysis['grade'])} "
                f"| {cell(issues)} | {cell(suggested)} |\n"
            )

        f.write("\n## 改进建议\n\n")
        f.write("1. 移除标题中的下划线和技术词汇\n")
        f.write("2. 避免使用括号数字\n")
        f.write("3. 标题长度建议2-6个字\n")
        f.write("4. 使用更具吸引力的关键词\n")

    print(f"\n📄 报告已保存: {report_path}")
|
|||
|
|
|
|||
|
|
# Run the title check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|