novel-doomsday-resurgence/tools/chapter_title_qc.py
唐天洛 2003fa15ef 章节标题质量改进系统完成
 修复关键标题问题:
1. 筹码_手动修复 → 致命筹码
2. 修复 → 心灵修复
3. 对峙(2) → 生死对峙

 创建完整质量检查与修复工具集:
1. chapter_title_qc.py - 标题质量分析系统
2. apply_title_fixes.py - 自动修复工具
3. clean_ai_markers.py - AI标记清理工具
4. final_format_fix.py - 最终格式修复工具
5. improve_all_titles.py - 全面标题改进工具

 所有29个章节标题质量均已优化,评分A级以上
 移除爽点分析内容,确保正文纯净
 提升标题吸引力和阅读体验
2026-03-30 14:53:52 +08:00

343 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
章节标题质量检查与修复系统
"""
import os
import re
from datetime import datetime
# Directory whose ``*.md`` chapter files are scanned by main(); the QC report
# is written one level above it by save_report().
# NOTE(review): absolute, machine-specific path -- confirm before running on
# another host.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
class ChapterTitleQC:
    """Score chapter titles, suggest replacements and propose alternatives.

    The instance carries only static lookup tables (regex patterns, a keyword
    vocabulary and a hand-written replacement map); constructing it performs
    no I/O.
    """

    # Theme name -> keywords whose presence in the chapter text signals it.
    # NOTE(review): the '物资' list originally carried an empty-string entry,
    # presumably a keyword lost in transit. An empty string is a substring of
    # every text, so it made the theme match unconditionally; it is dropped
    # here -- confirm what the missing keyword was and re-add it.
    _THEME_KEYWORDS = {
        '谈判': ['谈判', '对话', '商议', '讨价还价'],
        '冲突': ['冲突', '对抗', '战斗', '争执'],
        '危机': ['危机', '危险', '威胁', '困境'],
        '生存': ['生存', '活下去', '求生', '保命'],
        '物资': ['物资', '食物', '药品', '装备'],
        '阴谋': ['阴谋', '算计', '陷阱', '诡计'],
        '合作': ['合作', '联盟', '联手', '结盟'],
        '背叛': ['背叛', '出卖', '反目'],
    }

    def __init__(self):
        # Shapes of an acceptable title.
        self.good_title_patterns = [
            r'^[\u4e00-\u9fa5]{2,8}$',  # 2-8 Chinese characters
            r'.*[?!,。]?$',  # optional trailing punctuation
        ]
        # (pattern, reason) pairs describing titles that need fixing.
        # NOTE(review): the two digit patterns are identical; the "bracketed
        # number" one has presumably lost its brackets -- confirm.
        self.bad_title_patterns = [
            (r'_', '标题中不应有下划线'),  # underscore
            (r'\d+', '不应有括号数字'),  # bracketed number
            (r'\d+', '标题中不应有数字(除章节号外)'),  # any digit
            (r'修复|修复版|手动修复|fixed', '不应有技术性词汇'),  # tooling words
            (r'^[\u4e00-\u9fa5]{1,2}$', '标题太短1-2字'),  # too short
            (r'^[\u4e00-\u9fa5]{9,}$', '标题太长9字以上'),  # too long
        ]
        # Vocabulary that makes a title attractive for this genre. Some
        # entries repeat across categories; a repeated entry counts twice in
        # the ranking used by suggest_improvement().
        self.good_keywords = [
            # actions
            '觉醒', '重生', '囤货', '末日', '降临', '爆发', '危机', '求生',
            '逃亡', '生存', '挣扎', '抉择', '博弈', '对峙', '谈判', '交易',
            '契约', '联盟', '背叛', '复仇', '救赎', '重生', '逆袭',
            # emotions
            '绝望', '希望', '恐惧', '勇气', '疯狂', '冷静', '煎熬', '挣扎',
            '痛苦', '解脱', '抉择', '迷茫', '坚定', '愤怒', '悲伤', '喜悦',
            # settings
            '暗流', '冰点', '暗影', '铁锈', '钢渣', '焊火', '铁门', '电网',
            '仓库', '基地', '避难所', '安全屋', '堡垒', '防线', '围城',
            # time
            '倒计时', '最后时刻', '黎明前', '黄昏后', '末日钟', '生死线',
            # conflict
            '对决', '交锋', '冲突', '对抗', '挑战', '考验', '试炼', '陷阱',
        ]
        # Hand-written replacements, consulted before any heuristic.
        # '钢渣' used to be listed twice; only the later value ever took
        # effect, so the dead first value has been removed and the surviving
        # one kept at the original position.
        self.title_improvements = {
            '筹码_手动修复': '致命筹码',
            '对峙2': '生死对峙',
            '焊火': '烈焰焊火',
            '铁锈': '锈蚀阴谋',
            '钢渣': '熔炉考验',
            '铁门': '钢铁之门',
            '充电': '能量重启',
            '倒计时': '末日倒计时',
            '质询': '致命质询',
            '断水': '水源危机',
            '昏沉': '意识迷途',
            '电话': '致命来电',
            '煎熬': '生死煎熬',
            '赴约': '死亡之约',
            '抉择': '命运抉择',
            '交付': '生死交付',
            '暗影': '暗影重重',
            '决断': '生死决断',
            '博弈': '末日博弈',
            '修复': '心灵修复',
        }

    def analyze_title(self, title):
        """Score a title from 0 to 100 and list the problems found.

        Args:
            title: The chapter title text (without the chapter number).

        Returns:
            A dict with keys ``title``, ``score`` (int), ``issues`` (list of
            messages) and ``grade`` (label from get_grade()).
        """
        issues = []
        score = 100

        # Length: under 2 characters or over 8 is penalised.
        if len(title) < 2:
            issues.append("标题太短少于2字")
            score -= 30
        elif len(title) > 8:
            issues.append("标题太长超过8字")
            score -= 20

        if '_' in title:
            issues.append("标题包含下划线")
            score -= 25

        # Any run of digits triggers this check.
        if re.search(r'\d+', title):
            issues.append("标题包含括号数字")
            score -= 25

        # Only the first matching tooling word is reported and penalised.
        tech_words = ['修复', '修复版', '手动修复', 'fixed', '备份', '版本']
        for word in tech_words:
            if word in title:
                issues.append(f"标题包含技术词汇'{word}'")
                score -= 30
                break

        # Titles of 2+ characters are expected to contain a genre keyword.
        has_good_keyword = any(keyword in title for keyword in self.good_keywords)
        if not has_good_keyword and len(title) >= 2:
            issues.append("标题缺乏吸引力关键词")
            score -= 15

        # Each issue beyond the third costs 5 more points.
        if len(issues) > 3:
            score -= (len(issues) - 3) * 5

        score = max(0, min(100, score))
        return {
            'title': title,
            'score': score,
            'issues': issues,
            'grade': self.get_grade(score),
        }

    def get_grade(self, score):
        """Map a numeric score to its grade label."""
        if score >= 90:
            return "A+ (优秀)"
        elif score >= 80:
            return "A (良好)"
        elif score >= 70:
            return "B+ (一般)"
        elif score >= 60:
            return "B (及格)"
        else:
            return "C (需要改进)"

    def suggest_improvement(self, original_title):
        """Return a suggested replacement for ``original_title``.

        Resolution order:
          1. an exact hit in ``title_improvements``;
          2. the title with every underscore and digit run removed, when that
             changes it and leaves something behind;
          3. for titles shorter than 4 characters, the title plus one genre
             keyword (at most 6 characters), preferring the shortest result
             and then the one containing the most keywords;
          4. otherwise the title unchanged.

        Args:
            original_title: The current title text.

        Returns:
            The suggested title; equal to ``original_title`` when there is
            nothing to suggest.
        """
        if original_title in self.title_improvements:
            return self.title_improvements[original_title]

        # Strip both kinds of noise in one pass. Cleaning them separately
        # used to yield a suggestion that still carried the other flaw.
        cleaned = re.sub(r'\d+', '', original_title.replace('_', ''))
        if cleaned != original_title:
            # A title made only of noise has no usable cleaned form; keep the
            # original rather than suggesting an empty title.
            return cleaned or original_title

        if len(original_title) >= 4:
            return original_title

        candidates = [
            f"{original_title}{keyword}"
            for keyword in self.good_keywords
            if keyword not in original_title
            and len(original_title) + len(keyword) <= 6
        ]
        if not candidates:
            return original_title

        def rank(candidate):
            hits = sum(1 for k in self.good_keywords if k in candidate)
            return (len(candidate), -hits)

        # min() returns the first best candidate, i.e. keyword-list order
        # breaks ties.
        return min(candidates, key=rank)

    def generate_alternative_titles(self, chapter_num, content_preview="", limit=5):
        """Build a deterministic list of alternative titles for a chapter.

        Theme-based titles (theme + genre keyword, at most 6 characters) come
        first, followed by generic templates built from ``chapter_num``.
        Because theme-based titles are plentiful, the generic ones only
        appear when ``limit`` is raised well beyond the default.

        Args:
            chapter_num: Chapter number, interpolated into generic templates.
            content_preview: Opening text of the chapter used to detect
                themes.
            limit: Maximum number of titles to return.

        Returns:
            Up to ``limit`` unique titles in generation order.
        """
        themes = self.analyze_content_themes(content_preview)
        alternatives = [
            f"{theme}{keyword}"
            for theme in themes[:3]  # only the first three themes are used
            for keyword in self.good_keywords
            if keyword not in theme and len(theme + keyword) <= 6
        ]
        alternatives.extend([
            f"{chapter_num}次抉择",
            f"生死第{chapter_num}",
            f"末日倒计时{chapter_num}",
            f"重生第{chapter_num}",
            f"危机第{chapter_num}",
        ])
        # dict.fromkeys removes duplicates while keeping first-seen order;
        # truncating a set returned a different selection on every run.
        return list(dict.fromkeys(alternatives))[:limit]

    def analyze_content_themes(self, content):
        """Detect themes from the first 500 characters of ``content``.

        Args:
            content: Chapter text (or a preview of it).

        Returns:
            Theme names in table order for which at least one keyword occurs,
            or ``['未知']`` when none match.
        """
        head = content[:500]
        themes = [
            theme
            for theme, keywords in self._THEME_KEYWORDS.items()
            # Skip blank keywords: '' is a substring of every string.
            if any(keyword and keyword in head for keyword in keywords)
        ]
        return themes if themes else ['未知']
def main():
    """Scan the chapter directory, print a title-quality report and save it.

    Every ``*.md`` file in ``CHAPTERS_DIR`` whose name matches
    ``ch<N>-第<N>章 <title>.md`` is analysed; other files are counted in the
    "found" total but skipped. Results are printed to stdout and then handed
    to save_report().
    """
    print("📚 章节标题质量检查系统")
    print("=" * 50)
    qc = ChapterTitleQC()

    def chapter_order(filename):
        # Files without a ``ch<N>`` marker sort first (key 0).
        found = re.search(r'ch(\d+)', filename)
        return int(found.group(1)) if found else 0

    chapter_files = sorted(
        (name for name in os.listdir(CHAPTERS_DIR) if name.endswith('.md')),
        key=chapter_order,
    )
    print(f"共发现 {len(chapter_files)} 个章节")
    print()

    # Analyse each chapter title.
    results = []
    for filename in chapter_files:
        match = re.search(r'ch(\d+)-第\d+章\s+(.+)\.md', filename)
        if not match:
            continue
        chapter_num, original_title = match.group(1), match.group(2)

        # Only the first 500 characters are needed for theme detection.
        filepath = os.path.join(CHAPTERS_DIR, filename)
        with open(filepath, 'r', encoding='utf-8') as f:
            content_preview = f.read(500)

        results.append({
            'chapter_num': chapter_num,
            'filename': filename,
            'original_title': original_title,
            'analysis': qc.analyze_title(original_title),
            'suggested_title': qc.suggest_improvement(original_title),
            'alternatives': qc.generate_alternative_titles(chapter_num, content_preview),
        })

    # Per-chapter report.
    print("📊 标题质量报告:")
    print("-" * 80)
    poor_titles = []
    good_titles = []
    for result in results:
        analysis = result['analysis']
        print(f"{result['chapter_num']}章: {result['original_title']}")
        print(f" 评分: {analysis['score']}/100 ({analysis['grade']})")
        if analysis['issues']:
            print(f" 问题: {', '.join(analysis['issues'])}")
            poor_titles.append(result)
        else:
            print(" 状态: ✅ 良好")
            good_titles.append(result)
        if result['original_title'] != result['suggested_title']:
            print(f" 建议: {result['suggested_title']}")
        print()

    # Summary counts.
    excellent = sum(1 for r in results if r['analysis']['score'] >= 80)
    print("📈 统计信息:")
    print(f" 优秀标题: {excellent}")
    print(f" 需要改进: {len(poor_titles)}")
    print(f" 良好标题: {len(good_titles)}")

    # List the titles that have issues together with their suggestion.
    if poor_titles:
        print("\n🔧 需要修复的标题:")
        for i, result in enumerate(poor_titles, 1):
            # The arrow separates old and new title; without it the two
            # strings were printed fused together.
            print(
                f" {i}. 第{result['chapter_num']}章: "
                f"{result['original_title']} → {result['suggested_title']}"
            )
        print("\n💡 运行修复命令:")
        print(" python3 tools/apply_title_fixes.py")

    save_report(results)
def save_report(results):
    """Write the title-quality report as Markdown next to the chapter folder.

    The file ``chapter_title_qc_report.md`` is created (or overwritten) in
    the parent of ``CHAPTERS_DIR`` and its path is printed afterwards.

    Args:
        results: List of dicts as built by main(); each needs the keys
            ``chapter_num``, ``original_title``, ``suggested_title`` and
            ``analysis`` (with ``score``, ``grade`` and ``issues``).
    """
    report_path = os.path.join(CHAPTERS_DIR, "../chapter_title_qc_report.md")

    # Take the timestamp in-process instead of shelling out to ``date``,
    # which is not portable and left its pipe unclosed.
    generated_at = datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %z')

    def cell(value):
        # A literal pipe would otherwise split the Markdown table cell.
        return str(value).replace('|', '\\|')

    lines = [
        "# 章节标题质量检查报告\n\n",
        f"生成时间: {generated_at}\n",
        f"总章节数: {len(results)}\n\n",
        "## 标题质量分析\n\n",
        "| 章节 | 原标题 | 评分 | 等级 | 问题 | 建议标题 |\n",
        "|------|--------|------|------|------|----------|\n",
    ]
    for result in results:
        analysis = result['analysis']
        issues = '; '.join(analysis['issues']) if analysis['issues'] else ''
        # Leave the suggestion column blank when nothing would change.
        if result['original_title'] != result['suggested_title']:
            suggested = result['suggested_title']
        else:
            suggested = ''
        lines.append(
            f"| {cell(result['chapter_num'])} | {cell(result['original_title'])} "
            f"| {analysis['score']} | {cell(analysis['grade'])} "
            f"| {cell(issues)} | {cell(suggested)} |\n"
        )
    lines.extend([
        "\n## 改进建议\n\n",
        "1. 移除标题中的下划线和技术词汇\n",
        "2. 避免使用括号数字\n",
        "3. 标题长度建议2-6个字\n",
        "4. 使用更具吸引力的关键词\n",
    ])

    with open(report_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)
    print(f"\n📄 报告已保存: {report_path}")
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()