novel-doomsday-resurgence/tools/chapter_title_qc.py

343 lines
12 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Chapter title quality check and repair system.
"""
import os
import re
# Directory holding the chapter Markdown files. main() lists this directory,
# and save_report() writes its report one level above it.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
class ChapterTitleQC:
    """Quality checker for chapter titles.

    Scores a title on a 0-100 scale, lists the problems found, and proposes
    replacement or alternative titles built from a themed keyword list.
    """

    def __init__(self):
        """Set up the pattern tables, keyword list and curated replacements."""
        # Traits of a high-quality title on the Fanqie Novel platform.
        self.good_title_patterns = [
            r'^[\u4e00-\u9fa5]{2,8}$',  # 2-8 Chinese characters
            r'.*[?!,。]?$',  # trailing punctuation is allowed
        ]
        # Title patterns that need fixing, as (regex, message) pairs.
        # NOTE(review): the "too short" rule below flags 2-character titles,
        # while analyze_title() only penalises titles shorter than 2 -- confirm
        # which limit is intended.
        self.bad_title_patterns = [
            (r'_', '标题中不应有下划线'),  # underscore
            # Digits wrapped in ASCII or full-width parentheses. This entry
            # used to be a bare \d+, identical to the next rule, so it never
            # distinguished bracketed numbers from plain ones.
            (r'[(\uff08]\d+[)\uff09]', '不应有括号数字'),
            (r'\d+', '标题中不应有数字(除章节号外)'),  # any digit
            (r'修复|修复版|手动修复|fixed', '不应有技术性词汇'),  # tooling words
            (r'^[\u4e00-\u9fa5]{1,2}$', '标题太短1-2字'),  # too short
            (r'^[\u4e00-\u9fa5]{9,}$', '标题太长9字以上'),  # too long
        ]
        # Keyword pool for the post-apocalypse / rebirth genre. Entries are
        # unique: duplicates would be counted twice when suggestions are
        # ranked by keyword hits.
        self.good_keywords = [
            # Actions
            '觉醒', '重生', '囤货', '末日', '降临', '爆发', '危机', '求生',
            '逃亡', '生存', '挣扎', '抉择', '博弈', '对峙', '谈判', '交易',
            '契约', '联盟', '背叛', '复仇', '救赎', '逆袭',
            # Emotions
            '绝望', '希望', '恐惧', '勇气', '疯狂', '冷静', '煎熬',
            '痛苦', '解脱', '迷茫', '坚定', '愤怒', '悲伤', '喜悦',
            # Scenes
            '暗流', '冰点', '暗影', '铁锈', '钢渣', '焊火', '铁门', '电网',
            '仓库', '基地', '避难所', '安全屋', '堡垒', '防线', '围城',
            # Time
            '倒计时', '最后时刻', '黎明前', '黄昏后', '末日钟', '生死线',
            # Conflict
            '对决', '交锋', '冲突', '对抗', '挑战', '考验', '试炼', '陷阱',
        ]
        # Curated replacements, looked up by exact original title.
        # '钢渣' used to appear twice; only the later value ('熔炉考验') ever
        # took effect, so that is the one kept.
        # NOTE(review): the replacement for '修复' still contains '修复', which
        # analyze_title() flags as a tooling word -- confirm this is intended.
        self.title_improvements = {
            '筹码_手动修复': '致命筹码',
            '对峙2': '生死对峙',
            '焊火': '烈焰焊火',
            '铁锈': '锈蚀阴谋',
            '钢渣': '熔炉考验',
            '铁门': '钢铁之门',
            '充电': '能量重启',
            '倒计时': '末日倒计时',
            '质询': '致命质询',
            '断水': '水源危机',
            '昏沉': '意识迷途',
            '电话': '致命来电',
            '煎熬': '生死煎熬',
            '赴约': '死亡之约',
            '抉择': '命运抉择',
            '交付': '生死交付',
            '暗影': '暗影重重',
            '决断': '生死决断',
            '博弈': '末日博弈',
            '修复': '心灵修复',
        }

    def analyze_title(self, title):
        """Score a title and list its problems.

        Args:
            title: the chapter title text (without the chapter number).

        Returns:
            A dict with the keys ``title``, ``score`` (int clamped to 0-100),
            ``issues`` (list of message strings) and ``grade`` (see
            :meth:`get_grade`).
        """
        issues = []
        score = 100  # start from a perfect score and deduct

        # Length: fewer than 2 or more than 8 characters is penalised.
        if len(title) < 2:
            issues.append("标题太短少于2字")
            score -= 30
        elif len(title) > 8:
            issues.append("标题太长超过8字")
            score -= 20

        # Underscores.
        if '_' in title:
            issues.append("标题包含下划线")
            score -= 25

        # Digits. NOTE(review): the message mentions bracketed digits, but
        # the check flags any digit; kept as-is because titles such as
        # '对峙2' are expected to be caught.
        if re.search(r'\d+', title):
            issues.append("标题包含括号数字")
            score -= 25

        # Tooling words; only the first hit is reported. Compared
        # case-insensitively so that "Fixed" / "FIXED" are caught too.
        tech_words = ['修复', '修复版', '手动修复', 'fixed', '备份', '版本']
        lowered = title.lower()
        hit = next((word for word in tech_words if word in lowered), None)
        if hit is not None:
            issues.append(f"标题包含技术词汇'{hit}'")
            score -= 30

        # Appeal: at least one genre keyword should be present. Skipped for
        # titles already reported as too short.
        if len(title) >= 2 and not any(k in title for k in self.good_keywords):
            issues.append("标题缺乏吸引力关键词")
            score -= 15

        # Extra penalty for each issue beyond the third.
        if len(issues) > 3:
            score -= (len(issues) - 3) * 5

        # Clamp to the 0-100 range.
        score = max(0, min(100, score))
        return {
            'title': title,
            'score': score,
            'issues': issues,
            'grade': self.get_grade(score),
        }

    def get_grade(self, score):
        """Map a numeric score to its grade label."""
        thresholds = (
            (90, "A+ (优秀)"),
            (80, "A (良好)"),
            (70, "B+ (一般)"),
            (60, "B (及格)"),
        )
        for floor, label in thresholds:
            if score >= floor:
                return label
        return "C (需要改进)"

    def suggest_improvement(self, original_title):
        """Propose a better title, or return the original if none is found.

        A curated replacement from ``title_improvements`` wins outright.
        Otherwise underscores and digits are stripped (cumulatively), short
        results are extended with genre keywords, and the shortest candidate
        is chosen, with ties broken by the number of genre keywords it
        contains.

        Args:
            original_title: the title to improve.

        Returns:
            The suggested title; never an empty string unless the input was.
        """
        if original_title in self.title_improvements:
            return self.title_improvements[original_title]

        # Apply both clean-ups to the same string so a title containing an
        # underscore AND digits ends up free of both.
        cleaned = re.sub(r'\d+', '', original_title.replace('_', ''))
        if not cleaned:
            # Nothing but underscores/digits: there is no base to work from.
            return original_title

        suggestions = []
        # Do not offer a clean-up that analyze_title() would reject as too
        # short; it is still used as the base for keyword extensions below.
        if cleaned != original_title and len(cleaned) >= 2:
            suggestions.append(cleaned)

        # Extend short titles with a keyword, capped at 6 characters.
        if len(cleaned) < 4:
            for keyword in self.good_keywords:
                if keyword in cleaned:
                    continue
                enhanced = f"{cleaned}{keyword}"
                if len(enhanced) <= 6:
                    suggestions.append(enhanced)

        if not suggestions:
            return original_title

        def rank(candidate):
            hits = sum(1 for k in self.good_keywords if k in candidate)
            return (len(candidate), -hits)

        # min() returns the first of equally ranked candidates.
        return min(suggestions, key=rank)

    def generate_alternative_titles(self, chapter_num, content_preview=""):
        """Generate up to five alternative titles for a chapter.

        Args:
            chapter_num: chapter number, interpolated into the generic titles.
            content_preview: opening text of the chapter, used to pick themes.

        Returns:
            A list of at most five distinct titles in generation order:
            theme + keyword combinations first, generic titles afterwards.
        """
        alternatives = []
        themes = self.analyze_content_themes(content_preview)

        # Combine each of the first three themes with every keyword.
        for theme in themes[:3]:
            for keyword in self.good_keywords:
                if keyword not in theme and len(theme + keyword) <= 6:
                    alternatives.append(f"{theme}{keyword}")

        # Generic fallbacks.
        alternatives.extend([
            f"{chapter_num}次抉择",
            f"生死第{chapter_num}",
            f"末日倒计时{chapter_num}",
            f"重生第{chapter_num}",
            f"危机第{chapter_num}",
        ])

        # dict.fromkeys de-duplicates while keeping insertion order; a set
        # would make the selected five vary from run to run.
        return list(dict.fromkeys(alternatives))[:5]

    def analyze_content_themes(self, content):
        """Detect themes in the first 500 characters of ``content``.

        Args:
            content: chapter text; ``None`` or empty yields the fallback.

        Returns:
            Matching theme names in table order, or ``['未知']`` if none match.
        """
        # Theme -> trigger words. No entry may be an empty string: '' is a
        # substring of every text and would make its theme always match.
        theme_keywords = {
            '谈判': ['谈判', '对话', '商议', '讨价还价'],
            '冲突': ['冲突', '对抗', '战斗', '争执'],
            '危机': ['危机', '危险', '威胁', '困境'],
            '生存': ['生存', '活下去', '求生', '保命'],
            '物资': ['物资', '食物', '药品', '装备'],
            '阴谋': ['阴谋', '算计', '陷阱', '诡计'],
            '合作': ['合作', '联盟', '联手', '结盟'],
            '背叛': ['背叛', '出卖', '反目'],
        }
        excerpt = (content or "")[:500]  # only the opening is inspected
        themes = [
            theme
            for theme, keywords in theme_keywords.items()
            if any(keyword in excerpt for keyword in keywords)
        ]
        return themes if themes else ['未知']
def main():
    """Scan the chapter directory, report title quality and save a report.

    Lists the ``.md`` files in ``CHAPTERS_DIR``, extracts the chapter number
    and title from each file name, analyses the title with
    ``ChapterTitleQC``, prints a per-chapter report plus summary statistics,
    and finally hands the results to ``save_report``. Files whose names do
    not match the expected ``ch<N>-第<N>章 <title>.md`` shape are skipped.
    """
    print("📚 章节标题质量检查系统")
    print("=" * 50)
    qc = ChapterTitleQC()

    # Compiled once; the sort key previously ran the same search twice.
    chapter_no_re = re.compile(r'ch(\d+)')
    name_re = re.compile(r'ch(\d+)-第\d+章\s+(.+)\.md')

    def chapter_order(name):
        found = chapter_no_re.search(name)
        return int(found.group(1)) if found else 0

    # Collect every chapter file, ordered by chapter number.
    chapter_files = sorted(
        (f for f in os.listdir(CHAPTERS_DIR) if f.endswith('.md')),
        key=chapter_order,
    )
    print(f"共发现 {len(chapter_files)} 个章节")
    print()

    # Analyse each chapter title.
    results = []
    for filename in chapter_files:
        match = name_re.search(filename)
        if not match:
            continue
        chapter_num, original_title = match.group(1), match.group(2)

        # Read the opening of the chapter for theme detection.
        filepath = os.path.join(CHAPTERS_DIR, filename)
        with open(filepath, 'r', encoding='utf-8') as f:
            content_preview = f.read(500)

        results.append({
            'chapter_num': chapter_num,
            'filename': filename,
            'original_title': original_title,
            'analysis': qc.analyze_title(original_title),
            'suggested_title': qc.suggest_improvement(original_title),
            'alternatives': qc.generate_alternative_titles(chapter_num, content_preview),
        })

    # Per-chapter report.
    print("📊 标题质量报告:")
    print("-" * 80)
    poor_titles = []
    good_titles = []
    for result in results:
        analysis = result['analysis']
        print(f"{result['chapter_num']}章: {result['original_title']}")
        print(f" 评分: {analysis['score']}/100 ({analysis['grade']})")
        if analysis['issues']:
            print(f" 问题: {', '.join(analysis['issues'])}")
            poor_titles.append(result)
        else:
            print(" 状态: ✅ 良好")
            good_titles.append(result)
        if result['original_title'] != result['suggested_title']:
            print(f" 建议: {result['suggested_title']}")
        print()

    # Summary statistics.
    excellent = sum(1 for r in results if r['analysis']['score'] >= 80)
    print("📈 统计信息:")
    print(f" 优秀标题: {excellent}")
    print(f" 需要改进: {len(poor_titles)}")
    print(f" 良好标题: {len(good_titles)}")

    # List the titles that need fixing and point at the fix-up tool.
    if poor_titles:
        print("\n🔧 需要修复的标题:")
        for i, result in enumerate(poor_titles, 1):
            # The arrow separates the current title from the suggestion;
            # without it the two ran together into one unreadable string.
            print(f" {i}. 第{result['chapter_num']}章: {result['original_title']} → {result['suggested_title']}")
        print("\n💡 运行修复命令:")
        print(" python3 tools/apply_title_fixes.py")

    # Persist the report.
    save_report(results)
def save_report(results, report_path=None):
    """Write the title QC results to a Markdown report.

    Args:
        results: list of per-chapter dicts with the keys ``chapter_num``,
            ``original_title``, ``suggested_title`` and ``analysis`` (a dict
            with ``score``, ``grade`` and ``issues``).
        report_path: destination file. Defaults to
            ``chapter_title_qc_report.md`` one level above ``CHAPTERS_DIR``.
    """
    # Local import: the timestamp used to come from shelling out to `date`,
    # which is not portable and left the pipe unclosed.
    from datetime import datetime

    if report_path is None:
        report_path = os.path.join(CHAPTERS_DIR, "../chapter_title_qc_report.md")

    def cell(value):
        # A literal pipe would be read as a column separator in the table.
        return str(value).replace('|', '\\|')

    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    lines = [
        "# 章节标题质量检查报告\n\n",
        f"生成时间: {generated_at}\n",
        f"总章节数: {len(results)}\n\n",
        "## 标题质量分析\n\n",
        "| 章节 | 原标题 | 评分 | 等级 | 问题 | 建议标题 |\n",
        "|------|--------|------|------|------|----------|\n",
    ]
    for result in results:
        analysis = result['analysis']
        issues = '; '.join(analysis['issues'])
        # Leave the suggestion column empty when nothing would change.
        suggested = ''
        if result['original_title'] != result['suggested_title']:
            suggested = result['suggested_title']
        lines.append(
            f"| {result['chapter_num']} | {cell(result['original_title'])} "
            f"| {analysis['score']} | {analysis['grade']} "
            f"| {cell(issues)} | {cell(suggested)} |\n"
        )
    lines.extend([
        "\n## 改进建议\n\n",
        "1. 移除标题中的下划线和技术词汇\n",
        "2. 避免使用括号数字\n",
        "3. 标题长度建议2-6个字\n",
        "4. 使用更具吸引力的关键词\n",
    ])

    with open(report_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)
    print(f"\n📄 报告已保存: {report_path}")
# Run the QC report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()