#!/usr/bin/env python3
"""Chapter title quality checker and fix-suggestion tool.

Lists the ``.md`` chapter files in ``CHAPTERS_DIR``, scores the title embedded
in each file name, prints a report to stdout and writes a Markdown summary
one directory above ``CHAPTERS_DIR``.
"""

import os
import re
from datetime import datetime

CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"

# A number wrapped in ASCII or full-width parentheses, e.g. "(2)".
# The parentheses have to be matched literally: a bare r'(\d+)' is only a
# capture group and therefore matches every run of digits.
_BRACKETED_NUMBER_RE = re.compile(r'[((]\d+[))]')

# Chapter number embedded in a file name ("ch12...").
_CHAPTER_NUMBER_RE = re.compile(r'ch(\d+)')

# Chapter file name; group 1 is the chapter number, group 2 the title.
_CHAPTER_FILENAME_RE = re.compile(r'ch(\d+)-第\d+章\s+(.+)\.md')


class ChapterTitleQC:
    """Score chapter titles and propose replacement titles."""

    # Words that betray a working/technical file name rather than a title.
    # Order matters: the first word found in the title is the one reported.
    _TECH_WORDS = ('修复', '修复版', '手动修复', 'fixed', '备份', '版本')

    # Theme name -> words whose presence in the chapter text signals it.
    _THEME_KEYWORDS = {
        '谈判': ['谈判', '对话', '商议', '讨价还价'],
        '冲突': ['冲突', '对抗', '战斗', '争执'],
        '危机': ['危机', '危险', '威胁', '困境'],
        '生存': ['生存', '活下去', '求生', '保命'],
        '物资': ['物资', '食物', '水', '药品', '装备'],
        '阴谋': ['阴谋', '算计', '陷阱', '诡计'],
        '合作': ['合作', '联盟', '联手', '结盟'],
        '背叛': ['背叛', '出卖', '反目'],
    }

    def __init__(self):
        # Regexes describing a well-formed title. Reference data only: the
        # methods of this class do not read this list.
        self.good_title_patterns = [
            r'^[\u4e00-\u9fa5]{2,8}$',  # 2-8 Chinese characters
            r'.*[?!,。]?$',  # optional trailing punctuation
        ]

        # (regex, message) pairs describing titles that need fixing.
        # Reference data only: the methods of this class do not read it.
        self.bad_title_patterns = [
            (r'_', '标题中不应有下划线'),  # underscore
            (r'[((]\d+[))]', '不应有括号数字'),  # bracketed number
            (r'\d+', '标题中不应有数字(除章节号外)'),  # any digit
            (r'修复|修复版|手动修复|fixed', '不应有技术性词汇'),  # tech words
            (r'^[\u4e00-\u9fa5]{1,2}$', '标题太短(1-2字)'),  # too short
            (r'^[\u4e00-\u9fa5]{9,}$', '标题太长(9字以上)'),  # too long
        ]

        # Vocabulary considered attractive for this genre.
        # NOTE: a few words appear twice; suggest_improvement() counts list
        # entries when ranking, so repeated words weigh double there. The list
        # is left untouched to keep that ranking stable.
        self.good_keywords = [
            # actions
            '觉醒', '重生', '囤货', '末日', '降临', '爆发', '危机', '求生',
            '逃亡', '生存', '挣扎', '抉择', '博弈', '对峙', '谈判', '交易',
            '契约', '联盟', '背叛', '复仇', '救赎', '重生', '逆袭',
            # emotions
            '绝望', '希望', '恐惧', '勇气', '疯狂', '冷静', '煎熬', '挣扎',
            '痛苦', '解脱', '抉择', '迷茫', '坚定', '愤怒', '悲伤', '喜悦',
            # scenes
            '暗流', '冰点', '暗影', '铁锈', '钢渣', '焊火', '铁门', '电网',
            '仓库', '基地', '避难所', '安全屋', '堡垒', '防线', '围城',
            # time
            '倒计时', '最后时刻', '黎明前', '黄昏后', '末日钟', '生死线',
            # conflict
            '对决', '交锋', '冲突', '对抗', '挑战', '考验', '试炼', '陷阱',
        ]

        # Hand-picked replacements, looked up by exact original title.
        # '钢渣' used to be listed twice; only the later value ever took
        # effect, so that is the one kept here.
        self.title_improvements = {
            '筹码_手动修复': '致命筹码',
            '对峙(2)': '生死对峙',
            '焊火': '烈焰焊火',
            '铁锈': '锈蚀阴谋',
            '钢渣': '熔炉考验',
            '铁门': '钢铁之门',
            '充电': '能量重启',
            '倒计时': '末日倒计时',
            '质询': '致命质询',
            '断水': '水源危机',
            '昏沉': '意识迷途',
            '电话': '致命来电',
            '煎熬': '生死煎熬',
            '赴约': '死亡之约',
            '抉择': '命运抉择',
            '交付': '生死交付',
            '暗影': '暗影重重',
            '决断': '生死决断',
            '博弈': '末日博弈',
            '修复': '心灵修复',
        }

    def analyze_title(self, title):
        """Score a title and list the problems found in it.

        Args:
            title: The chapter title text (without the chapter number).

        Returns:
            A dict with keys ``'title'``, ``'score'`` (0-100), ``'issues'``
            (list of message strings) and ``'grade'`` (see ``get_grade``).
        """
        issues = []
        score = 100

        # Length limits.
        if len(title) < 2:
            issues.append("标题太短(少于2字)")
            score -= 30
        elif len(title) > 8:
            issues.append("标题太长(超过8字)")
            score -= 20

        if '_' in title:
            issues.append("标题包含下划线")
            score -= 25

        # Only a number inside parentheses counts, not any digit.
        if _BRACKETED_NUMBER_RE.search(title):
            issues.append("标题包含括号数字")
            score -= 25

        # Report (and penalise) only the first technical word found.
        tech_word = next((w for w in self._TECH_WORDS if w in title), None)
        if tech_word is not None:
            issues.append(f"标题包含技术词汇'{tech_word}'")
            score -= 30

        # Titles already flagged as too short are not also flagged as dull.
        if len(title) >= 2 and not any(k in title for k in self.good_keywords):
            issues.append("标题缺乏吸引力关键词")
            score -= 15

        # Extra penalty for every issue beyond the third.
        if len(issues) > 3:
            score -= (len(issues) - 3) * 5

        score = max(0, min(100, score))

        return {
            'title': title,
            'score': score,
            'issues': issues,
            'grade': self.get_grade(score),
        }

    def get_grade(self, score):
        """Map a numeric score to its grade label."""
        if score >= 90:
            return "A+ (优秀)"
        if score >= 80:
            return "A (良好)"
        if score >= 70:
            return "B+ (一般)"
        if score >= 60:
            return "B (及格)"
        return "C (需要改进)"

    def suggest_improvement(self, original_title):
        """Return a suggested replacement for ``original_title``.

        The hand-picked table ``title_improvements`` wins when it has an
        entry. Otherwise underscores and bracketed numbers are stripped, and
        titles shorter than four characters are extended with a keyword. The
        shortest candidate is returned, ties broken by the number of
        ``good_keywords`` entries it contains. When no candidate exists the
        original title is returned unchanged.
        """
        curated = self.title_improvements.get(original_title)
        if curated is not None:
            return curated

        suggestions = []

        # Apply both clean-ups to the same string so the suggestion never
        # keeps one defect while fixing the other.
        cleaned = _BRACKETED_NUMBER_RE.sub('', original_title.replace('_', ''))
        if cleaned and cleaned != original_title:
            suggestions.append(cleaned)

        # Short titles: append a keyword as long as the result stays compact.
        if len(original_title) < 4:
            for keyword in self.good_keywords:
                if keyword in cleaned:
                    continue
                enhanced = f"{cleaned}{keyword}"
                if len(enhanced) <= 6:
                    suggestions.append(enhanced)

        if not suggestions:
            return original_title

        def rank(candidate):
            hits = sum(1 for k in self.good_keywords if k in candidate)
            return (len(candidate), -hits)

        # min() returns the first minimal element, i.e. generation order
        # breaks any remaining tie.
        return min(suggestions, key=rank)

    def generate_alternative_titles(self, chapter_num, content_preview=""):
        """Return up to five alternative titles for a chapter.

        Candidates are theme+keyword combinations derived from
        ``content_preview`` followed by generic numbered titles. Duplicates
        are dropped while keeping generation order, so the result is the same
        on every run.
        """
        alternatives = []

        themes = self.analyze_content_themes(content_preview)

        # Theme-based titles, using at most the first three themes.
        for theme in themes[:3]:
            for keyword in self.good_keywords:
                if keyword not in theme and len(theme + keyword) <= 6:
                    alternatives.append(f"{theme}{keyword}")

        # Generic fall-back titles.
        alternatives.extend([
            f"第{chapter_num}次抉择",
            f"生死第{chapter_num}关",
            f"末日倒计时{chapter_num}",
            f"重生第{chapter_num}步",
            f"危机第{chapter_num}重",
        ])

        # dict.fromkeys de-duplicates while preserving insertion order; a
        # plain set() would return a different selection from run to run.
        return list(dict.fromkeys(alternatives))[:5]

    def analyze_content_themes(self, content):
        """Return the themes whose keywords occur in the start of ``content``.

        Only the first 500 characters are inspected. Returns ``['未知']``
        when no theme keyword is found (or ``content`` is empty/None).
        """
        preview = (content or "")[:500]
        themes = [
            theme
            for theme, keywords in self._THEME_KEYWORDS.items()
            if any(keyword in preview for keyword in keywords)
        ]
        return themes if themes else ['未知']


def _chapter_sort_key(filename):
    """Return the chapter number found in ``filename``, or 0 if none."""
    match = _CHAPTER_NUMBER_RE.search(filename)
    return int(match.group(1)) if match else 0


def main():
    """Analyse every chapter title, print the report and save it."""
    print("📚 章节标题质量检查系统")
    print("=" * 50)

    qc = ChapterTitleQC()

    # Collect the chapter files, ordered by chapter number.
    try:
        entries = os.listdir(CHAPTERS_DIR)
    except FileNotFoundError:
        raise SystemExit(f"章节目录不存在: {CHAPTERS_DIR}") from None
    chapter_files = sorted(
        (name for name in entries if name.endswith('.md')),
        key=_chapter_sort_key,
    )

    print(f"共发现 {len(chapter_files)} 个章节")
    print()

    results = []

    for filename in chapter_files:
        # Files that do not follow the chapter naming scheme are skipped.
        match = _CHAPTER_FILENAME_RE.search(filename)
        if not match:
            continue

        chapter_num = match.group(1)
        original_title = match.group(2)

        # Only the first 500 characters are needed for theme analysis.
        filepath = os.path.join(CHAPTERS_DIR, filename)
        with open(filepath, 'r', encoding='utf-8') as f:
            content_preview = f.read(500)

        results.append({
            'chapter_num': chapter_num,
            'filename': filename,
            'original_title': original_title,
            'analysis': qc.analyze_title(original_title),
            'suggested_title': qc.suggest_improvement(original_title),
            'alternatives': qc.generate_alternative_titles(
                chapter_num, content_preview
            ),
        })

    print("📊 标题质量报告:")
    print("-" * 80)

    poor_titles = []
    good_titles = []

    for result in results:
        analysis = result['analysis']
        print(f"第{result['chapter_num']}章: {result['original_title']}")
        print(f" 评分: {analysis['score']}/100 ({analysis['grade']})")

        if analysis['issues']:
            print(f" 问题: {', '.join(analysis['issues'])}")
            poor_titles.append(result)
        else:
            print(f" 状态: ✅ 良好")
            good_titles.append(result)

        if result['original_title'] != result['suggested_title']:
            print(f" 建议: {result['suggested_title']}")

        print()

    print("📈 统计信息:")
    print(f" 优秀标题: {len([r for r in results if r['analysis']['score'] >= 80])}")
    print(f" 需要改进: {len(poor_titles)}")
    print(f" 良好标题: {len(good_titles)}")

    # List the proposed fixes when any title has issues.
    if poor_titles:
        print("\n🔧 需要修复的标题:")
        for i, result in enumerate(poor_titles, 1):
            print(f" {i}. 第{result['chapter_num']}章: {result['original_title']} → {result['suggested_title']}")

        print("\n💡 运行修复命令:")
        print(" python3 tools/apply_title_fixes.py")

    save_report(results)


def save_report(results):
    """Write the Markdown quality report one level above ``CHAPTERS_DIR``.

    Args:
        results: List of per-chapter dicts as built by ``main`` (keys
            ``'chapter_num'``, ``'original_title'``, ``'analysis'`` and
            ``'suggested_title'`` are read here).
    """
    report_path = os.path.join(CHAPTERS_DIR, "../chapter_title_qc_report.md")

    # Use the standard library for the timestamp rather than shelling out
    # to the external `date` command.
    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    with open(report_path, 'w', encoding='utf-8') as f:
        f.write("# 章节标题质量检查报告\n\n")
        f.write(f"生成时间: {generated_at}\n")
        f.write(f"总章节数: {len(results)}\n\n")

        f.write("## 标题质量分析\n\n")
        f.write("| 章节 | 原标题 | 评分 | 等级 | 问题 | 建议标题 |\n")
        f.write("|------|--------|------|------|------|----------|\n")

        for result in results:
            analysis = result['analysis']
            issues = '; '.join(analysis['issues']) if analysis['issues'] else '无'
            if result['original_title'] != result['suggested_title']:
                suggested = result['suggested_title']
            else:
                suggested = '无'

            f.write(f"| {result['chapter_num']} | {result['original_title']} | {analysis['score']} | {analysis['grade']} | {issues} | {suggested} |\n")

        f.write("\n## 改进建议\n\n")
        f.write("1. 移除标题中的下划线和技术词汇\n")
        f.write("2. 避免使用括号数字\n")
        f.write("3. 标题长度建议2-6个字\n")
        f.write("4. 使用更具吸引力的关键词\n")

    print(f"\n📄 报告已保存: {report_path}")


if __name__ == '__main__':
    main()