novel-doomsday-resurgence/tools/final_quality_check.py
唐天洛 2003fa15ef 章节标题质量改进系统完成
 修复关键标题问题:
1. 筹码_手动修复 → 致命筹码
2. 修复 → 心灵修复
3. 对峙(2) → 生死对峙

 创建完整质量检查与修复工具集:
1. chapter_title_qc.py - 标题质量分析系统
2. apply_title_fixes.py - 自动修复工具
3. clean_ai_markers.py - AI标记清理工具
4. final_format_fix.py - 最终格式修复工具
5. improve_all_titles.py - 全面标题改进工具

 所有29个章节标题质量均已优化,评分A级以上
 移除爽点分析内容,确保正文纯净
 提升标题吸引力和阅读体验
2026-03-30 14:53:52 +08:00

183 lines
5.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
最终质量检查脚本
检查章节格式是否已经修复
"""
import os
import re
# Directory scanned by main() for chapter files (names ending in ".md");
# the same path is embedded in the generated fix script.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
def check_chapter_quality(filename, content):
    """Collect the formatting problems still present in one chapter.

    Args:
        filename: Name of the chapter file. Kept for the caller's
            convenience; none of the checks read it.
        content: Full text of the chapter.

    Returns:
        A list of problem descriptions in check order; empty when the
        chapter passes every check.
    """
    # 1. Analysis snippets that must not remain in the chapter body,
    #    paired with the message reported when they are found.
    leaked_snippets = (
        ('展现重生者的先知优势', "仍有'展现重生者的先知优势'在正文中"),
        ('利用未来信息获取利益', "仍有'利用未来信息获取利益'在正文中"),
        ('谈判桌上,陈末掌握着对手的所有底牌', "仍有重复的'谈判桌上'段落"),
    )
    found = [message for snippet, message in leaked_snippets if snippet in content]

    # 2. A heading line that introduces an analysis section.
    analysis_heading = re.search(
        r'^#\s*【爽点[一二三四五六七八九十]?[:]', content, re.MULTILINE
    )
    if analysis_heading is not None:
        found.append("仍有爽点标题在正文中")

    # 3. The text must open with the chapter heading prefix.
    if not content.startswith('# 第'):
        found.append("章节标题格式不正确")

    # 4. Analysis hidden inside an HTML comment.
    if '<!-- 爽点分析:' in content:
        found.append("仍有HTML注释中的爽点分析")

    # 5. The chapter title repeated on the second line. Only short second
    #    lines (< 50 chars) count, so a paragraph that merely mentions the
    #    title is not flagged.
    first_line, newline, remainder = content.partition('\n')
    if newline:
        second_line = remainder.split('\n', 1)[0]
        heading = re.search(r'\d+章\s+(.+)', first_line)
        if heading and len(second_line) < 50 and heading.group(1) in second_line:
            found.append("第二行有重复的章节标题")

    return found
def main():
    """Check every ``.md`` chapter in CHAPTERS_DIR and print a report.

    Chapters are visited in ascending order of the number following ``ch``
    in the file name. Each file is read as UTF-8 and passed to
    check_chapter_quality(); a per-file result and a final summary are
    printed. When at least one chapter has problems, the affected files
    are listed and create_fix_script() is called with them.
    """

    def chapter_number(name):
        # Names without a "ch<digits>" part sort first (key 0).
        found = re.search(r'ch(\d+)', name)
        return int(found.group(1)) if found else 0

    print("最终质量检查...\n")

    chapter_files = []
    for entry in os.listdir(CHAPTERS_DIR):
        if entry.endswith('.md'):
            chapter_files.append(entry)

    all_problems = []
    good_chapters = 0
    for filename in sorted(chapter_files, key=chapter_number):
        with open(os.path.join(CHAPTERS_DIR, filename), 'r', encoding='utf-8') as f:
            content = f.read()

        problems = check_chapter_quality(filename, content)
        if not problems:
            print(f"{filename} - 格式正确")
            good_chapters += 1
            continue

        print(f"{filename}")
        for problem in problems:
            print(f" - {problem}")
        all_problems.append((filename, problems))

    total = len(chapter_files)
    print("\n检查完成!")
    print(f"✅ 格式正确的章节: {good_chapters}/{total}")
    print(f"❌ 需要修复的章节: {len(all_problems)}/{total}")

    if not all_problems:
        return

    print("\n需要修复的文件:")
    for filename, problems in all_problems:
        print(f" - {filename}: {', '.join(problems)}")

    # Generate the repair script for the files that failed.
    create_fix_script(all_problems)
# Source text of the generated repair script. It is a raw, single-quoted
# literal so that the script's own docstring quotes and regex/newline
# backslashes are written out verbatim. __CHAPTERS_DIR__ and
# __PROBLEM_FILES__ are replaced with Python literals by create_fix_script().
_FIX_SCRIPT_TEMPLATE = r'''#!/usr/bin/env python3
"""
自动修复章节格式问题
"""
import os
import re
CHAPTERS_DIR = __CHAPTERS_DIR__
def final_fix_content(content):
    """
    最终修复内容
    """
    # 1. 移除所有爽点分析内容
    content = re.sub(r'展现重生者的先知优势[,。]?', '', content)
    content = re.sub(r'利用未来信息获取利益[,。]?', '', content)
    content = re.sub(r'谈判桌上,陈末掌握着对手的所有底牌[^。]*。', '', content)
    # 2. 移除爽点标题
    content = re.sub(r'^#\s*【爽点[一二三四五六七八九十]?[:][^】]+】', '', content, flags=re.MULTILINE)
    # 3. 移除HTML注释中的爽点分析
    lines = content.split('\n')
    cleaned_lines = []
    skip = False
    for line in lines:
        if skip:
            if '-->' in line:
                skip = False
            continue
        if '<!-- 爽点分析:' in line:
            skip = '-->' not in line
            continue
        cleaned_lines.append(line)
    content = '\n'.join(cleaned_lines)
    # 4. 确保章节标题格式正确
    if not content.startswith('# 第'):
        # 从内容中提取章节标题
        first_line = content.split('\n')[0] if content else ''
        match = re.search(r'第(\d+)章\s+(.+)', first_line)
        if match:
            chapter_num = match.group(1)
            chapter_title = match.group(2)
            content = f'# 第{chapter_num}章 {chapter_title}\n\n' + '\n'.join(content.split('\n')[1:])
    # 5. 清理多余的空白行
    content = re.sub(r'\n{3,}', '\n\n', content)
    return content.strip()
# 需要修复的文件列表
problem_files = __PROBLEM_FILES__
for filename in problem_files:
    filepath = os.path.join(CHAPTERS_DIR, filename)
    print(f"修复: {filename}")
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()
    # 创建备份
    backup_path = filepath + '.final.bak'
    with open(backup_path, 'w', encoding='utf-8') as f:
        f.write(content)
    # 应用修复
    fixed_content = final_fix_content(content)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(fixed_content)
    print(f" ✓ 已修复")
print("\n所有文件修复完成")
'''


def create_fix_script(problem_files, chapters_dir=None):
    """Write ``tools/apply_final_fixes.py``, a script that repairs chapters.

    The generated script backs up each listed chapter next to the original
    (suffix ``.final.bak``), then rewrites it with the analysis snippets,
    analysis headings and analysis HTML comments removed, the chapter
    heading normalised to ``# 第N章 <title>``, and runs of three or more
    newlines collapsed to two.

    The script is placed in the ``tools`` directory that sits beside the
    chapters directory, which is where the printed run command looks for
    it. That directory is created when missing.

    Args:
        problem_files: Iterable of ``(filename, problems)`` pairs; only the
            file names are embedded in the generated script.
        chapters_dir: Directory containing the chapter files. Defaults to
            the module-level ``CHAPTERS_DIR``.

    Raises:
        OSError: If the tools directory or the script cannot be written.
    """
    if chapters_dir is None:
        chapters_dir = CHAPTERS_DIR
    # abspath also drops a trailing separator, so dirname() below yields
    # the project root rather than the chapters directory itself.
    chapters_dir = os.path.abspath(chapters_dir)
    project_root = os.path.dirname(chapters_dir)
    tools_dir = os.path.join(project_root, "tools")

    filenames = [filename for filename, _ in problem_files]
    # repr() emits valid Python literals for the path and the name list.
    script_content = (
        _FIX_SCRIPT_TEMPLATE
        .replace("__PROBLEM_FILES__", repr(filenames))
        .replace("__CHAPTERS_DIR__", repr(chapters_dir))
    )

    os.makedirs(tools_dir, exist_ok=True)
    script_path = os.path.join(tools_dir, "apply_final_fixes.py")
    with open(script_path, 'w', encoding='utf-8') as f:
        f.write(script_content)

    print("\n已创建修复脚本: tools/apply_final_fixes.py")
    print(f"运行命令: cd {project_root} && python3 tools/apply_final_fixes.py")
# Run the quality check only when this file is executed as a script.
if __name__ == "__main__":
    main()