novel-doomsday-resurgence/tools/final_format_fix.py
唐天洛 2003fa15ef 章节标题质量改进系统完成
 修复关键标题问题:
1. 筹码_手动修复 → 致命筹码
2. 修复 → 心灵修复
3. 对峙(2) → 生死对峙

 创建完整质量检查与修复工具集:
1. chapter_title_qc.py - 标题质量分析系统
2. apply_title_fixes.py - 自动修复工具
3. clean_ai_markers.py - AI标记清理工具
4. final_format_fix.py - 最终格式修复工具
5. improve_all_titles.py - 全面标题改进工具

 所有29个章节标题质量均已优化,评分A级以上
 移除爽点分析内容,确保正文纯净
 提升标题吸引力和阅读体验
2026-03-30 14:53:52 +08:00

137 lines
3.8 KiB
Python

#!/usr/bin/env python3
"""
Final format-fix script.

Ensures every chapter has correct Markdown formatting.
"""
import os
import re
# Directory scanned by main(): every ``*.md`` file directly inside it is read
# and, unless it already starts with "# 第", rewritten in place.
# NOTE(review): hard-coded absolute path — the script only works on a machine
# that has this exact directory.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
def final_fix_format(content, filename):
    """Rebuild a chapter's Markdown with a standard title and clean paragraphs.

    The chapter number and title are taken from *filename*, which is expected
    to look like ``ch<N>-第<N>章 <title>.md``. When the name does not match,
    the number falls back to ``"1"`` and the title to ``"未命名"``.

    Processing steps:

    1. Strip every line; blank lines act as paragraph separators.
    2. Unwrap a line that is a single ``**bold**`` span.
    3. Remove whitespace around sentence punctuation.
    4. Join consecutive non-blank lines into one paragraph; lines starting
       with ``#`` or ``【爽点`` always stand alone.
    5. Emit the standard title, then the ordinary paragraphs, then every
       paragraph containing ``【爽点``, separated by one blank line each.

    Args:
        content: Raw text of the chapter file.
        filename: Name of the chapter file (used only to derive the title).

    Returns:
        The normalized text. It starts with ``# 第<N>章 <title>`` and ends
        with exactly one newline.
    """
    # 1. Derive chapter number and title from the file name.
    chapter_num = "1"
    chapter_title = "未命名"
    match = re.search(r'ch(\d+)-第\d+章\s+(.+)\.md', filename)
    if match:
        chapter_num = match.group(1)
        chapter_title = match.group(2).strip()

    # 2. Standard title line. The captured title excludes "第N章", so the
    #    "章" and the separating space have to be written back explicitly.
    heading = f"# 第{chapter_num}章 {chapter_title}"

    # Whitespace around sentence punctuation is dropped. Both the ASCII and
    # the full-width forms are listed so Chinese punctuation is covered too.
    punctuation = re.compile(r'\s*([,。!?;:,!?;:])\s*')

    # 3./4. Clean each line and group the lines into paragraphs.
    paragraphs = []
    pending = []
    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if line:
            # Unwrap only when the whole line is ONE bold span; a line such
            # as "**a** x **b**" must keep its markers or it becomes broken
            # Markdown ("a** x **b").
            bold = re.fullmatch(r'\*\*(.*?)\*\*', line)
            if bold and '**' not in bold.group(1):
                line = bold.group(1)
            line = punctuation.sub(r'\1', line)

        stands_alone = line.startswith(('#', '【爽点'))
        # A blank line or a stand-alone line closes the paragraph collected
        # so far; this is what preserves the author's paragraph breaks.
        if (not line or stands_alone) and pending:
            paragraphs.append(' '.join(pending))
            pending = []
        if stands_alone:
            paragraphs.append(line)
        elif line:
            pending.append(line)
    if pending:
        paragraphs.append(' '.join(pending))

    # 5. Ordinary paragraphs first, "爽点" paragraphs last. A paragraph equal
    #    to the standard title is dropped so the title is not emitted twice.
    shuangdian_paras = [p for p in paragraphs if '【爽点' in p]
    other_paras = [
        p for p in paragraphs
        if '【爽点' not in p and p != heading
    ]

    # 6. One blank line between all blocks.
    result = '\n\n'.join([heading, *other_paras, *shuangdian_paras])

    # 7. Collapse runs of spaces/tabs left over from the source text.
    result = re.sub(r'[ \t]{2,}', ' ', result)
    return result.strip() + '\n'
def main():
    """Apply final_fix_format to every ``.md`` file in CHAPTERS_DIR.

    Files are visited in ascending order of the number following ``ch`` in
    their name (names without such a number sort as 0). A file whose text
    already starts with ``# 第`` is left alone. Any other file is first
    copied to ``<path>.final.bak`` and then overwritten with the fixed text.
    Progress is printed to stdout.
    """
    print("执行最终格式修复...")

    def chapter_order(name):
        # Numeric sort key taken from the "ch<N>" part of the file name.
        found = re.search(r'ch(\d+)', name)
        return int(found.group(1)) if found else 0

    markdown_names = [
        entry for entry in os.listdir(CHAPTERS_DIR) if entry.endswith('.md')
    ]
    markdown_names.sort(key=chapter_order)

    for filename in markdown_names:
        filepath = os.path.join(CHAPTERS_DIR, filename)
        print(f"修复: {filename}")

        with open(filepath, 'r', encoding='utf-8') as source:
            content = source.read()

        # Files that already begin with a chapter heading are not touched.
        if content.startswith('# 第'):
            print(" ✓ 格式已正确")
            continue

        # Keep a copy of the untouched text next to the chapter file.
        with open(filepath + '.final.bak', 'w', encoding='utf-8') as backup:
            backup.write(content)

        fixed_content = final_fix_format(content, filename)
        with open(filepath, 'w', encoding='utf-8') as target:
            target.write(fixed_content)
        print(" ✓ 已完成最终修复")

    print("\n所有章节格式修复完成!")
# Run the fixer only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()