novel-doomsday-resurgence/tools/clean_duplicate_titles.py

#!/usr/bin/env python3
"""
Clean up duplicated chapter titles in the Markdown chapter files.
"""

import os
import re

# Absolute path of the directory whose ``.md`` chapter files main() lists,
# reads and rewrites in place.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"

def clean_duplicate_titles(content, filename):
    """Remove a chapter title repeated on the line right after the heading.

    The chapter title is derived from a file name shaped like
    ``ch<digits>-第<digits>章 <title>.md``. When the first line of ``content``
    is the heading ``# 第<digits>章 <title>`` and the second line consists of
    nothing but that same title (optionally wrapped in Markdown heading or
    emphasis markers, and optionally prefixed with ``第<digits>章``), the
    second line is dropped.

    A second line that merely *mentions* the title inside other text is body
    prose and is left untouched.

    Args:
        content: Full text of the chapter file.
        filename: Name of the chapter file; used only to extract the title.

    Returns:
        ``content`` without the duplicate title line, or ``content`` unchanged
        when the file name does not match, the heading is not the expected
        one, or the second line is not a pure repeat of the title.
    """
    name_match = re.search(r'ch\d+-第\d+章\s+(.+)\.md', filename)
    if not name_match:
        return content

    chapter_title = name_match.group(1).strip()
    if not chapter_title:
        # An empty title would match every line; there is nothing to compare.
        return content

    lines = content.split('\n')
    if len(lines) < 2:
        return content

    escaped_title = re.escape(chapter_title)

    # Only act when the first line already is the correct chapter heading.
    heading_pattern = rf"# 第\d+章 {escaped_title}"
    if not re.match(heading_pattern, lines[0]):
        return content

    # The whole second line must be the title and nothing else; a substring
    # hit inside a sentence must not be edited.
    duplicate_pattern = (
        rf"\s*#*\s*[*_]*\s*(?:第\d+章\s*)?{escaped_title}\s*[*_]*\s*"
    )
    if re.fullmatch(duplicate_pattern, lines[1]):
        del lines[1]

    return '\n'.join(lines)

def main():
    """Apply clean_duplicate_titles to every ``.md`` file in CHAPTERS_DIR.

    Files are visited in sorted name order. A file is rewritten (UTF-8) only
    when the cleaned text differs from what was read; progress is printed
    for every file.
    """
    print("清理重复的章节标题...")

    markdown_names = sorted(
        name for name in os.listdir(CHAPTERS_DIR) if name.endswith('.md')
    )

    for name in markdown_names:
        chapter_path = os.path.join(CHAPTERS_DIR, name)
        print(f"处理: {name}")

        with open(chapter_path, 'r', encoding='utf-8') as source:
            original_text = source.read()

        updated_text = clean_duplicate_titles(original_text, name)

        # Leave the file alone when nothing changed.
        if updated_text == original_text:
            print("  ✓ 无需清理")
            continue

        with open(chapter_path, 'w', encoding='utf-8') as target:
            target.write(updated_text)
        print("  ✓ 已清理重复标题")

    print("\n清理完成！")

# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
章节标题质量改进系统完成

✅ 修复关键标题问题：
1. 筹码_手动修复 → 致命筹码
2. 修复 → 心灵修复
3. 对峙（2） → 生死对峙

✅ 创建完整质量检查与修复工具集：
1. chapter_title_qc.py - 标题质量分析系统
2. apply_title_fixes.py - 自动修复工具
3. clean_ai_markers.py - AI标记清理工具
4. final_format_fix.py - 最终格式修复工具
5. improve_all_titles.py - 全面标题改进工具

✅ 所有29个章节标题质量均已优化，评分A级以上
✅ 移除爽点分析内容，确保正文纯净
✅ 提升标题吸引力和阅读体验

2026-03-30 14:53:52 +08:00
			`#!/usr/bin/env python3`
			`"""`
			`清理重复的章节标题`
			`"""`

			`import os`
			`import re`

			`CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"`

			`def clean_duplicate_titles(content, filename):`
			`"""`
			`清理重复的章节标题`
			`"""`
			`# 从文件名提取章节标题`
			`match = re.search(r'ch\d+-第\d+章\s+(.+)\.md', filename)`
			`if not match:`
			`return content`

			`chapter_title = match.group(1).strip()`

			`lines = content.split('\n')`
			`if not lines:`
			`return content`

			`# 第一行应该是章节标题`
			`if lines[0].startswith('# 第'):`
			`# 检查第一行是否包含重复的章节标题`
			`first_line = lines[0]`
			`title_pattern = f"# 第\\d+章 {re.escape(chapter_title)}"`

			`# 如果第一行已经正确，就检查第二行`
			`if re.match(title_pattern, first_line):`
			`# 检查第二行是否也包含章节标题`
			`if len(lines) > 1 and chapter_title in lines[1]:`
			`# 移除第二行的重复标题`
			`lines[1] = lines[1].replace(chapter_title, '').strip()`
			`# 如果第二行变成空或只剩下空格，移除它`
			`if not lines[1] or lines[1].isspace():`
			`lines.pop(1)`

			`return '\n'.join(lines)`

			`def main():`
			`print("清理重复的章节标题...")`

			`chapter_files = [f for f in os.listdir(CHAPTERS_DIR) if f.endswith('.md')]`

			`for filename in sorted(chapter_files):`
			`filepath = os.path.join(CHAPTERS_DIR, filename)`

			`print(f"处理: {filename}")`

			`with open(filepath, 'r', encoding='utf-8') as f:`
			`content = f.read()`

			`cleaned_content = clean_duplicate_titles(content, filename)`

			`if content != cleaned_content:`
			`with open(filepath, 'w', encoding='utf-8') as f:`
			`f.write(cleaned_content)`
			`print(f" ✓ 已清理重复标题")`
			`else:`
			`print(f" ✓ 无需清理")`

			`print("\n清理完成！")`

			`if __name__ == '__main__':`
			`main()`