novel-doomsday-resurgence/tools/clean_duplicate_titles.py

#!/usr/bin/env python3
"""
清理重复的章节标题
"""

import os
import re

# Hard-coded absolute directory that main() lists for chapter ".md" files.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"

def clean_duplicate_titles(content, filename):
    """Remove a chapter title that is repeated right below the chapter heading.

    The chapter title is taken from ``filename``, which must look like
    ``ch<digits>-第<digits>章 <title>.md``. When the first line of ``content``
    is the heading ``# 第<digits>章 <title>`` and the second line consists of
    nothing but that same title (optionally preceded by ``#`` markers and/or a
    ``第<digits>章`` prefix, with any surrounding whitespace), the second line
    is dropped.

    A second line that merely *contains* the title inside other text is left
    untouched, so body prose mentioning the title is never altered.

    Args:
        content: Full text of the chapter file.
        filename: File name of the chapter; only used to extract the title.

    Returns:
        The cleaned text, or ``content`` unchanged when there is nothing to
        clean (unrecognised file name, unexpected heading, or no repeat).
    """
    # Derive the chapter title from the file name.
    name_match = re.search(r'ch\d+-第\d+章\s+(.+)\.md', filename)
    if not name_match:
        return content

    chapter_title = name_match.group(1).strip()
    if not chapter_title:
        # A whitespace-only title would match every line; refuse to guess.
        return content
    escaped_title = re.escape(chapter_title)

    lines = content.split('\n')
    if len(lines) < 2:
        # No second line exists, so there is nothing that could be a repeat.
        return content

    # The first line must already be the expected chapter heading.
    if not re.match(rf"# 第\d+章 {escaped_title}", lines[0]):
        return content

    # Only a line that is *entirely* a repeat of the title counts as a
    # duplicate; fullmatch keeps prose that just mentions the title intact.
    duplicate_line = re.compile(
        rf"\s*(?:#+\s*)?(?:第\d+章\s*)?{escaped_title}\s*"
    )
    if not duplicate_line.fullmatch(lines[1]):
        return content

    del lines[1]
    return '\n'.join(lines)

def main():
    """Clean every ``.md`` file directly under ``CHAPTERS_DIR`` in place.

    Files are processed in sorted name order. Each file is read as UTF-8,
    passed through ``clean_duplicate_titles``, and rewritten only when the
    cleaned text differs from what was read. Progress is printed per file.
    """
    print("清理重复的章节标题...")

    markdown_names = sorted(
        name for name in os.listdir(CHAPTERS_DIR) if name.endswith('.md')
    )

    for name in markdown_names:
        path = os.path.join(CHAPTERS_DIR, name)

        print(f"处理: {name}")

        with open(path, 'r', encoding='utf-8') as src:
            original = src.read()

        cleaned = clean_duplicate_titles(original, name)

        # Leave untouched files alone so they are not rewritten needlessly.
        if cleaned == original:
            print("  ✓ 无需清理")
            continue

        with open(path, 'w', encoding='utf-8') as dst:
            dst.write(cleaned)
        print("  ✓ 已清理重复标题")

    print("\n清理完成！")

# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()