# novel-doomsday-resurgence/tools/final_quality_check.py

#!/usr/bin/env python3
"""
最终质量检查脚本
检查章节格式是否已经修复
"""

import os
import re

# Absolute path of the directory whose ``.md`` chapter files are checked.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"

def check_chapter_quality(filename, content):
    """Collect the formatting problems still present in one chapter.

    Args:
        filename: Name of the chapter file. Accepted for the caller's
            convenience; none of the checks consult it.
        content: Full text of the chapter.

    Returns:
        A list of problem descriptions in the order the checks run; the
        list is empty when the chapter passes every check.
    """
    highlight_heading = re.search(
        r'^#\s*【爽点[一二三四五六七八九十]?[：:]', content, re.MULTILINE
    )

    # (condition, message) pairs, listed in the order problems are reported.
    findings = (
        # Analysis phrases that leaked into the chapter body.
        ('展现重生者的先知优势' in content, "仍有'展现重生者的先知优势'在正文中"),
        ('利用未来信息获取利益' in content, "仍有'利用未来信息获取利益'在正文中"),
        ('谈判桌上，陈末掌握着对手的所有底牌' in content, "仍有重复的'谈判桌上'段落"),
        # A Markdown heading that is really a highlight marker.
        (highlight_heading is not None, "仍有爽点标题在正文中"),
        # The chapter must open with a "# 第..." heading.
        (not content.startswith('# 第'), "章节标题格式不正确"),
        # Highlight analysis hidden inside an HTML comment.
        ('<!-- 爽点分析：' in content, "仍有HTML注释中的爽点分析"),
    )
    problems = [message for found, message in findings if found]

    # Duplicate-title check: only applies when a second line exists.
    first_line, newline, rest = content.partition('\n')
    if newline:
        second_line = rest.partition('\n')[0]
        heading = re.search(r'第\d+章\s+(.+)', first_line)
        # A short second line that repeats the title text counts as a duplicate.
        if heading and len(second_line) < 50 and heading.group(1) in second_line:
            problems.append("第二行有重复的章节标题")

    return problems

def main():
    """Check every ``.md`` chapter in ``CHAPTERS_DIR`` and print a report.

    Chapters are visited in ascending order of the number following ``ch``
    in the file name. When at least one chapter has problems, the affected
    files are listed and ``create_fix_script`` is called with them.
    """
    print("最终质量检查...\n")

    def chapter_number(name):
        # Names without a "ch<digits>" marker sort first (key 0).
        found = re.search(r'ch(\d+)', name)
        return int(found.group(1)) if found else 0

    chapter_files = [entry for entry in os.listdir(CHAPTERS_DIR) if entry.endswith('.md')]
    total = len(chapter_files)

    all_problems = []
    good_chapters = 0

    for filename in sorted(chapter_files, key=chapter_number):
        with open(os.path.join(CHAPTERS_DIR, filename), 'r', encoding='utf-8') as handle:
            text = handle.read()

        problems = check_chapter_quality(filename, text)

        if not problems:
            print(f"✅ {filename} - 格式正确")
            good_chapters += 1
            continue

        print(f"❌ {filename}")
        for problem in problems:
            print(f"   - {problem}")
        all_problems.append((filename, problems))

    print("\n检查完成！")
    print(f"✅ 格式正确的章节: {good_chapters}/{total}")
    print(f"❌ 需要修复的章节: {len(all_problems)}/{total}")

    # Nothing to list or repair when every chapter passed.
    if not all_problems:
        return

    print("\n需要修复的文件:")
    for filename, problems in all_problems:
        print(f"  - {filename}: {', '.join(problems)}")

    # Generate the repair script for the affected files.
    create_fix_script(all_problems)

# Source of the generated repair script. It is a raw, single-quoted triple
# string so the generated code can carry its own """docstrings""" and backslash
# escapes verbatim (an unescaped """ inside a """-delimited template ends the
# template early and breaks this whole file). The tokens __CHAPTERS_DIR__ and
# __PROBLEM_FILES__ are replaced with repr() values by create_fix_script().
# In the generated HTML-comment pass, skipping stops on any line containing
# "-->" (including the opening line itself), so a one-line comment can no
# longer swallow the rest of the chapter.
_FIX_SCRIPT_TEMPLATE = r'''#!/usr/bin/env python3
"""
自动修复章节格式问题
"""

import os
import re

CHAPTERS_DIR = __CHAPTERS_DIR__

def final_fix_content(content):
    """
    最终修复内容
    """
    # 1. 移除所有爽点分析内容
    content = re.sub(r'展现重生者的先知优势[，。]?', '', content)
    content = re.sub(r'利用未来信息获取利益[，。]?', '', content)
    content = re.sub(r'谈判桌上，陈末掌握着对手的所有底牌[^。]*。', '', content)

    # 2. 移除爽点标题
    content = re.sub(r'^#\s*【爽点[一二三四五六七八九十]?[：:][^】]+】', '', content, flags=re.MULTILINE)

    # 3. 移除HTML注释中的爽点分析
    lines = content.split('\n')
    cleaned_lines = []
    skip = False
    for line in lines:
        if not skip and '<!-- 爽点分析：' in line:
            skip = '-->' not in line.split('<!-- 爽点分析：', 1)[1]
            continue
        if skip:
            skip = '-->' not in line
            continue
        cleaned_lines.append(line)

    content = '\n'.join(cleaned_lines)

    # 4. 确保章节标题格式正确
    if not content.startswith('# 第'):
        # 从内容中提取章节标题
        first_line = content.split('\n')[0] if content else ''
        match = re.search(r'第(\d+)章\s+(.+)', first_line)
        if match:
            chapter_num = match.group(1)
            chapter_title = match.group(2)
            content = f'# 第{chapter_num}章 {chapter_title}\n\n' + '\n'.join(content.split('\n')[1:])

    # 5. 清理多余的空白行
    content = re.sub(r'\n{3,}', '\n\n', content)

    return content.strip()

# 需要修复的文件列表
problem_files = __PROBLEM_FILES__

for filename in problem_files:
    filepath = os.path.join(CHAPTERS_DIR, filename)

    print(f"修复: {filename}")

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # 创建备份
    backup_path = filepath + '.final.bak'
    with open(backup_path, 'w', encoding='utf-8') as f:
        f.write(content)

    # 应用修复
    fixed_content = final_fix_content(content)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(fixed_content)

    print(f"  ✓ 已修复")

print("\n所有文件修复完成！")
'''


def create_fix_script(problem_files, chapters_dir=None):
    """Write ``tools/apply_final_fixes.py``, a script that repairs flagged chapters.

    The script is placed in the ``tools`` directory that sits next to the
    chapters directory, which is the location the printed run command
    (``cd <project> && python3 tools/apply_final_fixes.py``) expects. The
    directory is created when missing.

    Args:
        problem_files: Iterable of ``(filename, problems)`` pairs; only the
            file names are embedded in the generated script.
        chapters_dir: Directory holding the chapter files. Defaults to the
            module-level ``CHAPTERS_DIR``.

    Raises:
        OSError: If the ``tools`` directory or the script cannot be written.
    """
    if chapters_dir is None:
        chapters_dir = CHAPTERS_DIR

    substitutions = {
        'CHAPTERS_DIR': repr(chapters_dir),
        'PROBLEM_FILES': repr([filename for filename, _ in problem_files]),
    }
    # Single-pass substitution with a callable: the replacement text is taken
    # literally (no backslash processing) and a value that happens to contain
    # the other token is never re-expanded.
    script_content = re.sub(
        r'__(CHAPTERS_DIR|PROBLEM_FILES)__',
        lambda token: substitutions[token.group(1)],
        _FIX_SCRIPT_TEMPLATE,
    )

    # normpath drops a trailing separator so dirname yields the project root.
    project_root = os.path.dirname(os.path.normpath(chapters_dir))
    tools_dir = os.path.join(project_root, "tools")
    os.makedirs(tools_dir, exist_ok=True)

    script_path = os.path.join(tools_dir, "apply_final_fixes.py")
    with open(script_path, 'w', encoding='utf-8') as f:
        f.write(script_content)

    print("\n已创建修复脚本: tools/apply_final_fixes.py")
    print(f"运行命令: cd {project_root} && python3 tools/apply_final_fixes.py")

# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()