novel-doomsday-resurgence/tools/final_quality_check.py

183 lines
5.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
最终质量检查脚本
检查章节格式是否已经修复
"""
import os
import re
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
def check_chapter_quality(filename, content):
    """Return the formatting problems still present in one chapter.

    Punctuation inside the markers is matched in both its ASCII and its
    full-width form (``:``/``:``, ``,``/``,``), because Chinese prose is
    normally typed with the full-width characters.

    Args:
        filename: Name of the chapter file. No check consults it; it is kept
            so existing callers keep working.
        content: Full text of the chapter.

    Returns:
        A list of human-readable problem descriptions in check order. The
        list is empty when no check fires.
    """
    problems = []

    # 1. Analysis boilerplate that leaked into the chapter body.
    leaked_phrases = (
        (r'展现重生者的先知优势', "仍有'展现重生者的先知优势'在正文中"),
        (r'利用未来信息获取利益', "仍有'利用未来信息获取利益'在正文中"),
        (r'谈判桌上[,,]陈末掌握着对手的所有底牌', "仍有重复的'谈判桌上'段落"),
    )
    for pattern, message in leaked_phrases:
        if re.search(pattern, content):
            problems.append(message)

    # 2. Analysis headings such as "# 【爽点一:...】". Any run of numerals is
    #    accepted so that two-character numbers like 十一 are caught as well.
    if re.search(r'^#\s*【爽点[一二三四五六七八九十]*[::]', content, re.MULTILINE):
        problems.append("仍有爽点标题在正文中")

    # 3. The chapter must open with a "# 第..." heading.
    if not content.startswith('# 第'):
        problems.append("章节标题格式不正确")

    # 4. Analysis notes kept inside an HTML comment.
    if re.search(r'<!--\s*爽点分析[::]', content):
        problems.append("仍有HTML注释中的爽点分析")

    # 5. The title text repeated on the second line. The length limit keeps an
    #    ordinary paragraph that merely mentions the title from being flagged.
    lines = content.split('\n')
    if len(lines) > 1:
        title_match = re.search(r'\d+章\s+(.+)', lines[0])
        if title_match:
            chapter_title = title_match.group(1)
            if chapter_title in lines[1] and len(lines[1]) < 50:
                problems.append("第二行有重复的章节标题")

    return problems
def main():
    """Check every ``.md`` chapter in ``CHAPTERS_DIR`` and print a report.

    Chapters are visited in the numeric order of the ``ch<number>`` part of
    their file name; names without one sort as 0. After the summary, if any
    chapter has problems, the failing files are listed and handed to
    ``create_fix_script``.
    """
    print("最终质量检查...\n")

    def chapter_number(name):
        # Numeric sort key, so that ch10 comes after ch9 rather than after ch1.
        found = re.search(r'ch(\d+)', name)
        return int(found.group(1)) if found else 0

    chapter_files = [entry for entry in os.listdir(CHAPTERS_DIR) if entry.endswith('.md')]
    total = len(chapter_files)
    failures = []
    passed = 0

    for filename in sorted(chapter_files, key=chapter_number):
        with open(os.path.join(CHAPTERS_DIR, filename), 'r', encoding='utf-8') as handle:
            text = handle.read()

        issues = check_chapter_quality(filename, text)
        if not issues:
            print(f"{filename} - 格式正确")
            passed += 1
            continue

        print(f"{filename}")
        for issue in issues:
            print(f" - {issue}")
        failures.append((filename, issues))

    print("\n检查完成!")
    print(f"✅ 格式正确的章节: {passed}/{total}")
    print(f"❌ 需要修复的章节: {len(failures)}/{total}")

    if not failures:
        return

    print("\n需要修复的文件:")
    for filename, issues in failures:
        print(f" - {filename}: {', '.join(issues)}")

    # Generate the repair script for the failing chapters.
    create_fix_script(failures)
# Source text of the generated repair script. ``__CHAPTERS_DIR__`` and
# ``__PROBLEM_FILES__`` are replaced with Python literals by
# create_fix_script(). Plain placeholders are used instead of %-formatting so
# that a literal '%' in this code can never break the substitution. Every
# backslash is doubled and the inner docstring quotes are escaped, because this
# is an ordinary (non-raw) triple-quoted string.
_FIX_SCRIPT_TEMPLATE = """#!/usr/bin/env python3
\"\"\"
自动修复章节格式问题
\"\"\"
import os
import re

CHAPTERS_DIR = __CHAPTERS_DIR__


def final_fix_content(content):
    \"\"\"
    最终修复内容
    \"\"\"
    # 1. 移除所有爽点分析内容
    content = re.sub(r'展现重生者的先知优势[,,。]?', '', content)
    content = re.sub(r'利用未来信息获取利益[,,。]?', '', content)
    content = re.sub(r'谈判桌上[,,]陈末掌握着对手的所有底牌[^。]*。', '', content)

    # 2. 移除爽点标题
    content = re.sub(r'^#\\s*【爽点[一二三四五六七八九十]*[::][^】]+】', '', content, flags=re.MULTILINE)

    # 3. 移除HTML注释中的爽点分析
    content = re.sub(r'<!--\\s*爽点分析[::].*?-->', '', content, flags=re.DOTALL)

    # 4. 确保章节标题格式正确
    if not content.startswith('# 第'):
        # 从内容中提取章节标题
        first_line = content.split('\\n')[0] if content else ''
        match = re.search(r'第(\\d+)章\\s+(.+)', first_line)
        if match:
            chapter_num = match.group(1)
            chapter_title = match.group(2)
            content = f'# 第{chapter_num}章 {chapter_title}\\n\\n' + '\\n'.join(content.split('\\n')[1:])

    # 5. 清理多余的空白行
    content = re.sub(r'\\n{3,}', '\\n\\n', content)
    return content.strip()


# 需要修复的文件列表
problem_files = __PROBLEM_FILES__

for filename in problem_files:
    filepath = os.path.join(CHAPTERS_DIR, filename)
    print(f"修复: {filename}")
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # 创建备份
    backup_path = filepath + '.final.bak'
    if not os.path.exists(backup_path):
        with open(backup_path, 'w', encoding='utf-8') as f:
            f.write(content)

    # 应用修复
    fixed_content = final_fix_content(content)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(fixed_content)
    print(f" ✓ 已修复")

print("\\n所有文件修复完成")
"""


def create_fix_script(problem_files, chapters_dir=None):
    """Write ``tools/apply_final_fixes.py``, a script that repairs the chapters.

    The script is written to the ``tools`` directory next to the chapters
    directory, which is the location the printed run command uses. That
    directory is created when it does not exist.

    The generated script, when run, backs each listed file up to
    ``<file>.final.bak`` (an existing backup is left alone, so a second run
    cannot replace the original text with already-modified text) and then
    rewrites the file with the cleaned content.

    Args:
        problem_files: Iterable of ``(filename, problems)`` pairs. Only the
            file names are embedded in the generated script.
        chapters_dir: Directory holding the chapter files. Defaults to the
            module-level ``CHAPTERS_DIR``.

    Raises:
        OSError: If the tools directory or the script cannot be written.
    """
    if chapters_dir is None:
        chapters_dir = CHAPTERS_DIR
    # abspath also drops a trailing separator, so dirname() really yields the
    # parent of the chapters directory.
    chapters_dir = os.path.abspath(chapters_dir)
    project_root = os.path.dirname(chapters_dir)
    tools_dir = os.path.join(project_root, "tools")
    os.makedirs(tools_dir, exist_ok=True)

    filenames = [filename for filename, _ in problem_files]
    # repr() yields valid Python literals for the path and for the name list.
    script_content = (
        _FIX_SCRIPT_TEMPLATE
        .replace("__CHAPTERS_DIR__", repr(chapters_dir))
        .replace("__PROBLEM_FILES__", repr(filenames))
    )

    script_path = os.path.join(tools_dir, "apply_final_fixes.py")
    with open(script_path, 'w', encoding='utf-8') as f:
        f.write(script_content)

    print("\n已创建修复脚本: tools/apply_final_fixes.py")
    print(f"运行命令: cd {project_root} && python3 tools/apply_final_fixes.py")
# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()