#!/usr/bin/env python3
# novel-doomsday-resurgence/scripts/format_check_fix.py
"""
inkos章节格式检查和自动修复脚本
在inkos生成章节后立即运行确保格式标准化
"""
import os
import re
import json
import sys
from pathlib import Path
from typing import Dict, List, Tuple
class ChapterFormatFixer:
    """Check and auto-fix the Markdown formatting of inkos-generated chapters.

    A chapter file is a ``.md`` file whose name starts with a numeric prefix
    (for example ``0010_倒计时.md``).  The fixer can report problems
    (:meth:`check_chapter`), rewrite a file in place while keeping a ``.bak``
    copy (:meth:`fix_chapter`), process a whole directory
    (:meth:`process_directory`) or watch a directory for new chapters
    (:meth:`monitor_and_fix`).

    Attributes:
        config: Dict with the sections ``chapter_title_map`` (file name ->
            replacement title) and ``format_rules`` (rule name -> bool).
        problems_fixed: Number of files rewritten by :meth:`fix_chapter`.
    """

    # A chapter file name starts with three digits.  The previous literal
    # prefix test ('000') silently skipped chapter 10 onward, although the
    # default title map contains 0010-0014.
    _CHAPTER_PREFIX_RE = re.compile(r'[0-9]{3}')
    # Sub-headings ("## ...", "### ...") are not allowed inside a chapter.
    _SUBHEADING_CHECK_RE = re.compile(r'^##+ ', re.MULTILINE)
    _SUBHEADING_RE = re.compile(r'^##+ (.+)$', re.MULTILINE)
    # First-level heading, optionally already carrying "第N章 ".
    _HEADER_RE = re.compile(r'^# (第\d+章 )?(.+)$', re.MULTILINE)
    _VALID_HEADER_RE = re.compile(r'^# 第\d+章 ', re.MULTILINE)
    # Quoted dialogue embedded in running text (straight and curly quotes).
    _ASCII_QUOTE_RE = re.compile(r'([^"\n])(")([^"]+)(")([^"\n])')
    _CURLY_QUOTE_RE = re.compile(r'([^"\n])(“)([^”]+)(”)([^"\n])')
    _DIALOGUE_REPL = r'\1\n\n\2\3\4\n\n\5'
    _MAX_PARAGRAPH_CHARS = 500

    def __init__(self, config_path: str = None):
        """Create a fixer.

        Args:
            config_path: Optional path to a JSON config file.  When it is
                missing or does not exist, the built-in defaults are used.
        """
        self.config = self.load_config(config_path)
        self.problems_fixed = 0

    @staticmethod
    def _default_config() -> Dict:
        """Return a fresh copy of the built-in configuration."""
        return {
            "chapter_title_map": {
                "0002_暗流.md": "暗流涌动",
                "0003_仓鼠行动.md": "仓鼠计划",
                "0004_粮草先行.md": "物资先行",
                "0005_铁壁.md": "钢铁堡垒",
                "0006_焊花.md": "焊光四溅",
                "0007_骨刺.md": "骨刺警告",
                "0008_暗流2.md": "暗流再起",
                "0009_对峙.md": "初次对峙",
                "0010_倒计时.md": "末日倒计时",
                "0011_筹码.md": "生存筹码",
                "0012_质询.md": "深夜质询",
                "0013_铁锈.md": "铁锈危机",
                "0014_断水.md": "断水危机",
            },
            "format_rules": {
                "remove_bold": True,
                "fix_headings": True,
                "normalize_dialogue": True,
                "add_line_breaks": True,
                "end_with_separator": True,
            },
        }

    def load_config(self, config_path: str = None) -> Dict:
        """Load the configuration.

        Args:
            config_path: Path to a UTF-8 JSON file.  If it is ``None`` or the
                file does not exist, the defaults are returned.

        Returns:
            The configuration dict.  A user file that omits
            ``chapter_title_map`` gets the default map, and any rule missing
            from ``format_rules`` gets its default value, so later lookups
            cannot fail with ``KeyError``.

        Raises:
            ValueError: If the file is not valid JSON or its top level is not
                an object.
            OSError: If the file exists but cannot be read.
        """
        defaults = self._default_config()
        if not (config_path and os.path.exists(config_path)):
            return defaults

        with open(config_path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
        if not isinstance(loaded, dict):
            raise ValueError(f"config must be a JSON object: {config_path}")

        merged = dict(loaded)
        merged["chapter_title_map"] = (
            loaded.get("chapter_title_map") or defaults["chapter_title_map"]
        )
        merged["format_rules"] = {
            **defaults["format_rules"],
            **(loaded.get("format_rules") or {}),
        }
        return merged

    @classmethod
    def _is_chapter_file(cls, name: str) -> bool:
        """Return True if *name* looks like a chapter file (``NNN…*.md``)."""
        return name.endswith('.md') and cls._CHAPTER_PREFIX_RE.match(name) is not None

    @classmethod
    def _find_problems(cls, content: str) -> List[str]:
        """Return the list of format problems found in chapter text."""
        problems = []
        # 1. Bold markers.
        if '**' in content:
            problems.append("包含加粗格式(**text**)")
        # 2. Sub-headings.
        if cls._SUBHEADING_CHECK_RE.search(content):
            problems.append("包含多余的标题格式(##)")
        # 3. Chapter heading must look like "# 第N章 标题".
        if not cls._VALID_HEADER_RE.search(content):
            problems.append("章节标题格式不正确")
        # 4. Over-long paragraphs (heading lines are exempt).
        for index, line in enumerate(content.split('\n'), start=1):
            text = line.strip()
            if text and not line.startswith('#') and len(text) > cls._MAX_PARAGRAPH_CHARS:
                problems.append(f"{index}行段落过长")
        # 5. The chapter must end with a separator line.
        if not content.strip().endswith('---'):
            problems.append("章节结尾缺少分隔线(---)")
        return problems

    def check_chapter(self, file_path: str) -> List[str]:
        """Check one chapter file without modifying it.

        Args:
            file_path: Path of the chapter file (read as UTF-8).

        Returns:
            A list of human-readable problem descriptions; empty when the
            chapter is clean.  If the file cannot be read, the list holds a
            single read-error message instead of raising.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            return [f"读取文件时出错: {e}"]
        return self._find_problems(content)

    def _apply_fixes(self, content: str, filename: str) -> Tuple[str, List[str]]:
        """Apply the enabled format rules to chapter text.

        Args:
            content: Full text of the chapter.
            filename: Base name of the chapter file; its numeric prefix gives
                the chapter number and it is the key into the title map.

        Returns:
            ``(new_content, fixes)`` where *fixes* names only the rules that
            actually changed the text.
        """
        rules = self.config.get('format_rules', {})
        fixes = []

        # 1. Remove bold markers.
        if rules.get('remove_bold', True) and '**' in content:
            content = content.replace('**', '')
            fixes.append("移除加粗格式")

        # 2. Headings.
        if rules.get('fix_headings', True):
            # Demote "##"/"###" sub-headings to plain text.
            demoted = self._SUBHEADING_RE.sub(r'\1', content)
            if demoted != content:
                content = demoted
                fixes.append("移除多余标题标记")

            header = self._HEADER_RE.search(content)
            if header:
                try:
                    number = int(filename.split('_')[0])
                except ValueError:
                    # No numeric prefix: leave the heading alone instead of
                    # aborting every other fix for this file.
                    number = None
                if number is not None:
                    title = self.config.get('chapter_title_map', {}).get(
                        filename, header.group(2)
                    )
                    # "章 " was missing here before, which produced headings
                    # that failed the check and grew on every run.
                    new_header = f"# 第{number}章 {title}"
                    if header.group(0) != new_header:
                        # Splice by position rather than re.sub so that
                        # backslashes in a title are never treated as
                        # replacement-template escapes.
                        content = (
                            content[:header.start()] + new_header + content[header.end():]
                        )
                        fixes.append(f"优化标题为: {title}")

        # 3. Put quoted dialogue on its own paragraph.
        # NOTE(review): this also isolates short inline quotes and can leave
        # trailing punctuation alone on a line - confirm this is intended.
        if rules.get('normalize_dialogue', True):
            normalized = self._ASCII_QUOTE_RE.sub(self._DIALOGUE_REPL, content)
            normalized = self._CURLY_QUOTE_RE.sub(self._DIALOGUE_REPL, normalized)
            if normalized != content:
                content = normalized
                fixes.append("标准化对话格式")

        # 4. Trim every paragraph and drop empty ones.
        if rules.get('add_line_breaks', True):
            rejoined = '\n\n'.join(
                para.strip() for para in content.split('\n\n') if para.strip()
            )
            if rejoined != content:
                content = rejoined
                fixes.append("优化段落分隔")

        # 5. Make sure the chapter ends with a separator line.
        if rules.get('end_with_separator', True) and not content.strip().endswith('---'):
            content = content.rstrip() + '\n\n---'
            fixes.append("添加章节结尾分隔线")

        # 6. Collapse runs of three or more newlines.
        collapsed = re.sub(r'\n{3,}', '\n\n', content)
        if collapsed != content:
            content = collapsed
            fixes.append("移除多余空白行")

        return content, fixes

    def fix_chapter(self, file_path: str) -> Tuple[bool, str]:
        """Fix one chapter file in place.

        When the text changes, the original content is first written to
        ``<file_path>.bak`` (overwriting an existing backup) and
        ``problems_fixed`` is incremented.

        Args:
            file_path: Path of the chapter file (read and written as UTF-8).

        Returns:
            ``(True, message)`` if the file was rewritten, otherwise
            ``(False, message)``; errors are reported in the message rather
            than raised.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                original_content = f.read()

            content, fixes_applied = self._apply_fixes(
                original_content, os.path.basename(file_path)
            )
            if content == original_content:
                return False, "无需修复,格式已正确"

            with open(file_path + '.bak', 'w', encoding='utf-8') as f:
                f.write(original_content)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
            self.problems_fixed += 1
            return True, f"修复完成: {', '.join(fixes_applied)}"
        except Exception as e:
            # Deliberate best-effort boundary: one bad file must not abort a
            # directory run or the watcher thread.
            return False, f"修复时出错: {e}"

    def process_directory(self, directory: str) -> Dict:
        """Check every chapter in *directory* and fix those with problems.

        Files are visited in sorted order so the report is deterministic.

        Args:
            directory: Directory containing the chapter ``.md`` files.

        Returns:
            Dict with ``total_chapters``, ``chapters_fixed``,
            ``problems_found`` and ``details`` (one entry per chapter that
            had problems: ``chapter``, ``problems``, ``fixed``, ``message``).
        """
        results = {
            "total_chapters": 0,
            "chapters_fixed": 0,
            "problems_found": 0,
            "details": [],
        }
        for file_path in sorted(Path(directory).glob("*.md")):
            if not self._is_chapter_file(file_path.name):
                continue
            results["total_chapters"] += 1

            problems = self.check_chapter(str(file_path))
            if not problems:
                continue
            results["problems_found"] += len(problems)

            fixed, message = self.fix_chapter(str(file_path))
            if fixed:
                results["chapters_fixed"] += 1
            results["details"].append({
                "chapter": file_path.name,
                "problems": problems,
                "fixed": fixed,
                "message": message,
            })
        return results

    def monitor_and_fix(self, directory: str, interval: int = 60):
        """Watch *directory* and fix newly created chapter files.

        Blocks until interrupted with Ctrl+C.  Requires the third-party
        ``watchdog`` package, which is imported only when this is called.

        Args:
            directory: Directory to watch (not recursive).
            interval: Seconds the main thread sleeps between wake-ups.
        """
        import time
        from watchdog.observers import Observer
        from watchdog.events import FileSystemEventHandler

        class ChapterHandler(FileSystemEventHandler):
            """Run check + fix on every newly created chapter file."""

            def __init__(self, fixer):
                super().__init__()
                self.fixer = fixer

            def on_created(self, event):
                if event.is_directory:
                    return
                name = os.path.basename(event.src_path)
                if not self.fixer._is_chapter_file(name):
                    return
                print(f"📝 检测到新章节: {name}")
                time.sleep(2)  # give the writer time to finish the file
                problems = self.fixer.check_chapter(event.src_path)
                if problems:
                    print(f" 发现格式问题: {problems}")
                    fixed, message = self.fixer.fix_chapter(event.src_path)
                    if fixed:
                        print(f" ✅ 自动修复: {message}")

        print(f"🔍 开始监控目录: {directory}")
        print(" 等待inkos生成新章节...")
        observer = Observer()
        observer.schedule(ChapterHandler(self), directory, recursive=False)
        observer.start()
        try:
            while True:
                time.sleep(interval)
        except KeyboardInterrupt:
            pass
        finally:
            # Stop the observer thread on any exit path, not only Ctrl+C.
            observer.stop()
            observer.join()
def _check_directory(fixer, directory: str) -> Dict:
    """Collect format problems for every chapter in *directory*, read-only.

    Uses only ``fixer.check_chapter`` and never writes to disk.  The result
    has the same keys as ``ChapterFormatFixer.process_directory`` so both
    can share the reporting code; ``chapters_fixed`` is always 0.
    """
    results = {
        "total_chapters": 0,
        "chapters_fixed": 0,
        "problems_found": 0,
        "details": [],
    }
    for file_path in sorted(Path(directory).glob("*.md")):
        # Chapter files start with a numeric prefix such as "0010_".
        if not re.match(r'[0-9]{3}', file_path.name):
            continue
        results["total_chapters"] += 1
        problems = fixer.check_chapter(str(file_path))
        if problems:
            results["problems_found"] += len(problems)
            results["details"].append({
                "chapter": file_path.name,
                "problems": problems,
                "fixed": False,
                "message": "",
            })
    return results


def main(argv=None):
    """Command-line entry point.

    Actions:
        check   - report format problems without modifying any file.
        fix     - check every chapter and rewrite those with problems.
        monitor - watch the directory and fix newly created chapters.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    import argparse

    parser = argparse.ArgumentParser(description="inkos章节格式检查和修复工具")
    parser.add_argument("action", choices=["check", "fix", "monitor"], help="操作类型")
    parser.add_argument(
        "--directory",
        default="/root/.openclaw/workspace/tomato-novel/books/末日重生-开局囤货十亿物资/chapters",
        help="章节目录",
    )
    parser.add_argument("--config", help="配置文件路径")
    args = parser.parse_args(argv)

    fixer = ChapterFormatFixer(args.config)

    if args.action == "check":
        print(f"🔍 检查章节格式: {args.directory}")
        # "check" used to call process_directory(), which rewrites files;
        # a check must be read-only.
        results = _check_directory(fixer, args.directory)
        print("\n📊 检查结果:")
        print(f" 总章节数: {results['total_chapters']}")
        print(f" 发现问题: {results['problems_found']}")
        print(f" 已修复章节: {results['chapters_fixed']}")
        if results['details']:
            print("\n📋 详细报告:")
            for detail in results['details']:
                status = "✅ 已修复" if detail['fixed'] else "⚠️ 未修复"
                print(f" {detail['chapter']}: {status}")
                if detail['problems']:
                    print(f" 问题: {', '.join(detail['problems'])}")
                if detail['message']:
                    print(f" 消息: {detail['message']}")
    elif args.action == "fix":
        print(f"🛠️ 修复章节格式: {args.directory}")
        results = fixer.process_directory(args.directory)
        print("\n✅ 修复完成:")
        print(f" 总章节数: {results['total_chapters']}")
        print(f" 发现问题: {results['problems_found']}")
        print(f" 已修复章节: {results['chapters_fixed']}")
        if results['chapters_fixed'] > 0:
            print("\n📝 修复详情:")
            for detail in results['details']:
                if detail['fixed']:
                    print(f" {detail['chapter']}: {detail['message']}")
    elif args.action == "monitor":
        print("🔄 启动格式监控模式")
        print(" 此模式将持续监控目录自动修复inkos生成的新章节")
        print(" 按 Ctrl+C 停止监控")
        fixer.monitor_and_fix(args.directory)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()