novel-doomsday-resurgence/scripts/format_check_fix.py

301 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
inkos章节格式检查和自动修复脚本
在inkos生成章节后立即运行确保格式标准化
"""
import os
import re
import json
import sys
from pathlib import Path
from typing import Dict, List, Tuple
class ChapterFormatFixer:
    """Check and auto-repair the Markdown formatting of inkos-generated chapters.

    Chapter files are expected to be named ``<number>_<slug>.md`` (for
    example ``0002_暗流.md``). The fixer can report formatting problems,
    rewrite a chapter in place (writing the previous content to
    ``<file>.bak``), process a whole directory, or watch a directory for
    newly created chapters.
    """

    # Zero-padded chapter files such as "0002_暗流.md" or "0010_倒计时.md".
    # A plain "000" prefix test would silently skip chapter 10 and above.
    _CHAPTER_FILE_RE = re.compile(r'^\d{4,}_.*\.md$')

    # Non-heading lines longer than this many characters are reported.
    _MAX_PARAGRAPH_CHARS = 500

    # A quoted span surrounded by other text on the same line. The ASCII
    # variant must stay on one line: opening and closing marks are the same
    # character, so a span allowed to cross newlines would pair the closing
    # mark of one quote with the opening mark of the next on a re-run.
    _ASCII_DIALOGUE_RE = re.compile(r'([^"\n])(")([^"\n]+)(")([^"\n])')
    _CURLY_DIALOGUE_RE = re.compile(r'([^"\n])(“)([^”]+)(”)([^"\n])')
    _DIALOGUE_REPLACEMENT = r'\1\n\n\2\3\4\n\n\5'

    def __init__(self, config_path: str = None):
        """Load the configuration and reset the fixed-file counter.

        Args:
            config_path: Optional path to a JSON configuration file. When it
                is missing or does not exist, the built-in defaults are used.
        """
        self.config = self.load_config(config_path)
        # Number of files rewritten by fix_chapter on this instance.
        self.problems_fixed = 0

    @staticmethod
    def _default_config() -> Dict:
        """Return a fresh copy of the built-in configuration."""
        return {
            "chapter_title_map": {
                "0002_暗流.md": "暗流涌动",
                "0003_仓鼠行动.md": "仓鼠计划",
                "0004_粮草先行.md": "物资先行",
                "0005_铁壁.md": "钢铁堡垒",
                "0006_焊花.md": "焊光四溅",
                "0007_骨刺.md": "骨刺警告",
                "0008_暗流2.md": "暗流再起",
                "0009_对峙.md": "初次对峙",
                "0010_倒计时.md": "末日倒计时",
                "0011_筹码.md": "生存筹码",
                "0012_质询.md": "深夜质询",
                "0013_铁锈.md": "铁锈危机",
                "0014_断水.md": "断水危机",
            },
            "format_rules": {
                "remove_bold": True,
                "fix_headings": True,
                "normalize_dialogue": True,
                "add_line_breaks": True,
                "end_with_separator": True,
            },
        }

    @classmethod
    def _is_chapter_file(cls, filename: str) -> bool:
        """Return True when *filename* (no directory part) names a chapter file."""
        return cls._CHAPTER_FILE_RE.match(filename) is not None

    def load_config(self, config_path: str = None) -> Dict:
        """Load the configuration, falling back to built-in defaults.

        A user-supplied file is layered over the defaults, so a partial
        config (for example one that only overrides a single format rule)
        still yields every key the fixer reads.

        Args:
            config_path: Optional path to a JSON configuration file.

        Returns:
            The effective configuration dictionary.

        Raises:
            ValueError: If the file is not valid JSON (``json.JSONDecodeError``
                is a ``ValueError``) or its top level is not a JSON object.
            OSError: If the file exists but cannot be read.
        """
        defaults = self._default_config()
        if not (config_path and os.path.exists(config_path)):
            return defaults

        with open(config_path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
        if not isinstance(loaded, dict):
            raise ValueError(f"配置文件必须是JSON对象: {config_path}")

        config = {**defaults, **loaded}
        # A missing or malformed section falls back to the default section.
        if not isinstance(config.get("chapter_title_map"), dict):
            config["chapter_title_map"] = defaults["chapter_title_map"]
        user_rules = config.get("format_rules")
        if isinstance(user_rules, dict):
            config["format_rules"] = {**defaults["format_rules"], **user_rules}
        else:
            config["format_rules"] = defaults["format_rules"]
        return config

    def check_chapter(self, file_path: str) -> List[str]:
        """Report the formatting problems found in one chapter file.

        Args:
            file_path: Path of the chapter file to inspect.

        Returns:
            A list of problem descriptions; empty when the chapter is clean.
            If the file cannot be read, the list holds a single entry
            describing the read error.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            return [f"读取文件时出错: {str(e)}"]

        problems = []
        # 1. Bold markers.
        if '**' in content:
            problems.append("包含加粗格式(**text**)")
        # 2. Sub-headings (## and deeper).
        if re.search(r'^##+ ', content, re.MULTILINE):
            problems.append("包含多余的标题格式(##)")
        # 3. The chapter heading must look like "# 第N章 <title>".
        if not re.search(r'^# 第\d+章 ', content, re.MULTILINE):
            problems.append("章节标题格式不正确")
        # 4. Over-long paragraphs (heading lines are exempt).
        for i, line in enumerate(content.split('\n')):
            if len(line.strip()) > self._MAX_PARAGRAPH_CHARS and not line.startswith('#'):
                problems.append(f"第{i + 1}行段落过长")
        # 5. The chapter must end with a separator line.
        if not content.strip().endswith('---'):
            problems.append("章节结尾缺少分隔线(---)")
        return problems

    def _fix_headings(self, content: str, filename: str) -> Tuple[str, str]:
        """Strip sub-heading markers and rebuild the chapter heading.

        Returns:
            ``(new_content, title)``; ``title`` is None when the content has
            no ``# ...`` heading line to rebuild.

        Raises:
            ValueError: If a heading exists but *filename* does not start
                with a number followed by ``_``.
        """
        # Demote "## ..." (and deeper) headings to plain text lines.
        content = re.sub(r'^##+ (.+)$', r'\1', content, flags=re.MULTILINE)

        heading = re.search(r'^# (第\d+章 )?(.+)$', content, re.MULTILINE)
        if heading is None:
            return content, None

        chapter_number = int(filename.split('_')[0])
        # Prefer the configured title; otherwise keep the existing one.
        title = self.config['chapter_title_map'].get(filename, heading.group(2))
        # Must match the "# 第N章 <title>" shape that check_chapter requires.
        header = f"# 第{chapter_number}章 {title}"
        # Splice by position rather than re.sub so that a backslash in the
        # title is never interpreted as a replacement-template escape.
        content = content[:heading.start()] + header + content[heading.end():]
        return content, title

    def fix_chapter(self, file_path: str) -> Tuple[bool, str]:
        """Apply the enabled format rules to one chapter file, in place.

        When the content changes, the previous content is written to
        ``<file_path>.bak`` (overwriting any existing backup) before the
        file is rewritten.

        Args:
            file_path: Path of the chapter file to repair.

        Returns:
            ``(True, message)`` when the file was rewritten, otherwise
            ``(False, message)``; the message lists the applied fixes or
            describes the error.
        """
        # Deliberately broad boundary: any failure is reported for this one
        # file instead of aborting a directory run or the monitor.
        try:
            filename = os.path.basename(file_path)
            with open(file_path, 'r', encoding='utf-8') as f:
                original_content = f.read()

            content = original_content
            rules = self.config['format_rules']
            fixes_applied = []

            # 1. Remove bold markers.
            if rules['remove_bold'] and '**' in content:
                content = content.replace('**', '')
                fixes_applied.append("移除加粗格式")

            # 2. Fix headings.
            if rules['fix_headings']:
                content, new_title = self._fix_headings(content, filename)
                if new_title is not None:
                    fixes_applied.append(f"优化标题为: {new_title}")

            # 3. Put each quoted span in its own paragraph.
            if rules['normalize_dialogue']:
                content = self._ASCII_DIALOGUE_RE.sub(self._DIALOGUE_REPLACEMENT, content)
                content = self._CURLY_DIALOGUE_RE.sub(self._DIALOGUE_REPLACEMENT, content)
                fixes_applied.append("标准化对话格式")

            # 4. Trim every paragraph and drop the empty ones.
            if rules['add_line_breaks']:
                paragraphs = (para.strip() for para in content.split('\n\n'))
                content = '\n\n'.join(para for para in paragraphs if para)
                fixes_applied.append("优化段落分隔")

            # 5. Make sure the chapter ends with a separator line.
            if rules['end_with_separator'] and not content.strip().endswith('---'):
                content = content.rstrip() + '\n\n---'
                fixes_applied.append("添加章节结尾分隔线")

            # 6. Collapse runs of blank lines.
            content = re.sub(r'\n{3,}', '\n\n', content)

            if content == original_content:
                return False, "无需修复,格式已正确"

            # Keep the pre-fix content next to the chapter before rewriting.
            with open(file_path + '.bak', 'w', encoding='utf-8') as f:
                f.write(original_content)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
            self.problems_fixed += 1
            return True, f"修复完成: {', '.join(fixes_applied)}"
        except Exception as e:
            return False, f"修复时出错: {str(e)}"

    def process_directory(self, directory: str) -> Dict:
        """Check every chapter file in *directory* and fix those with problems.

        Args:
            directory: Directory holding the chapter ``.md`` files.

        Returns:
            A dict with ``total_chapters``, ``chapters_fixed``,
            ``problems_found`` and ``details`` (one entry per chapter that
            had problems: file name, problems, whether it was fixed, and the
            fixer's message).
        """
        results = {
            "total_chapters": 0,
            "chapters_fixed": 0,
            "problems_found": 0,
            "details": []
        }
        # Sorted so the report order does not depend on the filesystem.
        for file_path in sorted(Path(directory).glob("*.md")):
            if not self._is_chapter_file(file_path.name):
                continue
            results["total_chapters"] += 1

            problems = self.check_chapter(str(file_path))
            if not problems:
                continue
            results["problems_found"] += len(problems)

            fixed, message = self.fix_chapter(str(file_path))
            if fixed:
                results["chapters_fixed"] += 1
            results["details"].append({
                "chapter": file_path.name,
                "problems": problems,
                "fixed": fixed,
                "message": message
            })
        return results

    def monitor_and_fix(self, directory: str, interval: int = 60):
        """Watch *directory* and fix chapter files as they are created.

        Blocks until interrupted with Ctrl+C. Requires the third-party
        ``watchdog`` package, imported here so the rest of the class works
        without it.

        Args:
            directory: Directory to watch (not recursive).
            interval: Seconds the main thread sleeps between wake-ups while
                the observer runs.
        """
        import time
        from watchdog.observers import Observer
        from watchdog.events import FileSystemEventHandler

        class ChapterHandler(FileSystemEventHandler):
            """Runs the check/fix cycle on every newly created chapter file."""

            def __init__(self, fixer):
                super().__init__()
                self.fixer = fixer

            def on_created(self, event):
                if event.is_directory:
                    return
                name = os.path.basename(event.src_path)
                if not self.fixer._is_chapter_file(name):
                    return
                print(f"📝 检测到新章节: {name}")
                time.sleep(2)  # give the writer time to finish the file
                problems = self.fixer.check_chapter(event.src_path)
                if problems:
                    print(f" 发现格式问题: {problems}")
                    fixed, message = self.fixer.fix_chapter(event.src_path)
                    if fixed:
                        print(f" ✅ 自动修复: {message}")

        print(f"🔍 开始监控目录: {directory}")
        print(" 等待inkos生成新章节...")
        observer = Observer()
        observer.schedule(ChapterHandler(self), directory, recursive=False)
        observer.start()
        try:
            while True:
                time.sleep(interval)
        except KeyboardInterrupt:
            pass
        finally:
            # Always shut the observer thread down, whatever ended the loop.
            observer.stop()
            observer.join()
def _print_totals(results):
    """Print the three summary counters shared by the check and fix reports."""
    counters = (
        ("总章节数", "total_chapters"),
        ("发现问题", "problems_found"),
        ("已修复章节", "chapters_fixed"),
    )
    for label, key in counters:
        print(f" {label}: {results[key]}")


def _report_check(results):
    """Print the summary and per-chapter report for the ``check`` action."""
    print("\n📊 检查结果:")
    _print_totals(results)
    if not results['details']:
        return
    print("\n📋 详细报告:")
    for entry in results['details']:
        status = "✅ 已修复" if entry['fixed'] else "⚠️ 未修复"
        print(f" {entry['chapter']}: {status}")
        if entry['problems']:
            print(f" 问题: {', '.join(entry['problems'])}")
        if entry['message']:
            print(f" 消息: {entry['message']}")


def _report_fix(results):
    """Print the summary and the list of repaired chapters for the ``fix`` action."""
    print("\n✅ 修复完成:")
    _print_totals(results)
    if results['chapters_fixed'] > 0:
        print("\n📝 修复详情:")
        for entry in results['details']:
            if entry['fixed']:
                print(f" {entry['chapter']}: {entry['message']}")


def main():
    """Command-line entry point: parse arguments and run the chosen action."""
    import argparse

    parser = argparse.ArgumentParser(description="inkos章节格式检查和修复工具")
    parser.add_argument("action", choices=["check", "fix", "monitor"], help="操作类型")
    parser.add_argument("--directory", default="/root/.openclaw/workspace/tomato-novel/books/末日重生-开局囤货十亿物资/chapters", help="章节目录")
    parser.add_argument("--config", help="配置文件路径")
    args = parser.parse_args()

    fixer = ChapterFormatFixer(args.config)
    action = args.action

    if action == "monitor":
        print("🔄 启动格式监控模式")
        print(" 此模式将持续监控目录自动修复inkos生成的新章节")
        print(" 按 Ctrl+C 停止监控")
        fixer.monitor_and_fix(args.directory)
        return

    # NOTE(review): "check" and "fix" both go through process_directory; the
    # two actions differ only in the banner and the report that is printed.
    # If process_directory rewrites files, then "check" does too — confirm
    # that this is intended.
    if action == "check":
        print(f"🔍 检查章节格式: {args.directory}")
        _report_check(fixer.process_directory(args.directory))
    elif action == "fix":
        print(f"🛠️ 修复章节格式: {args.directory}")
        _report_fix(fixer.process_directory(args.directory))


if __name__ == "__main__":
    main()