#!/usr/bin/env python3
"""
inkos chapter format checking and auto-fix script.

Run it right after inkos generates a chapter to keep the formatting standardized.
"""

import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple


class ChapterFormatFixer:
    """Check and repair the Markdown formatting of inkos-generated chapters.

    Chapter files are recognised by name: a zero-padded number of at least
    four digits, an underscore, a title and the ``.md`` suffix (for example
    ``0002_暗流.md`` or ``0010_倒计时.md``).
    """

    # Chapter file names: "<4+ digits>_<anything>.md". The previous
    # ``startswith('000')`` test silently skipped chapter 10 and later.
    _CHAPTER_NAME_RE = re.compile(r'^\d{4,}_.*\.md$')

    def __init__(self, config_path: Optional[str] = None):
        """Load the configuration and reset the fixed-file counter.

        Args:
            config_path: Optional path to a JSON configuration file.
        """
        # Effective configuration ("chapter_title_map" and "format_rules").
        self.config = self.load_config(config_path)
        # Number of files rewritten by fix_chapter() on this instance.
        self.problems_fixed = 0

    @classmethod
    def _is_chapter_file(cls, name: str) -> bool:
        """Return True when *name* (a bare file name) looks like a chapter file."""
        return cls._CHAPTER_NAME_RE.match(name) is not None

    @staticmethod
    def _isolate_quotes(text: str, opening: str, closing: str) -> str:
        """Put inline quoted dialogue on its own paragraph.

        Quotes are paired strictly left to right, so a closing quote is never
        mistaken for the opening quote of the following text. A quoted span is
        wrapped in blank lines only when it has a neighbouring character on
        both sides and neither neighbour is a newline or an ASCII double
        quote; spans that already stand alone are left untouched, which makes
        the operation idempotent.
        """
        pattern = re.compile(
            re.escape(opening) + '[^' + re.escape(closing) + ']+' + re.escape(closing)
        )

        def wrap(match):
            before = text[match.start() - 1] if match.start() > 0 else ''
            after = text[match.end()] if match.end() < len(text) else ''
            if not before or not after or before in '"\n' or after in '"\n':
                return match.group(0)
            return f"\n\n{match.group(0)}\n\n"

        return pattern.sub(wrap, text)

    def load_config(self, config_path: Optional[str] = None) -> Dict:
        """Return the configuration, merging a JSON file over the defaults.

        Args:
            config_path: Path to a JSON file. When it is falsy or the file
                does not exist, the built-in defaults are returned.

        Returns:
            A dict with "chapter_title_map" (file name -> improved title) and
            "format_rules" (rule name -> bool). Keys missing from the file,
            including individual format rules, fall back to the defaults so a
            partial config file no longer breaks fix_chapter().

        Raises:
            OSError, json.JSONDecodeError: if the file cannot be read/parsed.
        """
        defaults = {
            "chapter_title_map": {
                "0002_暗流.md": "暗流涌动",
                "0003_仓鼠行动.md": "仓鼠计划",
                "0004_粮草先行.md": "物资先行",
                "0005_铁壁.md": "钢铁堡垒",
                "0006_焊花.md": "焊光四溅",
                "0007_骨刺.md": "骨刺警告",
                "0008_暗流(2).md": "暗流再起",
                "0009_对峙.md": "初次对峙",
                "0010_倒计时.md": "末日倒计时",
                "0011_筹码.md": "生存筹码",
                "0012_质询.md": "深夜质询",
                "0013_铁锈.md": "铁锈危机",
                "0014_断水.md": "断水危机"
            },
            "format_rules": {
                "remove_bold": True,
                "fix_headings": True,
                "normalize_dialogue": True,
                "add_line_breaks": True,
                "end_with_separator": True
            }
        }

        if not (config_path and os.path.exists(config_path)):
            return defaults

        with open(config_path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)

        if not isinstance(loaded, dict):
            # Unexpected top-level JSON type: hand it back untouched, as the
            # original implementation did.
            return loaded

        merged = {**defaults, **loaded}
        rules = dict(defaults["format_rules"])
        if isinstance(loaded.get("format_rules"), dict):
            rules.update(loaded["format_rules"])
        merged["format_rules"] = rules
        return merged

    def check_chapter(self, file_path: str) -> List[str]:
        """Report formatting problems found in one chapter file.

        Args:
            file_path: Path of the Markdown chapter to inspect.

        Returns:
            A list of human-readable problem descriptions; empty when the
            chapter passes every check. A read failure is reported as a
            single-item list instead of raising.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            return [f"读取文件时出错: {str(e)}"]

        problems = []

        # 1. Bold markers.
        if '**' in content:
            problems.append("包含加粗格式(**text**)")

        # 2. Sub-headings (## and deeper) are not allowed inside a chapter.
        if re.search(r'^##+ ', content, re.MULTILINE):
            problems.append("包含多余的标题格式(##)")

        # 3. The chapter must carry a "# 第N章 <title>" heading.
        if not re.search(r'^# 第\d+章 ', content, re.MULTILINE):
            problems.append("章节标题格式不正确")

        # 4. Overlong paragraphs (more than 500 characters on one line).
        for i, line in enumerate(content.split('\n')):
            stripped = line.strip()
            if stripped and not line.startswith('#') and len(stripped) > 500:
                problems.append(f"第{i+1}行段落过长")

        # 5. The chapter must end with a "---" separator.
        if not content.strip().endswith('---'):
            problems.append("章节结尾缺少分隔线(---)")

        return problems

    def fix_chapter(self, file_path: str) -> Tuple[bool, str]:
        """Apply the enabled format rules to one chapter file.

        When the content changes, the original text is saved next to the file
        as ``<file_path>.bak`` and the file is rewritten in place.

        Args:
            file_path: Path of the Markdown chapter to repair. Its base name
                must start with the chapter number followed by ``_`` when the
                heading rule is enabled.

        Returns:
            ``(True, message)`` when the file was rewritten, where the message
            lists only the fixes that actually changed something;
            ``(False, message)`` when nothing needed changing or an error
            occurred. Errors are reported in the message, never raised.
        """
        try:
            filename = os.path.basename(file_path)

            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            original_content = content
            fixes_applied = []
            rules = self.config['format_rules']

            # 1. Strip bold markers.
            if rules['remove_bold'] and '**' in content:
                content = content.replace('**', '')
                fixes_applied.append("移除加粗格式")

            # 2. Headings.
            if rules['fix_headings']:
                # Demote "##"-style sub-headings to plain text.
                demoted = re.sub(r'^##+ (.+)$', r'\1', content, flags=re.MULTILINE)
                if demoted != content:
                    content = demoted
                    fixes_applied.append("移除多余标题格式")

                # Rebuild the first "# ..." line as "# 第N章 <title>".
                chapter_match = re.search(r'^# (第\d+章 )?(.+)$', content, re.MULTILINE)
                if chapter_match:
                    chapter_num_int = int(filename.split('_')[0])
                    new_title = self.config['chapter_title_map'].get(
                        filename, chapter_match.group(2)
                    )
                    new_header = f"# 第{chapter_num_int}章 {new_title}"
                    if chapter_match.group(0) != new_header:
                        # A callable replacement inserts the header verbatim;
                        # a plain string would have backslashes in the title
                        # interpreted as regex escapes / group references.
                        content = re.sub(
                            r'^# .+$', lambda _m: new_header, content,
                            count=1, flags=re.MULTILINE
                        )
                        fixes_applied.append(f"优化标题为: {new_title}")

            # 3. Give inline dialogue its own paragraph (ASCII, then curly quotes).
            if rules['normalize_dialogue']:
                normalized = self._isolate_quotes(content, '"', '"')
                normalized = self._isolate_quotes(normalized, '“', '”')
                if normalized != content:
                    content = normalized
                    fixes_applied.append("标准化对话格式")

            # 4. Exactly one blank line between paragraphs, no stray whitespace.
            if rules['add_line_breaks']:
                paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]
                rejoined = '\n\n'.join(paragraphs)
                if rejoined != content:
                    content = rejoined
                    fixes_applied.append("优化段落分隔")

            # 5. Ensure the trailing "---" separator.
            if rules['end_with_separator'] and not content.strip().endswith('---'):
                content = content.rstrip() + '\n\n---'
                fixes_applied.append("添加章节结尾分隔线")

            # 6. Collapse runs of three or more newlines.
            collapsed = re.sub(r'\n{3,}', '\n\n', content)
            if collapsed != content:
                content = collapsed
                fixes_applied.append("移除多余空白行")

            if content == original_content:
                return False, "无需修复,格式已正确"

            # Keep a backup of the text as it was before this run.
            backup_path = file_path + '.bak'
            with open(backup_path, 'w', encoding='utf-8') as f:
                f.write(original_content)

            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)

            self.problems_fixed += 1
            return True, f"修复完成: {', '.join(fixes_applied)}"

        except Exception as e:
            # Boundary handler: callers rely on this method never raising.
            return False, f"修复时出错: {str(e)}"

    def process_directory(self, directory: str) -> Dict:
        """Check every chapter file in *directory* and fix those with problems.

        Args:
            directory: Directory whose ``*.md`` chapter files are processed
                (non-recursive).

        Returns:
            A dict with "total_chapters", "chapters_fixed", "problems_found"
            and "details"; "details" holds one entry (chapter, problems,
            fixed, message) per chapter that had at least one problem.
        """
        results = {
            "total_chapters": 0,
            "chapters_fixed": 0,
            "problems_found": 0,
            "details": []
        }

        # Sorted so the report order does not depend on the filesystem.
        for file_path in sorted(Path(directory).glob("*.md")):
            if not self._is_chapter_file(file_path.name):
                continue

            results["total_chapters"] += 1

            problems = self.check_chapter(str(file_path))
            if not problems:
                continue

            results["problems_found"] += len(problems)

            fixed, message = self.fix_chapter(str(file_path))
            if fixed:
                results["chapters_fixed"] += 1

            results["details"].append({
                "chapter": file_path.name,
                "problems": problems,
                "fixed": fixed,
                "message": message
            })

        return results

    def monitor_and_fix(self, directory: str, interval: int = 60):
        """Watch *directory* and auto-fix chapter files as they are created.

        Blocks until interrupted with Ctrl+C. Requires the third-party
        ``watchdog`` package, which is imported lazily here.

        Args:
            directory: Directory to watch (non-recursive).
            interval: Seconds the main thread sleeps between wake-ups while
                the observer thread does the work.
        """
        import time
        from watchdog.observers import Observer
        from watchdog.events import FileSystemEventHandler

        class ChapterHandler(FileSystemEventHandler):
            """Runs check + fix on every newly created chapter file."""

            def __init__(self, fixer):
                super().__init__()
                self.fixer = fixer

            def on_created(self, event):
                if event.is_directory:
                    return

                name = os.path.basename(event.src_path)
                if not self.fixer._is_chapter_file(name):
                    return

                print(f"📝 检测到新章节: {name}")
                time.sleep(2)  # give the writer time to finish the file

                problems = self.fixer.check_chapter(event.src_path)
                if problems:
                    print(f" 发现格式问题: {problems}")
                    fixed, message = self.fixer.fix_chapter(event.src_path)
                    if fixed:
                        print(f" ✅ 自动修复: {message}")
                    else:
                        # Surface failures instead of dropping them silently.
                        print(f" ⚠️ 未修复: {message}")

        print(f"🔍 开始监控目录: {directory}")
        print(" 等待inkos生成新章节...")

        event_handler = ChapterHandler(self)
        observer = Observer()
        observer.schedule(event_handler, directory, recursive=False)
        observer.start()

        try:
            while True:
                time.sleep(interval)
        except KeyboardInterrupt:
            pass
        finally:
            # Always shut the observer thread down, whatever ended the loop.
            observer.stop()
            observer.join()


def main():
    """Command-line entry point: parse arguments and run the chosen action.

    Actions:
        check   - process the directory and print a per-chapter report.
        fix     - process the directory and print which chapters were fixed.
        monitor - watch the directory and fix new chapters as they appear.

    NOTE(review): "check" goes through process_directory() exactly like
    "fix", so it also rewrites files - confirm that this is intended.
    """
    import argparse

    cli = argparse.ArgumentParser(description="inkos章节格式检查和修复工具")
    cli.add_argument("action", choices=["check", "fix", "monitor"], help="操作类型")
    cli.add_argument(
        "--directory",
        default="/root/.openclaw/workspace/tomato-novel/books/末日重生-开局囤货十亿物资/chapters",
        help="章节目录",
    )
    cli.add_argument("--config", help="配置文件路径")
    args = cli.parse_args()

    fixer = ChapterFormatFixer(args.config)
    target = args.directory

    def print_totals(summary):
        # The three counters shared by the "check" and "fix" reports.
        print(f" 总章节数: {summary['total_chapters']}")
        print(f" 发现问题: {summary['problems_found']}个")
        print(f" 已修复章节: {summary['chapters_fixed']}个")

    if args.action == "monitor":
        print("🔄 启动格式监控模式")
        print(" 此模式将持续监控目录,自动修复inkos生成的新章节")
        print(" 按 Ctrl+C 停止监控")
        fixer.monitor_and_fix(target)
        return

    if args.action == "check":
        print(f"🔍 检查章节格式: {target}")
        results = fixer.process_directory(target)

        print("\n📊 检查结果:")
        print_totals(results)

        entries = results['details']
        if entries:
            print("\n📋 详细报告:")
            for entry in entries:
                if entry['fixed']:
                    status = "✅ 已修复"
                else:
                    status = "⚠️ 未修复"
                print(f" {entry['chapter']}: {status}")
                if entry['problems']:
                    print(f" 问题: {', '.join(entry['problems'])}")
                if entry['message']:
                    print(f" 消息: {entry['message']}")
        return

    if args.action == "fix":
        print(f"🛠️ 修复章节格式: {target}")
        results = fixer.process_directory(target)

        print("\n✅ 修复完成:")
        print_totals(results)

        if results['chapters_fixed'] > 0:
            print("\n📝 修复详情:")
            repaired = [entry for entry in results['details'] if entry['fixed']]
            for entry in repaired:
                print(f" {entry['chapter']}: {entry['message']}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()