📚 小说内容:
- 《末日重生-开局囤货十亿物资》33章
- 完整的状态文件、记忆索引、钩子系统

🛠️ 系统配置:
- 版本控制管理系统
- 自动化脚本系统
- 质量监控系统

🧠 固化记忆:
- 长期记忆文件
- 系统配置文档
- 恢复流程指南

💾 数据安全:
- 本地备份系统
- Git版本控制
- 远程同步机制

同步时间: 2026-03-30 16:25:35
系统状态: inkos正常运行中 (PID: 1433309)
创作进度: 第33章《油粮》创作中
138 lines
4.0 KiB
Python
138 lines
4.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
强力合并段落脚本
|
|
彻底解决短段落问题
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import shutil
|
|
|
|
# ATX-style Markdown heading: one to six '#' followed by whitespace or end of line.
_HEADING_RE = re.compile(r'#{1,6}(?:\s|$)')


def _count_cjk(text):
    """Return how many characters of *text* fall in U+4E00..U+9FFF."""
    return sum(1 for ch in text if '\u4e00' <= ch <= '\u9fff')


def force_merge_paragraphs(content, min_chars=35):
    """Merge runs of consecutive short lines into single paragraphs.

    The text is split into sections on blank lines. Inside each section every
    non-empty line is stripped; consecutive lines holding fewer than
    ``min_chars`` CJK characters are joined with a single space into one
    line, while longer lines are emitted on their own.

    Heading handling:
      * a section whose first line starts with ``'# '`` is kept verbatim;
      * any other Markdown heading line (``#`` .. ``######``) is never merged
        into neighbouring prose -- it ends the pending run of short lines and
        is emitted on its own line.

    Args:
        content: Full chapter text.
        min_chars: A line with fewer CJK characters than this counts as
            "short". Defaults to 35.

    Returns:
        The rewritten text, with sections still separated by blank lines.
    """
    merged_sections = []

    for section in content.split('\n\n'):
        # str.split always yields at least one element, so lines[0] is safe.
        lines = section.split('\n')

        # Title sections are passed through untouched.
        if lines[0].startswith('# '):
            merged_sections.append(section)
            continue

        merged_lines = []
        pending = []  # short lines waiting to be joined

        for line in lines:
            stripped = line.strip()
            if not stripped:
                continue

            is_heading = _HEADING_RE.match(stripped) is not None
            if not is_heading and _count_cjk(stripped) < min_chars:
                pending.append(stripped)
                continue

            # A long line or a heading closes the current run of short lines.
            if pending:
                merged_lines.append(' '.join(pending))
                pending = []
            merged_lines.append(stripped)

        # Flush short lines left over at the end of the section.
        if pending:
            merged_lines.append(' '.join(pending))

        # An all-blank section joins to '', which preserves extra blank lines.
        merged_sections.append('\n'.join(merged_lines))

    return '\n\n'.join(merged_sections)
|
|
|
|
def fix_chapter(filepath):
    """Merge short paragraphs in one chapter file, in place.

    Steps:
      1. Copy the file to a sibling backup whose name has
         ``_强力合并前备份`` inserted before the extension.
      2. Run ``force_merge_paragraphs`` on the file content.
      3. Normalise punctuation: ``——`` becomes ``—`` and text wrapped in
         straight double quotes becomes ``「…」``.
      4. Overwrite the original file and print summary statistics.

    Args:
        filepath: Path of the chapter file to rewrite.

    Returns:
        The fraction (0.0 - 1.0) of non-empty lines not starting with ``#``
        that still hold fewer than 35 CJK characters after the rewrite;
        0 when there are no such lines.

    Errors from copying, reading or writing the file are not caught here.
    """
    print(f"修复: {os.path.basename(filepath)}")

    # Build the backup name from the real extension only. A plain
    # str.replace('.md', ...) would also rewrite '.md' inside directory
    # names, and would leave the path unchanged (copy onto itself) for a
    # file without that extension.
    root, ext = os.path.splitext(filepath)
    backup_path = f"{root}_强力合并前备份{ext}"
    shutil.copy2(filepath, backup_path)

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    original_length = len(content)

    fixed_content = force_merge_paragraphs(content)

    # Punctuation normalisation.
    fixed_content = fixed_content.replace('——', '—')
    fixed_content = re.sub(r'["]([^"]+)["]', r'「\1」', fixed_content)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(fixed_content)

    new_length = len(fixed_content)

    # Statistics: every non-empty line that is not a heading is a paragraph.
    paragraphs = [
        p for p in fixed_content.split('\n')
        if p.strip() and not p.startswith('#')
    ]
    total_paragraphs = len(paragraphs)
    short_paragraphs = sum(
        1
        for para in paragraphs
        if sum(1 for c in para if '\u4e00' <= c <= '\u9fff') < 35
    )
    short_ratio = short_paragraphs / total_paragraphs if total_paragraphs > 0 else 0

    print(f"✅ 修复完成")
    print(f" - 原始长度: {original_length} 字符")
    print(f" - 修复后长度: {new_length} 字符")
    print(f" - 段落总数: {total_paragraphs}")
    print(f" - 短段落数: {short_paragraphs}")
    print(f" - 短段比例: {short_ratio:.1%}")

    return short_ratio
|
|
|
|
def main(chapter_file=None):
    """Run the paragraph merge on one chapter and print a report.

    Args:
        chapter_file: Path of the chapter to fix. When omitted, the first
            command-line argument is used if present; otherwise the
            original built-in chapter path is used.

    Returns:
        None. If the file does not exist, an error line is printed and
        nothing else is done.
    """
    if chapter_file is None:
        import sys  # local import: only needed for the CLI fallback

        if len(sys.argv) > 1:
            chapter_file = sys.argv[1]
        else:
            chapter_file = "/root/.openclaw/workspace/tomato-novel/books/末日重生-开局囤货十亿物资/chapters/0017_煎熬.md"

    if not os.path.exists(chapter_file):
        print(f"❌ 文件不存在: {chapter_file}")
        return

    print("=== 强力合并段落 ===")
    print(f"目标文件: {chapter_file}")
    print("")

    short_ratio = fix_chapter(chapter_file)

    print("")
    # Target: fewer than 30% of paragraphs may remain short.
    if short_ratio < 0.3:
        print("✅ 段落合并成功!短段比例 < 30%")
    else:
        print(f"⚠️ 仍需优化: 短段比例 {short_ratio:.1%} (目标 < 30%)")

    print("")
    print("🎯 建议:")
    print("1. 运行质量检查验证效果")
    print("2. 检查其他章节的段落结构")
    print("3. 调整 inkos 写作参数")
    print("4. 建立自动合并机制")
|
|
|
|
if __name__ == "__main__":
    # Run the merge only when executed as a script, not when imported.
    main()