novel-doomsday-resurgence/force_merge_paragraphs.py
唐天洛 5dc8c00de0 feat(sync): 固化小说内容到Git仓库
📚 小说内容:
- 《末日重生-开局囤货十亿物资》33章
- 完整的状态文件、记忆索引、钩子系统

🛠️ 系统配置:
- 版本控制管理系统
- 自动化脚本系统
- 质量监控系统

🧠 固化记忆:
- 长期记忆文件
- 系统配置文档
- 恢复流程指南

💾 数据安全:
- 本地备份系统
- Git版本控制
- 远程同步机制

同步时间: 2026-03-30 16:25:35
系统状态: inkos正常运行中 (PID: 1433309)
创作进度: 第33章《油粮》创作中
2026-03-30 16:25:35 +08:00

138 lines
4.0 KiB
Python

#!/usr/bin/env python3
"""
强力合并段落脚本
彻底解决短段落问题
"""
import os
import re
import shutil
def _count_chinese_chars(text: str) -> int:
    """Return how many characters of *text* lie in U+4E00..U+9FFF."""
    return sum(1 for ch in text if '\u4e00' <= ch <= '\u9fff')


def _is_markdown_heading(line: str) -> bool:
    """Return True if *line* is 1-6 '#' followed by whitespace or end of line."""
    text = line.strip()
    level = len(text) - len(text.lstrip('#'))
    return 1 <= level <= 6 and (len(text) == level or text[level].isspace())


def force_merge_paragraphs(content: str, min_chinese_chars: int = 35) -> str:
    """Merge runs of short lines inside each blank-line-separated section.

    The text is split on blank lines (``'\\n\\n'``). Within a section,
    consecutive lines holding fewer than ``min_chinese_chars`` Chinese
    characters are joined with a single space into one line; longer lines are
    kept on their own line. Empty and whitespace-only lines inside a section
    are dropped, and kept lines are stripped of surrounding whitespace.

    Headings are never merged into prose: a section whose first line is a
    Markdown heading is passed through verbatim, and a heading found in the
    middle of a section ends the current run of short lines and is emitted on
    its own line.

    Args:
        content: The full text to process.
        min_chinese_chars: A line with fewer Chinese characters than this is
            treated as "short" and merged with adjacent short lines.

    Returns:
        The processed text, with sections re-joined by blank lines.
    """
    merged_sections = []
    for section in content.split('\n\n'):
        # str.split always yields at least one element, so lines[0] is safe.
        lines = section.split('\n')

        # A section that opens with a heading is kept exactly as written.
        if _is_markdown_heading(lines[0]):
            merged_sections.append(section)
            continue

        merged_lines = []
        pending_short = []  # short lines waiting to be joined into one
        for line in lines:
            stripped = line.strip()
            if not stripped:
                continue

            is_short = (
                not _is_markdown_heading(stripped)
                and _count_chinese_chars(stripped) < min_chinese_chars
            )
            if is_short:
                pending_short.append(stripped)
                continue

            # A long line or a heading closes the current run of short lines.
            if pending_short:
                merged_lines.append(' '.join(pending_short))
                pending_short = []
            merged_lines.append(stripped)

        # Flush short lines left over at the end of the section.
        if pending_short:
            merged_lines.append(' '.join(pending_short))

        # An all-blank section yields '' here, preserving the section count.
        merged_sections.append('\n'.join(merged_lines))

    return '\n\n'.join(merged_sections)
def fix_chapter(filepath):
    """Back up a chapter file, merge its short paragraphs in place, and report.

    Steps, in order: copy ``filepath`` to a sibling backup whose name carries
    the ``_强力合并前备份`` suffix before the extension; run the text through
    ``force_merge_paragraphs``; delete every ``——`` and rewrite each
    straight-double-quoted span as ``「…」``; overwrite ``filepath`` with the
    result; print a summary of lengths and short-line statistics.

    Args:
        filepath: Path to a UTF-8 encoded text file.

    Returns:
        The fraction (0.0-1.0) of non-empty lines not starting with ``#`` in
        the rewritten text that hold fewer than 35 Chinese characters, or 0
        when there are no such lines.

    Raises:
        OSError: Propagated from the backup copy, the read, or the write.
    """
    print(f"修复: {os.path.basename(filepath)}")

    # Build the backup name from the real extension. str.replace('.md', ...)
    # would rewrite every '.md' in the path (directory names included) and,
    # for a path without '.md', return it unchanged so copy2 would be asked to
    # copy the file onto itself.
    # NOTE: an existing backup from an earlier run is overwritten.
    root, ext = os.path.splitext(filepath)
    backup_path = f"{root}_强力合并前备份{ext}"
    shutil.copy2(filepath, backup_path)

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()
    original_length = len(content)

    fixed_content = force_merge_paragraphs(content)

    # Format clean-up: drop double em-dashes, then turn "..." into 「...」.
    # The replacement must emit both brackets; emitting only the closing one
    # leaves every quotation unbalanced.
    fixed_content = fixed_content.replace('——', '')
    fixed_content = re.sub(r'["]([^"]+)["]', r'「\1」', fixed_content)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(fixed_content)
    new_length = len(fixed_content)

    # Statistics: each non-empty line that is not a '#' line is one paragraph.
    short_limit = 35
    paragraphs = [
        p for p in fixed_content.split('\n')
        if p.strip() and not p.startswith('#')
    ]
    total_paragraphs = len(paragraphs)
    short_paragraphs = sum(
        1
        for para in paragraphs
        if sum(1 for c in para if '\u4e00' <= c <= '\u9fff') < short_limit
    )
    short_ratio = short_paragraphs / total_paragraphs if total_paragraphs > 0 else 0

    print("✅ 修复完成")
    print(f" - 原始长度: {original_length} 字符")
    print(f" - 修复后长度: {new_length} 字符")
    print(f" - 段落总数: {total_paragraphs}")
    print(f" - 短段落数: {short_paragraphs}")
    print(f" - 短段比例: {short_ratio:.1%}")
    return short_ratio
def main(chapter_file=None):
    """Run the paragraph merge on one chapter file and print the outcome.

    Args:
        chapter_file: Path of the chapter to process. When omitted, the
            chapter path this script was originally written for is used.

    Returns:
        None. If the file does not exist, a message is printed and nothing is
        modified.
    """
    if chapter_file is None:
        chapter_file = "/root/.openclaw/workspace/tomato-novel/books/末日重生-开局囤货十亿物资/chapters/0017_煎熬.md"

    if not os.path.exists(chapter_file):
        print(f"❌ 文件不存在: {chapter_file}")
        return

    print("=== 强力合并段落 ===")
    print(f"目标文件: {chapter_file}")
    print("")

    short_ratio = fix_chapter(chapter_file)

    print("")
    # Below 30% short lines counts as success.
    if short_ratio < 0.3:
        print("✅ 段落合并成功!短段比例 < 30%")
    else:
        print(f"⚠️ 仍需优化: 短段比例 {short_ratio:.1%} (目标 < 30%)")

    print("")
    print("🎯 建议:")
    print("1. 运行质量检查验证效果")
    print("2. 检查其他章节的段落结构")
    print("3. 调整 inkos 写作参数")
    print("4. 建立自动合并机制")


if __name__ == "__main__":
    import sys

    # An optional first command-line argument selects the chapter file; with
    # no argument the default chapter is processed.
    main(sys.argv[1] if len(sys.argv) > 1 else None)