#!/usr/bin/env python3
"""
Force-merge short paragraphs in a Markdown chapter file.

Consecutive short lines inside a blank-line-delimited section are joined
into a single paragraph, a couple of punctuation fixes are applied, and a
short-paragraph ratio is reported.
"""

import os
import re
import shutil
import sys

# A line with fewer CJK ideographs (U+4E00..U+9FFF) than this is "short".
SHORT_PARAGRAPH_THRESHOLD = 35

# Chapter processed when no path is supplied on the command line.
DEFAULT_CHAPTER_FILE = "/root/.openclaw/workspace/tomato-novel/books/末日重生-开局囤货十亿物资/chapters/0017_煎熬.md"

# ATX-style Markdown heading: one to six '#' followed by whitespace or EOL.
_HEADING_RE = re.compile(r'#{1,6}(?:\s|$)')


def _count_chinese_chars(text):
    """Return how many characters of *text* lie in U+4E00..U+9FFF."""
    return sum(1 for c in text if '\u4e00' <= c <= '\u9fff')


def force_merge_paragraphs(content, min_chars=SHORT_PARAGRAPH_THRESHOLD):
    """Merge runs of short lines within each blank-line-delimited section.

    The text is split on ``'\\n\\n'``. Within every section, each non-blank
    line is stripped; consecutive lines with fewer than *min_chars* Chinese
    characters are joined with a single space, while longer lines are kept
    on their own line. Sections are re-joined with ``'\\n\\n'``.

    Args:
        content: The full chapter text.
        min_chars: Lines with fewer Chinese characters than this are
            treated as short and merged with their short neighbours.

    Returns:
        The text with short lines merged.
    """
    merged_sections = []

    for section in content.split('\n\n'):
        # A section opening with a top-level title is kept verbatim.
        if section.startswith('# '):
            merged_sections.append(section)
            continue

        merged_lines = []
        pending_short = []  # short lines waiting to be joined together

        for line in section.split('\n'):
            stripped = line.strip()
            if not stripped:
                continue

            is_heading = _HEADING_RE.match(stripped) is not None
            is_short = _count_chinese_chars(stripped) < min_chars

            if is_short and not is_heading:
                pending_short.append(stripped)
                continue

            # A heading or a long line ends the current run of short lines.
            # Headings must never be glued to body text, so they are emitted
            # on their own line even when they are short.
            if pending_short:
                merged_lines.append(' '.join(pending_short))
                pending_short = []
            merged_lines.append(stripped)

        # Flush whatever short lines remain at the end of the section.
        if pending_short:
            merged_lines.append(' '.join(pending_short))

        # An empty section yields '' here, preserving the blank separators.
        merged_sections.append('\n'.join(merged_lines))

    return '\n\n'.join(merged_sections)


def fix_chapter(filepath):
    """Back up, rewrite and analyse one chapter file in place.

    A copy of the file is written next to it with the suffix
    ``_强力合并前备份`` inserted before the extension. The file is then
    rewritten with merged paragraphs, ``——`` replaced by ``—`` and
    straight double-quoted spans converted to ``「…」``. Summary statistics
    are printed.

    Args:
        filepath: Path of the chapter file to fix.

    Returns:
        The fraction of non-heading, non-blank lines in the rewritten file
        that contain fewer than ``SHORT_PARAGRAPH_THRESHOLD`` Chinese
        characters (0 when there are no such lines).
    """
    print(f"修复: {os.path.basename(filepath)}")

    # Build the backup name from the real extension only. A plain
    # str.replace('.md', ...) would also rewrite '.md' inside directory
    # names, and would return the source path itself when no '.md' exists.
    root, ext = os.path.splitext(filepath)
    backup_path = f"{root}_强力合并前备份{ext}"
    shutil.copy2(filepath, backup_path)

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    original_length = len(content)

    fixed_content = force_merge_paragraphs(content)

    # Punctuation normalisation.
    fixed_content = fixed_content.replace('——', '—')
    fixed_content = re.sub(r'["]([^"]+)["]', r'「\1」', fixed_content)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(fixed_content)

    new_length = len(fixed_content)

    # Statistics: every non-blank line that is not a heading is a paragraph.
    paragraphs = [
        p for p in fixed_content.split('\n')
        if p.strip() and not p.startswith('#')
    ]
    total_paragraphs = len(paragraphs)
    short_paragraphs = sum(
        1 for para in paragraphs
        if _count_chinese_chars(para) < SHORT_PARAGRAPH_THRESHOLD
    )

    short_ratio = short_paragraphs / total_paragraphs if total_paragraphs > 0 else 0

    print("✅ 修复完成")
    print(f" - 原始长度: {original_length} 字符")
    print(f" - 修复后长度: {new_length} 字符")
    print(f" - 段落总数: {total_paragraphs}")
    print(f" - 短段落数: {short_paragraphs}")
    print(f" - 短段比例: {short_ratio:.1%}")

    return short_ratio


def main():
    """Fix one chapter and print a verdict on the short-paragraph ratio.

    The chapter path is taken from the first command-line argument when
    present, otherwise ``DEFAULT_CHAPTER_FILE`` is used.
    """
    chapter_file = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_CHAPTER_FILE

    if not os.path.exists(chapter_file):
        print(f"❌ 文件不存在: {chapter_file}")
        return

    print("=== 强力合并段落 ===")
    print(f"目标文件: {chapter_file}")
    print("")

    short_ratio = fix_chapter(chapter_file)

    print("")
    if short_ratio < 0.3:
        print("✅ 段落合并成功!短段比例 < 30%")
    else:
        print(f"⚠️ 仍需优化: 短段比例 {short_ratio:.1%} (目标 < 30%)")

    print("")
    print("🎯 建议:")
    print("1. 运行质量检查验证效果")
    print("2. 检查其他章节的段落结构")
    print("3. 调整 inkos 写作参数")
    print("4. 建立自动合并机制")


if __name__ == "__main__":
    main()