#!/usr/bin/env python3
"""Remove duplicated 【爽点】 (highlight) passages and normalise chapter titles.

Every ``.md`` file in ``CHAPTERS_DIR`` is rewritten in place; the previous
content is saved next to it with a ``.clean.bak`` suffix whenever the cleaned
text differs from what was on disk.
"""

import os
import re

CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"

# Substring that identifies a highlight line.
_MARKER = '【爽点'
# A line that *begins* with "第N章 <title>" but has no Markdown heading prefix.
_BARE_TITLE_RE = re.compile(r'\s*第(\d+)章\s*(.+)')
# A whole line of the form "[#...] 第N章 <title>", used to normalise the
# heading level of the chapter title.
_HEADING_RE = re.compile(r'^[ \t]*#*[ \t]*第(\d+)章[ \t]+(.+)$', re.MULTILINE)
# Identifier of a highlight, e.g. "1" in "【爽点1】".
_KEY_RE = re.compile(r'【爽点([^】]+)】')
# The chapter title must start within this many characters of the file start.
_TITLE_SEARCH_WINDOW = 200


def _normalize_lines(lines):
    """Promote bare titles, drop repeated highlight lines, squeeze blank lines.

    Returns a new list; ``lines`` is not modified.
    """
    cleaned = []
    for line in lines:
        # Only a line that starts with "第N章" is a title.  Matching anywhere
        # in the line would turn prose that merely mentions a chapter into a
        # heading and discard the text in front of the mention.
        if not line.startswith('#'):
            found = _BARE_TITLE_RE.match(line)
            if found and found.group(2).strip():
                line = f"# 第{found.group(1)}章 {found.group(2).strip()}"

        # Drop a highlight line only when it repeats the line just kept;
        # distinct highlights on neighbouring lines must both survive.
        if _MARKER in line and cleaned and cleaned[-1].strip() == line.strip():
            continue

        # Collapse runs of blank lines and drop leading blank lines.
        if not line.strip() and (not cleaned or not cleaned[-1].strip()):
            continue

        cleaned.append(line)
    return cleaned


def _relocate_highlights(text):
    """Move highlight paragraphs to the end of ``text`` and de-duplicate them.

    Paragraphs are separated by an empty line.  A highlight is identified by
    the text between "【爽点" and "】"; when that cannot be parsed the whole
    stripped line is used, so such lines are kept rather than lost.
    """
    paragraphs = text.split('\n\n')
    body = [p for p in paragraphs if _MARKER not in p]
    highlights = [p for p in paragraphs if _MARKER in p]

    seen_keys = set()
    seen_details = set()
    kept_paragraphs = []
    for paragraph in highlights:
        kept = []
        under_duplicate = False
        for row in paragraph.split('\n'):
            stripped = row.strip()
            if _MARKER in row:
                found = _KEY_RE.search(row)
                key = found.group(1) if found else stripped
                under_duplicate = key in seen_keys
                if not under_duplicate:
                    seen_keys.add(key)
                    kept.append(row)
            elif stripped:
                # Detail lines below a duplicated header are dropped only if
                # the same text was already emitted, so unique text is never
                # deleted while true repeats do not linger as orphans.
                if under_duplicate and stripped in seen_details:
                    continue
                seen_details.add(stripped)
                kept.append(row)
        if kept:
            kept_paragraphs.append('\n'.join(kept))

    result = '\n\n'.join(body)
    if kept_paragraphs:
        result += '\n\n' + '\n\n'.join(kept_paragraphs)
    return result


def _standardize_title(text):
    """Rewrite the chapter title line as ``# 第N章 <title>``.

    Only the first title-shaped line is touched, and only when it starts
    within the first ``_TITLE_SEARCH_WINDOW`` characters.  The line is
    replaced by slicing rather than ``re.sub`` so that the title is never
    interpreted as a replacement template and no other line is affected.
    """
    found = _HEADING_RE.search(text)
    if not found or found.start() >= _TITLE_SEARCH_WINDOW:
        return text
    title = found.group(2).strip()
    if not title:
        return text
    heading = f"# 第{found.group(1)}章 {title}"
    return text[:found.start()] + heading + text[found.end():]


def clean_duplicate_sections(content: str) -> str:
    """Return ``content`` with duplicate highlights removed and titles fixed.

    Steps, in order:

    1. A line starting with "第N章 <title>" becomes "# 第N章 <title>".
    2. A highlight line identical to the line before it is dropped.
    3. Consecutive blank lines are collapsed.
    4. Paragraphs containing "【爽点" are moved to the end and de-duplicated
       by highlight identifier.
    5. The chapter title near the top is normalised to a level-1 heading.
    6. Surrounding whitespace is stripped and one trailing newline is added.

    Args:
        content: Full text of one chapter file.

    Returns:
        The cleaned text, always ending in exactly one newline.
    """
    result = '\n'.join(_normalize_lines(content.split('\n')))

    if _MARKER in result:
        result = _relocate_highlights(result)

    result = _standardize_title(result)

    # Relocation and title replacement can leave more than one empty line.
    result = re.sub(r'\n{3,}', '\n\n', result)
    return result.strip() + '\n'


def main(chapters_dir: str = CHAPTERS_DIR) -> None:
    """Clean every ``.md`` file in ``chapters_dir`` in place.

    A file is rewritten only when cleaning changes it; its previous content
    is first written to ``<file>.clean.bak``.

    Args:
        chapters_dir: Directory to process; defaults to ``CHAPTERS_DIR``.

    Raises:
        OSError: If the directory or a file cannot be read or written.
    """
    print("清理重复的爽点段落和修复格式...")

    chapter_files = sorted(
        name for name in os.listdir(chapters_dir) if name.endswith('.md')
    )

    for filename in chapter_files:
        filepath = os.path.join(chapters_dir, filename)
        print(f"处理: {filename}")

        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        cleaned_content = clean_duplicate_sections(content)

        if content != cleaned_content:
            # Keep the previous content before overwriting the chapter.
            backup_path = filepath + '.clean.bak'
            with open(backup_path, 'w', encoding='utf-8') as f:
                f.write(content)

            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(cleaned_content)

            print(" ✓ 已清理重复内容")
        else:
            print(" ✓ 无需清理")

    print("\n清理完成!")


if __name__ == '__main__':
    main()