#!/usr/bin/env python3
"""Simplified chapter quality-check script.

Reads one chapter file, prints a paragraph / keyword / dialogue analysis to
stdout, and writes the same metrics as a JSON report into the current
working directory.
"""

import json
import os
import re
import sys
from datetime import datetime

# Thresholds used by the checks below. The user-facing messages embed some of
# these values literally (e.g. "<35字", "至少3个"), so keep both in sync when
# changing one.
_SHORT_PARAGRAPH_LIMIT = 35   # paragraphs with fewer Chinese chars are "short"
_MAX_SHORT_RATIO = 0.3        # share of short paragraphs that is flagged
_MAX_SHORT_STREAK = 3         # run of consecutive short paragraphs flagged
_MIN_KEYWORDS = 3             # minimum number of distinct keywords expected
_MIN_DIALOGUE_RATIO = 0.2     # minimum share of characters inside quotes

_GOLDEN_KEYWORDS = ["打脸", "升级", "收获", "碾压", "反转", "爽点", "优势", "先知", "重生"]

# Quoted speech on a single line. Besides ASCII quotes and corner brackets,
# the curly quotes “ ” are accepted: without them, chapters written with
# standard Chinese quotation marks would always report 0% dialogue.
_DIALOGUE_RE = re.compile(r'["“「](.+?)["”」]')

# The chapter number is the four digits directly before an underscore.
_CHAPTER_NUM_RE = re.compile(r'(\d{4})_')


def count_chinese_chars(text):
    """Return how many characters of *text* lie in U+4E00..U+9FFF."""
    return sum(1 for ch in text if '\u4e00' <= ch <= '\u9fff')


def _print_section(title):
    """Print a blank line, a 60-character rule, and the section title."""
    print("\n" + "=" * 60)
    print(title)


def _split_paragraphs(content):
    """Split *content* into stripped body paragraphs.

    Paragraphs are runs of non-blank lines separated by blank lines; the
    lines of one paragraph are concatenated without a separator. Paragraphs
    that start with '#' (headings) are dropped.
    """
    paragraphs = []
    pending = []
    for line in content.split('\n'):
        if line.strip():
            pending.append(line)
        elif pending:
            paragraphs.append(''.join(pending))
            pending = []
    if pending:
        paragraphs.append(''.join(pending))

    stripped = (para.strip() for para in paragraphs)
    return [para for para in stripped if para and not para.startswith('#')]


def _paragraph_stats(paragraphs):
    """Return the paragraph metrics dict stored in the report."""
    lengths = [count_chinese_chars(para) for para in paragraphs]

    short_count = 0
    longest_streak = 0
    streak = 0
    for length in lengths:
        if length < _SHORT_PARAGRAPH_LIMIT:
            short_count += 1
            streak += 1
            longest_streak = max(longest_streak, streak)
        else:
            streak = 0

    total = len(paragraphs)
    return {
        "total": total,
        "short": short_count,
        # Guard against division by zero for files without body paragraphs.
        "short_ratio": short_count / total if total > 0 else 0,
        "max_consecutive_short": longest_streak,
        "avg_length": sum(lengths) / total if total > 0 else 0,
    }


def _identify_problems(para_stats, keyword_count, dialogue_ratio):
    """Return the list of problem messages for the computed metrics."""
    short_ratio = para_stats["short_ratio"]
    max_consecutive = para_stats["max_consecutive_short"]

    problems = []
    if short_ratio > _MAX_SHORT_RATIO:
        problems.append(f"短段落比例过高 ({short_ratio*100:.1f}%)")
    if max_consecutive > _MAX_SHORT_STREAK:
        problems.append(f"连续短段落过多 ({max_consecutive}个)")
    if keyword_count < _MIN_KEYWORDS:
        problems.append(f"爽点不足 (找到{keyword_count}个,需要至少3个)")
    if dialogue_ratio < _MIN_DIALOGUE_RATIO:
        problems.append(f"对话比例偏低 ({dialogue_ratio*100:.1f}%)")
    return problems


def _build_recommendations(chapter_num, short_ratio, keyword_count, dialogue_ratio):
    """Return the list of fix suggestions for the computed metrics."""
    recommendations = []
    if short_ratio > _MAX_SHORT_RATIO:
        recommendations.append("合并短段落,提高段落平均长度")
    if keyword_count < _MIN_KEYWORDS:
        if chapter_num == 1:
            recommendations.append("第1章需要:1)明确重生优势 2)建立时间紧迫感 3)设置第一个目标")
        elif chapter_num <= 3:
            # NOTE: chapter_num 0 (no number found in the filename) also
            # lands in this branch.
            recommendations.append("黄金三章需要:1)兑现第一个爽点 2)打脸小反派 3)建立升级体系")
        else:
            recommendations.append("增加爽点密度:每章至少3个爽点")
    if dialogue_ratio < _MIN_DIALOGUE_RATIO:
        recommendations.append("增加对话比例,目标30-40%")
    return recommendations


def analyze_chapter(filepath):
    """Analyze one chapter file, print the findings, and save a JSON report.

    Args:
        filepath: Path of a UTF-8 encoded chapter file. The chapter number is
            taken from four digits followed by '_' in the file name and
            defaults to 0 when the name contains no such group.

    Returns:
        The report dict that was written to
        ``quality_report_ch<NNNN>.json`` in the current working directory.

    Raises:
        OSError: If the chapter cannot be read or the report cannot be written.
        UnicodeDecodeError: If the chapter is not valid UTF-8.
    """
    print(f"分析文件:{filepath}")

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Extract the chapter number from the file name.
    filename = os.path.basename(filepath)
    match = _CHAPTER_NUM_RE.search(filename)
    chapter_num = int(match.group(1)) if match else 0

    print(f"章节号:{chapter_num}")
    print(f"文件大小:{len(content)} 字符")

    # 1. Paragraph analysis
    _print_section("段落分析:")
    para_stats = _paragraph_stats(_split_paragraphs(content))
    short_ratio = para_stats["short_ratio"]

    print(f"总段落数:{para_stats['total']}")
    print(f"短段落数(<35字):{para_stats['short']}")
    print(f"短段落比例:{short_ratio*100:.1f}%")
    print(f"最长连续短段落:{para_stats['max_consecutive_short']}")
    print(f"平均段落长度:{para_stats['avg_length']:.1f}字")

    # 2. Keyword analysis: which of the fixed keywords occur anywhere.
    _print_section("爽点分析:")
    found_keywords = [kw for kw in _GOLDEN_KEYWORDS if kw in content]

    print(f"找到爽点关键词:{len(found_keywords)}/{len(_GOLDEN_KEYWORDS)}")
    print(f"关键词:{', '.join(found_keywords)}")

    # 3. Dialogue analysis: quoted characters relative to the whole file.
    _print_section("对话分析:")
    dialogues = _DIALOGUE_RE.findall(content)
    total_chars = len(content)
    dialogue_chars = sum(len(d) for d in dialogues)
    dialogue_ratio = dialogue_chars / total_chars if total_chars > 0 else 0

    print(f"对话数量:{len(dialogues)}")
    print(f"对话比例:{dialogue_ratio*100:.1f}%")

    # 4. Problem detection
    _print_section("问题识别:")
    problems = _identify_problems(para_stats, len(found_keywords), dialogue_ratio)

    if problems:
        print("⚠️ 发现问题:")
        for i, problem in enumerate(problems, 1):
            print(f" {i}. {problem}")
    else:
        print("✅ 未发现严重问题")

    # 5. Fix suggestions
    _print_section("修复建议:")
    recommendations = _build_recommendations(
        chapter_num, short_ratio, len(found_keywords), dialogue_ratio
    )

    if recommendations:
        for i, rec in enumerate(recommendations, 1):
            print(f" {i}. {rec}")
    else:
        print(" ✅ 无需修复")

    # 6. Build the report
    report = {
        "chapter": chapter_num,
        "file": filename,
        "timestamp": datetime.now().isoformat(),
        "metrics": {
            "paragraphs": para_stats,
            "golden_points": {
                "found": len(found_keywords),
                "keywords": found_keywords,
            },
            "dialogue": {
                "count": len(dialogues),
                "ratio": dialogue_ratio,
            },
        },
        "problems": problems,
        "recommendations": recommendations,
    }

    # Save the report next to wherever the script is run from.
    report_file = f"quality_report_ch{chapter_num:04d}.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f"\n📄 报告已保存到:{report_file}")

    return report


def main():
    """Command-line entry point: analyze the chapter file given as argv[1]."""
    if len(sys.argv) < 2:
        print("用法:python simple_quality_check.py <章节文件路径>")
        sys.exit(1)

    filepath = sys.argv[1]
    # isfile rather than exists: a directory path must be rejected here
    # instead of failing later inside open().
    if not os.path.isfile(filepath):
        print(f"错误:文件不存在 - {filepath}")
        sys.exit(1)

    analyze_chapter(filepath)


if __name__ == "__main__":
    main()