215 lines
6.3 KiB
Python
215 lines
6.3 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
简化版质量检查脚本
|
||
"""
|
||
|
||
import os
|
||
import re
|
||
import sys
|
||
from datetime import datetime
|
||
|
||
def count_chinese_chars(text):
    """Return the number of characters in *text* within U+4E00..U+9FFF."""
    total = 0
    for ch in text:
        # Only code points in the inclusive range U+4E00..U+9FFF are counted.
        if '\u4e00' <= ch <= '\u9fff':
            total += 1
    return total
|
||
|
||
def _split_paragraphs(content):
    """Split *content* into blank-line-separated paragraphs, minus headings.

    Consecutive non-blank lines are concatenated (no separator) into one
    paragraph. Each paragraph is stripped; paragraphs that end up empty or
    that start with '#' are dropped.
    """
    paragraphs = []
    current = []
    for line in content.split('\n'):
        if line.strip():
            current.append(line)
        elif current:
            # A blank line closes the paragraph being collected.
            paragraphs.append(''.join(current))
            current = []
    if current:
        paragraphs.append(''.join(current))

    stripped = (para.strip() for para in paragraphs)
    return [para for para in stripped if para and not para.startswith('#')]


def _summarize_paragraphs(paragraphs):
    """Return length statistics for *paragraphs* as the report's dict section.

    A paragraph is "short" when it has fewer than 35 Chinese characters
    (as counted by count_chinese_chars).
    """
    lengths = [count_chinese_chars(para) for para in paragraphs]

    short_count = 0
    max_consecutive = 0
    streak = 0
    for length in lengths:
        if length < 35:
            short_count += 1
            streak += 1
            max_consecutive = max(max_consecutive, streak)
        else:
            streak = 0

    total = len(paragraphs)
    return {
        "total": total,
        "short": short_count,
        "short_ratio": short_count / total if total > 0 else 0,
        "max_consecutive_short": max_consecutive,
        "avg_length": sum(lengths) / len(lengths) if lengths else 0,
    }


def _print_section(title):
    """Print a separator rule followed by a section title."""
    print("\n" + "="*60)
    print(title)


def _print_numbered(items):
    """Print *items* as a 1-based numbered list."""
    for i, item in enumerate(items, 1):
        print(f" {i}. {item}")


def analyze_chapter(filepath):
    """Analyze a chapter file, print a quality report and save it as JSON.

    The file is read as UTF-8. Paragraph lengths, presence of a fixed list
    of keywords, and the share of quoted dialogue are measured, printed to
    stdout, and written to ``quality_report_ch<NNNN>.json`` in the current
    working directory.

    Args:
        filepath: Path of the chapter text file.

    Returns:
        The report dict that was written to the JSON file.

    Raises:
        OSError: If the chapter file cannot be opened or the report file
            cannot be written.
        UnicodeDecodeError: If the chapter file is not valid UTF-8.
    """
    # Kept as a function-scope import, as in the original body.
    import json

    print(f"分析文件:{filepath}")

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Chapter number: four digits directly followed by '_' in the file
    # name; 0 when the name contains no such run.
    filename = os.path.basename(filepath)
    match = re.search(r'(\d{4})_', filename)
    chapter_num = int(match.group(1)) if match else 0

    print(f"章节号:{chapter_num}")
    print(f"文件大小:{len(content)} 字符")

    # 1. Paragraph analysis
    _print_section("段落分析:")

    paragraph_metrics = _summarize_paragraphs(_split_paragraphs(content))
    total_paras = paragraph_metrics["total"]
    short_count = paragraph_metrics["short"]
    short_ratio = paragraph_metrics["short_ratio"]
    max_consecutive = paragraph_metrics["max_consecutive_short"]
    avg_length = paragraph_metrics["avg_length"]

    print(f"总段落数:{total_paras}")
    print(f"短段落数(<35字):{short_count}")
    print(f"短段落比例:{short_ratio*100:.1f}%")
    print(f"最长连续短段落:{max_consecutive}")
    print(f"平均段落长度:{avg_length:.1f}字")

    # 2. Keyword ("golden point") analysis: each keyword counts once if it
    # occurs anywhere in the text.
    _print_section("爽点分析:")

    golden_keywords = ["打脸", "升级", "收获", "碾压", "反转", "爽点", "优势", "先知", "重生"]
    found_keywords = [keyword for keyword in golden_keywords if keyword in content]

    print(f"找到爽点关键词:{len(found_keywords)}/{len(golden_keywords)}")
    print(f"关键词:{', '.join(found_keywords)}")

    # 3. Dialogue analysis: quoted spans on a single line ('.' does not
    # match newlines); the ratio is taken over all characters of the file.
    _print_section("对话分析:")

    dialogue_pattern = r'["「](.+?)["」]'
    dialogues = re.findall(dialogue_pattern, content)

    total_chars = len(content)
    dialogue_chars = sum(len(d) for d in dialogues)
    dialogue_ratio = dialogue_chars / total_chars if total_chars > 0 else 0

    print(f"对话数量:{len(dialogues)}")
    print(f"对话比例:{dialogue_ratio*100:.1f}%")

    # Thresholds shared by problem detection and recommendations.
    too_many_short = short_ratio > 0.3
    long_short_run = max_consecutive > 3
    too_few_keywords = len(found_keywords) < 3
    low_dialogue = dialogue_ratio < 0.2

    # 4. Problem detection
    _print_section("问题识别:")

    problems = []
    if too_many_short:
        problems.append(f"短段落比例过高 ({short_ratio*100:.1f}%)")
    if long_short_run:
        problems.append(f"连续短段落过多 ({max_consecutive}个)")
    if too_few_keywords:
        problems.append(f"爽点不足 (找到{len(found_keywords)}个,需要至少3个)")
    if low_dialogue:
        problems.append(f"对话比例偏低 ({dialogue_ratio*100:.1f}%)")

    if problems:
        print("⚠️ 发现问题:")
        _print_numbered(problems)
    else:
        print("✅ 未发现严重问题")

    # 5. Recommendations
    _print_section("修复建议:")

    recommendations = []
    if too_many_short:
        recommendations.append("合并短段落,提高段落平均长度")

    if too_few_keywords:
        if chapter_num == 1:
            recommendations.append("第1章需要:1)明确重生优势 2)建立时间紧迫感 3)设置第一个目标")
        elif chapter_num <= 3:
            # Also reached for chapter_num == 0 (no number in the file name).
            recommendations.append("黄金三章需要:1)兑现第一个爽点 2)打脸小反派 3)建立升级体系")
        else:
            recommendations.append("增加爽点密度:每章至少3个爽点")

    if low_dialogue:
        recommendations.append("增加对话比例,目标30-40%")

    if recommendations:
        _print_numbered(recommendations)
    else:
        print(" ✅ 无需修复")

    # 6. Build and save the report
    report = {
        "chapter": chapter_num,
        "file": filename,
        "timestamp": datetime.now().isoformat(),
        "metrics": {
            "paragraphs": paragraph_metrics,
            "golden_points": {
                "found": len(found_keywords),
                "keywords": found_keywords,
            },
            "dialogue": {
                "count": len(dialogues),
                "ratio": dialogue_ratio,
            },
        },
        "problems": problems,
        "recommendations": recommendations,
    }

    report_file = f"quality_report_ch{chapter_num:04d}.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f"\n📄 报告已保存到:{report_file}")

    return report
|
||
|
||
def main():
    """Command-line entry point: analyze the chapter file given in argv[1].

    Prints a usage or error message and exits with status 1 when the
    argument is missing or does not name an existing regular file.
    """
    if len(sys.argv) < 2:
        print("用法:python simple_quality_check.py <章节文件路径>")
        sys.exit(1)

    filepath = sys.argv[1]

    # isfile rather than exists: a directory path "exists" but would make
    # the later open() fail with an unhandled exception.
    if not os.path.isfile(filepath):
        print(f"错误:文件不存在 - {filepath}")
        sys.exit(1)

    analyze_chapter(filepath)
|
||
|
||
# Run the checker only when this file is executed as a script.
if __name__ == "__main__":
    main()