novel-doomsday-resurgence/tomato-novel/scripts/simple_quality_check.py
唐天洛 bc9188b0fd feat(git-workflow): 添加 Git 工作流和脚本
包含:
- GIT_WORKFLOW.md - 详细的 Git 工作流规范
- scripts/git-setup.sh - Git 工作区初始化脚本
- scripts/git-daily.sh - 日常 Git 管理脚本
- scripts/git-novel-workflow.sh - 小说专用 Git 工作流
- 更新 .gitignore 排除子仓库的 .git 目录
- 添加 novel-tracker/ 目录
- 添加 projects/ 目录(排除子仓库 .git)
- 添加 tomato-novel/ 目录
2026-03-30 15:50:36 +08:00

215 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
简化版质量检查脚本
"""
import os
import re
import sys
from datetime import datetime
def count_chinese_chars(text):
    """Return the number of Chinese characters in ``text``.

    A character counts as Chinese when its code point lies in the range
    U+4E00..U+9FFF inclusive. Punctuation, Latin letters, digits and
    whitespace are therefore not counted.
    """
    total = 0
    for ch in text:
        if '\u4e00' <= ch <= '\u9fff':
            total += 1
    return total
# Thresholds used by analyze_chapter.
# NOTE: the values 35 and 3 are also spelled out literally inside some of the
# user-facing messages below; keep those messages in sync when changing them.
_SHORT_PARA_LIMIT = 35       # paragraphs with fewer Chinese chars are "short"
_MAX_SHORT_RATIO = 0.3       # above this share of short paragraphs is a problem
_MAX_SHORT_RUN = 3           # more consecutive short paragraphs is a problem
_MIN_GOLDEN_KEYWORDS = 3     # fewer distinct keywords found is a problem
_MIN_DIALOGUE_RATIO = 0.2    # lower dialogue share is a problem

# Keywords whose presence in the chapter text is reported in the keyword section.
_GOLDEN_KEYWORDS = ["打脸", "升级", "收获", "碾压", "反转", "爽点", "优势", "先知", "重生"]

# Quoted speech on a single line. Besides straight quotes and corner brackets,
# the curly double quotes U+201C / U+201D are accepted (written as escapes so
# they cannot be confused with the ASCII quote when the file is viewed or copied).
_DIALOGUE_PATTERN = re.compile(r'["\u201c「](.+?)["\u201d」]')


def _split_paragraphs(content):
    """Split chapter text into stripped, non-empty body paragraphs.

    Blank lines separate paragraphs. A line that starts with ``#`` (after
    stripping) is treated as a heading: it is discarded and also ends the
    current paragraph, so body text that follows a heading without a blank
    line in between is still counted instead of being dropped with it.
    """
    paragraphs = []
    current = []

    def flush():
        joined = ''.join(current).strip()
        if joined:
            paragraphs.append(joined)
        current.clear()

    for line in content.split('\n'):
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            flush()
        else:
            current.append(line)
    flush()
    return paragraphs


def _short_paragraph_stats(lengths, limit):
    """Return ``(short_count, longest_run)`` for lengths strictly below ``limit``."""
    short_count = 0
    longest_run = 0
    run = 0
    for length in lengths:
        if length < limit:
            short_count += 1
            run += 1
            longest_run = max(longest_run, run)
        else:
            run = 0
    return short_count, longest_run


def analyze_chapter(filepath):
    """Analyze one chapter file, print a quality summary and save a JSON report.

    The analysis covers paragraph lengths (counted in Chinese characters),
    presence of a fixed keyword list, and the share of text inside quotation
    marks. Detected problems and matching recommendations are printed and
    included in the report.

    Side effects: prints to stdout and writes
    ``quality_report_ch<NNNN>.json`` into the current working directory,
    where ``NNNN`` is the chapter number (0 when the file name contains no
    ``<4 digits>_`` sequence).

    Args:
        filepath: Path of the UTF-8 encoded chapter text file.

    Returns:
        The report as a dict with the keys ``chapter``, ``file``,
        ``timestamp``, ``metrics``, ``problems`` and ``recommendations``.

    Raises:
        OSError: If the chapter file cannot be read or the report cannot
            be written.
        UnicodeDecodeError: If the chapter file is not valid UTF-8.
    """
    # Local import: json is not part of this module's top-level imports.
    import json

    print(f"分析文件:{filepath}")
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Chapter number comes from a "<4 digits>_" sequence in the file name.
    filename = os.path.basename(filepath)
    match = re.search(r'(\d{4})_', filename)
    chapter_num = int(match.group(1)) if match else 0
    print(f"章节号:{chapter_num}")
    print(f"文件大小:{len(content)} 字符")

    # 1. Paragraph analysis
    print("\n" + "="*60)
    print("段落分析:")
    filtered_paras = _split_paragraphs(content)
    lengths = [count_chinese_chars(para) for para in filtered_paras]
    short_count, max_consecutive = _short_paragraph_stats(lengths, _SHORT_PARA_LIMIT)
    total_paras = len(filtered_paras)
    # Guard the divisions so an empty or heading-only file yields zeros.
    short_ratio = short_count / total_paras if total_paras > 0 else 0
    avg_length = sum(lengths) / len(lengths) if lengths else 0
    print(f"总段落数:{total_paras}")
    print(f"短段落数(<35字){short_count}")
    print(f"短段落比例:{short_ratio*100:.1f}%")
    print(f"最长连续短段落:{max_consecutive}")
    print(f"平均段落长度:{avg_length:.1f}")

    # 2. Keyword analysis: each keyword counts once, however often it occurs.
    print("\n" + "="*60)
    print("爽点分析:")
    found_keywords = [kw for kw in _GOLDEN_KEYWORDS if kw in content]
    print(f"找到爽点关键词:{len(found_keywords)}/{len(_GOLDEN_KEYWORDS)}")
    print(f"关键词:{', '.join(found_keywords)}")

    # 3. Dialogue analysis: quoted characters relative to the whole file length.
    print("\n" + "="*60)
    print("对话分析:")
    dialogues = _DIALOGUE_PATTERN.findall(content)
    total_chars = len(content)
    dialogue_chars = sum(len(d) for d in dialogues)
    dialogue_ratio = dialogue_chars / total_chars if total_chars > 0 else 0
    print(f"对话数量:{len(dialogues)}")
    print(f"对话比例:{dialogue_ratio*100:.1f}%")

    # Evaluate each threshold once; sections 4 and 5 share the results.
    too_many_short = short_ratio > _MAX_SHORT_RATIO
    too_few_keywords = len(found_keywords) < _MIN_GOLDEN_KEYWORDS
    too_little_dialogue = dialogue_ratio < _MIN_DIALOGUE_RATIO

    # 4. Problem detection
    print("\n" + "="*60)
    print("问题识别:")
    problems = []
    if too_many_short:
        problems.append(f"短段落比例过高 ({short_ratio*100:.1f}%)")
    if max_consecutive > _MAX_SHORT_RUN:
        problems.append(f"连续短段落过多 ({max_consecutive}个)")
    if too_few_keywords:
        problems.append(f"爽点不足 (找到{len(found_keywords)}需要至少3个)")
    if too_little_dialogue:
        problems.append(f"对话比例偏低 ({dialogue_ratio*100:.1f}%)")
    if problems:
        print("⚠️ 发现问题:")
        for i, problem in enumerate(problems, 1):
            print(f" {i}. {problem}")
    else:
        print("✅ 未发现严重问题")

    # 5. Recommendations
    print("\n" + "="*60)
    print("修复建议:")
    recommendations = []
    if too_many_short:
        recommendations.append("合并短段落,提高段落平均长度")
    if too_few_keywords:
        # The advice depends on where the chapter sits: chapter 1, chapters
        # up to 3 (including the fallback number 0), or any later chapter.
        if chapter_num == 1:
            recommendations.append("第1章需要1)明确重生优势 2)建立时间紧迫感 3)设置第一个目标")
        elif chapter_num <= 3:
            recommendations.append("黄金三章需要1)兑现第一个爽点 2)打脸小反派 3)建立升级体系")
        else:
            recommendations.append("增加爽点密度每章至少3个爽点")
    if too_little_dialogue:
        recommendations.append("增加对话比例目标30-40%")
    if recommendations:
        for i, rec in enumerate(recommendations, 1):
            print(f" {i}. {rec}")
    else:
        print(" ✅ 无需修复")

    # 6. Build the report
    report = {
        "chapter": chapter_num,
        "file": filename,
        "timestamp": datetime.now().isoformat(),
        "metrics": {
            "paragraphs": {
                "total": total_paras,
                "short": short_count,
                "short_ratio": short_ratio,
                "max_consecutive_short": max_consecutive,
                "avg_length": avg_length,
            },
            "golden_points": {
                "found": len(found_keywords),
                "keywords": found_keywords,
            },
            "dialogue": {
                "count": len(dialogues),
                "ratio": dialogue_ratio,
            },
        },
        "problems": problems,
        "recommendations": recommendations,
    }

    # Save the report into the current working directory.
    report_file = f"quality_report_ch{chapter_num:04d}.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    print(f"\n📄 报告已保存到:{report_file}")
    return report
def main():
    """Command-line entry point: analyze the chapter file given as ``sys.argv[1]``.

    Prints a usage line and exits with status 1 when no path is given.
    Prints an error and exits with status 1 when the path is not an existing
    regular file. Otherwise the path is passed to ``analyze_chapter``.
    """
    if len(sys.argv) < 2:
        print("用法python simple_quality_check.py <章节文件路径>")
        sys.exit(1)

    filepath = sys.argv[1]
    # isfile() rather than exists(): a directory exists but cannot be opened
    # as a chapter, and would otherwise fail later with an unhandled error.
    if not os.path.isfile(filepath):
        print(f"错误:文件不存在 - {filepath}")
        sys.exit(1)

    analyze_chapter(filepath)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()