novel-doomsday-resurgence/scripts/quality_monitor.py

451 lines
17 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
inkos 质量监控脚本
监控写作质量,自动发现问题并提供修复建议
"""
import os
import json
import re
import sys
from datetime import datetime
from pathlib import Path
class QualityMonitor:
    """Heuristic quality checker for a single novel chapter.

    A chapter file is read as UTF-8 text and scored on four axes: paragraph
    length, "golden point" (payoff keyword) density, emotional arc and
    dialogue share.  Threshold violations are turned into a list of problems
    and a list of suggested fixes.
    """

    # Fallback for the minimum share of dialogue characters when the config
    # has no ``{"dialogue": {"min_ratio": ...}}`` entry.
    DEFAULT_MIN_DIALOGUE_RATIO = 0.2

    # Keyword buckets used by analyze_emotional_arc().
    _POSITIVE_WORDS = ("兴奋", "开心", "满足", "自信", "希望", "轻松")
    _NEGATIVE_WORDS = ("紧张", "焦虑", "恐惧", "痛苦", "绝望", "压力")
    _NEUTRAL_WORDS = ("平静", "思考", "观察", "计算", "等待")

    def __init__(self, config_path=None):
        """Create a monitor, optionally overriding defaults from a JSON file.

        Args:
            config_path: Path to a JSON config file, or None for defaults.
        """
        self.config = self.load_config(config_path)
        self.problems = []
        self.recommendations = []

    @staticmethod
    def _merge_config(defaults, overrides):
        """Return ``defaults`` recursively updated with ``overrides``."""
        merged = dict(defaults)
        for key, value in overrides.items():
            if isinstance(value, dict) and isinstance(merged.get(key), dict):
                merged[key] = QualityMonitor._merge_config(merged[key], value)
            else:
                merged[key] = value
        return merged

    def load_config(self, config_path):
        """Load the configuration, falling back to built-in defaults.

        Values from the file are merged over the defaults, so a file that
        only sets some keys still yields a complete configuration.

        Args:
            config_path: Path to a JSON file, or None.

        Returns:
            The configuration dict.  The defaults are returned unchanged when
            the path is missing, unreadable, not valid JSON, or does not
            contain a JSON object.
        """
        default_config = {
            "paragraph": {
                "min_length": 35,
                "max_consecutive_short": 3,
                "short_warning_ratio": 0.3
            },
            "golden_points": {
                "required_per_chapter": 3,
                "keywords": ["打脸", "升级", "收获", "碾压", "反转", "爽点"]
            },
            # NOTE: "min_change" is not read anywhere in this class.
            "emotional_arc": {
                "required": True,
                "min_change": 0.3
            }
        }
        if not config_path or not os.path.exists(config_path):
            return default_config
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return default_config
        if not isinstance(loaded, dict):
            return default_config
        return self._merge_config(default_config, loaded)

    def analyze_chapter(self, chapter_path):
        """Run every analysis on one chapter file.

        Args:
            chapter_path: Path to the chapter text file (UTF-8).

        Returns:
            A dict with keys "chapter", "file", "timestamp", "metrics",
            "problems" and "recommendations"; or ``{"error": ...}`` when the
            path is not an existing regular file.
        """
        # isfile() rather than exists(): a directory would make open() raise.
        if not os.path.isfile(chapter_path):
            return {"error": "文件不存在"}
        with open(chapter_path, 'r', encoding='utf-8') as f:
            content = f.read()

        chapter_num = self.extract_chapter_number(chapter_path)
        results = {
            "chapter": chapter_num,
            "file": os.path.basename(chapter_path),
            "timestamp": datetime.now().isoformat(),
            "metrics": {
                "paragraphs": self.analyze_paragraphs(content),
                "golden_points": self.analyze_golden_points(content, chapter_num),
                "emotional_arc": self.analyze_emotional_arc(content),
                "dialogue": self.analyze_dialogue(content),
            }
        }
        # Problems are derived from the metrics, recommendations from problems.
        results["problems"] = self.identify_problems(results)
        results["recommendations"] = self.generate_recommendations(
            results["problems"], chapter_num
        )
        return results

    def extract_chapter_number(self, filepath):
        """Return the chapter number encoded in the file name.

        The number is the first run of four digits followed by an underscore
        (e.g. ``0012_title.md`` -> 12).  Returns 0 when no such run exists.
        """
        filename = os.path.basename(filepath)
        match = re.search(r'(\d{4})_', filename)
        return int(match.group(1)) if match else 0

    def analyze_paragraphs(self, content):
        """Measure paragraph lengths and short-paragraph streaks.

        Paragraphs are runs of non-blank lines separated by blank lines;
        paragraphs starting with ``#`` (headings) are ignored.  Length is the
        number of CJK ideographs (U+4E00..U+9FFF) plus ASCII letters/digits.

        Returns:
            A dict with total/short counts, the short ratio, the longest run
            of consecutive short paragraphs, avg/min/max length, and the
            first 20 lengths.
        """
        paragraphs = []
        buffer = []
        for line in content.split('\n'):
            if line.strip():
                buffer.append(line + ' ')
            elif buffer:
                paragraphs.append(''.join(buffer))
                buffer = []
        if buffer:
            paragraphs.append(''.join(buffer))

        # Drop headings; every remaining entry is non-empty after strip().
        bodies = [p.strip() for p in paragraphs]
        bodies = [p for p in bodies if p and not p.startswith('#')]

        min_length = self.config["paragraph"]["min_length"]
        lengths = []
        short_count = 0
        max_consecutive = 0
        current_streak = 0
        for para in bodies:
            chinese_chars = sum(1 for c in para if '\u4e00' <= c <= '\u9fff')
            other_chars = len(re.findall(r'[a-zA-Z0-9]', para))
            total = chinese_chars + other_chars
            lengths.append(total)
            if total < min_length:
                short_count += 1
                current_streak += 1
                max_consecutive = max(max_consecutive, current_streak)
            else:
                current_streak = 0

        total_paras = len(bodies)
        short_ratio = short_count / total_paras if total_paras > 0 else 0
        return {
            "total_paragraphs": total_paras,
            "short_paragraphs": short_count,
            "short_ratio": round(short_ratio, 3),
            "max_consecutive_short": max_consecutive,
            "avg_length": round(sum(lengths) / len(lengths)) if lengths else 0,
            "min_length": min(lengths) if lengths else 0,
            "max_length": max(lengths) if lengths else 0,
            "lengths": lengths[:20]  # keep only the first 20 for display
        }

    def analyze_golden_points(self, content, chapter_num):
        """Check how many distinct payoff keywords occur in the chapter.

        Args:
            content: Full chapter text.
            chapter_num: Chapter number; selects the expected minimum.

        Returns:
            A dict with the keywords found, their occurrence counts, the
            expected minimum, whether it is met, and context samples.
        """
        keywords = self.config["golden_points"]["keywords"]
        required = self.config["golden_points"]["required_per_chapter"]

        found_keywords = []
        keyword_counts = {}
        for keyword in keywords:
            if keyword in content:
                found_keywords.append(keyword)
                keyword_counts[keyword] = content.count(keyword)

        # Expected minimum depends on the chapter number.
        # NOTE(review): chapter 0 (number not found in the file name) also
        # takes the "<= 3" branch - confirm that this is intended.
        if chapter_num == 1:
            expected_min = 3
        elif chapter_num <= 3:
            expected_min = 2
        else:
            expected_min = required

        return {
            "found_keywords": found_keywords,
            "total_found": len(found_keywords),
            "expected_min": expected_min,
            "meets_requirement": len(found_keywords) >= expected_min,
            "keyword_counts": keyword_counts,
            "content_samples": self.extract_golden_point_samples(content, found_keywords)
        }

    def extract_golden_point_samples(self, content, keywords, sample_count=3):
        """Collect context snippets around lines that contain a keyword.

        Only the first three keywords are scanned, and only lines longer
        than 20 characters (after stripping) qualify.  Each sample holds the
        matching line plus one line before and after.

        Returns:
            A list of ``{"keyword": ..., "context": ...}`` dicts; collection
            stops once ``sample_count`` samples have been gathered.
        """
        samples = []
        lines = content.split('\n')
        for keyword in keywords[:3]:
            for i, line in enumerate(lines):
                if keyword in line and len(line.strip()) > 20:
                    start = max(0, i - 1)
                    end = min(len(lines), i + 2)
                    samples.append({
                        "keyword": keyword,
                        "context": '\n'.join(lines[start:end])
                    })
                    if len(samples) >= sample_count:
                        break
            if len(samples) >= sample_count:
                break
        return samples

    def analyze_emotional_arc(self, content):
        """Estimate the emotional arc from keyword counts (simplified).

        Intensity is the share of positive+negative words among all emotion
        words.  Emotion changes are counted over the first 50 lines only: a
        line is labelled by the first bucket (positive, negative, neutral)
        that has a word in it, and a change is a label differing from the
        previous labelled line.

        Returns:
            A dict with per-bucket counts, the intensity, the number of
            changes, and ``has_emotional_arc`` (True for >= 2 changes).
        """
        positive_count = sum(content.count(w) for w in self._POSITIVE_WORDS)
        negative_count = sum(content.count(w) for w in self._NEGATIVE_WORDS)
        neutral_count = sum(content.count(w) for w in self._NEUTRAL_WORDS)
        total = positive_count + negative_count + neutral_count
        intensity = (positive_count + negative_count) / total if total else 0

        emotion_changes = 0
        last_emotion = None
        for line in content.split('\n')[:50]:
            if any(word in line for word in self._POSITIVE_WORDS):
                line_emotion = "positive"
            elif any(word in line for word in self._NEGATIVE_WORDS):
                line_emotion = "negative"
            elif any(word in line for word in self._NEUTRAL_WORDS):
                line_emotion = "neutral"
            else:
                continue  # unlabelled lines neither count nor reset the state
            if last_emotion and line_emotion != last_emotion:
                emotion_changes += 1
            last_emotion = line_emotion

        return {
            "positive_count": positive_count,
            "negative_count": negative_count,
            "neutral_count": neutral_count,
            "total_emotion_words": total,
            "emotional_intensity": round(intensity, 3),
            "emotion_changes": emotion_changes,
            "has_emotional_arc": emotion_changes >= 2
        }

    def analyze_dialogue(self, content):
        """Measure how much of the chapter is quoted dialogue.

        Dialogue is any single-line span enclosed in ASCII double quotes,
        curly double quotes (U+201C / U+201D) or corner brackets.

        Returns:
            A dict with the dialogue count, the ratio of dialogue characters
            to all characters, the average dialogue length, and up to five
            sample dialogues.
        """
        # \u201c / \u201d are the curly quotes commonly used in Chinese prose;
        # they are written as escapes so the pattern survives re-encoding.
        dialogue_pattern = r'["\u201c「](.+?)["\u201d」]'
        dialogues = re.findall(dialogue_pattern, content)

        total_chars = len(content)
        dialogue_lengths = [len(d) for d in dialogues]
        dialogue_chars = sum(dialogue_lengths)
        dialogue_ratio = dialogue_chars / total_chars if total_chars > 0 else 0
        avg_dialogue_length = dialogue_chars / len(dialogues) if dialogues else 0
        return {
            "dialogue_count": len(dialogues),
            "dialogue_ratio": round(dialogue_ratio, 3),
            "avg_dialogue_length": round(avg_dialogue_length, 1),
            "sample_dialogues": dialogues[:5]  # first five as examples
        }

    def identify_problems(self, results):
        """Compare the metrics in ``results`` against configured thresholds.

        Args:
            results: Dict whose "metrics" entry holds the four metric dicts.

        Returns:
            A list of problem dicts with "type", "severity", "description"
            and "details".
        """
        problems = []
        para_cfg = self.config["paragraph"]

        # Paragraph structure.
        para_metrics = results["metrics"]["paragraphs"]
        if para_metrics["short_ratio"] > para_cfg["short_warning_ratio"]:
            problems.append({
                "type": "paragraph_structure",
                "severity": "high",
                "description": f"短段落比例过高:{para_metrics['short_ratio']*100:.1f}%",
                "details": f"{para_metrics['total_paragraphs']}个段落,其中{para_metrics['short_paragraphs']}个短段落"
            })
        if para_metrics["max_consecutive_short"] > para_cfg["max_consecutive_short"]:
            problems.append({
                "type": "consecutive_short_paragraphs",
                "severity": "medium",
                "description": f"连续短段落过多:{para_metrics['max_consecutive_short']}",
                "details": "影响阅读流畅性"
            })

        # Golden points.
        golden_metrics = results["metrics"]["golden_points"]
        if not golden_metrics["meets_requirement"]:
            problems.append({
                "type": "insufficient_golden_points",
                "severity": "high",
                "description": f"爽点不足:找到{golden_metrics['total_found']}个,需要{golden_metrics['expected_min']}",
                "details": f"找到的关键词:{', '.join(golden_metrics['found_keywords'])}"
            })

        # Emotional arc.
        emotion_metrics = results["metrics"]["emotional_arc"]
        if self.config["emotional_arc"]["required"] and not emotion_metrics["has_emotional_arc"]:
            problems.append({
                "type": "flat_emotional_arc",
                "severity": "medium",
                "description": "情绪弧线平坦",
                "details": f"情绪变化次数:{emotion_metrics['emotion_changes']}"
            })

        # Dialogue share; the threshold is optional in the config.
        min_dialogue_ratio = self.config.get("dialogue", {}).get(
            "min_ratio", self.DEFAULT_MIN_DIALOGUE_RATIO
        )
        dialogue_metrics = results["metrics"]["dialogue"]
        if dialogue_metrics["dialogue_ratio"] < min_dialogue_ratio:
            problems.append({
                "type": "low_dialogue_ratio",
                "severity": "low",
                "description": f"对话比例偏低:{dialogue_metrics['dialogue_ratio']*100:.1f}%",
                "details": "番茄小说建议对话比例在30-40%"
            })
        return problems

    def generate_recommendations(self, problems, chapter_num):
        """Translate problems into suggested fixes.

        Only "paragraph_structure", "insufficient_golden_points" and
        "flat_emotional_arc" problems produce a recommendation.

        Args:
            problems: Problem dicts as returned by identify_problems().
            chapter_num: Chapter number, embedded in the suggested command.

        Returns:
            A list of recommendation dicts.
        """
        # Use the configured threshold so the suggested command matches the
        # value the chapter was actually judged against.
        min_length = self.config["paragraph"]["min_length"]
        recommendations = []
        for problem in problems:
            kind = problem["type"]
            if kind == "paragraph_structure":
                recommendations.append({
                    "action": "merge_short_paragraphs",
                    "priority": "high" if problem["severity"] == "high" else "medium",
                    "description": "合并短段落,提高段落平均长度",
                    "command": f"python3 merge_paragraphs.py --chapter {chapter_num} --min-length {min_length}"
                })
            elif kind == "insufficient_golden_points":
                recommendations.append({
                    "action": "add_golden_points",
                    "priority": "high",
                    "description": "增加爽点密度",
                    "suggestions": [
                        "增加一个打脸情节",
                        "展现主角的优势",
                        "设置一个小型反转",
                        "增加资源收获"
                    ]
                })
            elif kind == "flat_emotional_arc":
                recommendations.append({
                    "action": "enhance_emotional_arc",
                    "priority": "medium",
                    "description": "增强情绪起伏",
                    "suggestions": [
                        "在章节开头设置紧张情绪",
                        "在中间加入情绪释放点",
                        "在结尾设置情绪钩子"
                    ]
                })
        return recommendations

    def generate_report(self, results, output_path=None):
        """Build the report dict and optionally write it as JSON.

        Args:
            results: Successful result of analyze_chapter().
            output_path: If given, the report is written there as UTF-8 JSON.

        Returns:
            The report dict ("summary", "metrics", "problems",
            "recommendations").
        """
        report = {
            "summary": {
                "chapter": results["chapter"],
                "file": results["file"],
                "timestamp": results["timestamp"],
                "problem_count": len(results["problems"]),
                "recommendation_count": len(results["recommendations"])
            },
            "metrics": results["metrics"],
            "problems": results["problems"],
            "recommendations": results["recommendations"]
        }
        if output_path:
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(report, f, ensure_ascii=False, indent=2)
        return report

    def print_summary(self, report):
        """Print a human-readable summary of ``report`` to stdout."""
        metrics = report['metrics']
        print(f"\n{'='*60}")
        print(f"章节质量报告 - 第{report['summary']['chapter']}")
        print(f"{'='*60}")
        print("\n📊 指标概览:")
        print(f" 段落总数:{metrics['paragraphs']['total_paragraphs']}")
        print(f" 短段落比例:{metrics['paragraphs']['short_ratio']*100:.1f}%")
        print(f" 爽点数量:{metrics['golden_points']['total_found']}")
        print(f" 情绪变化:{metrics['emotional_arc']['emotion_changes']}")
        print(f" 对话比例:{metrics['dialogue']['dialogue_ratio']*100:.1f}%")
        if report['problems']:
            print(f"\n⚠️ 发现问题({len(report['problems'])}个):")
            for i, problem in enumerate(report['problems'], 1):
                print(f" {i}. [{problem['severity'].upper()}] {problem['description']}")
        if report['recommendations']:
            print(f"\n💡 修复建议({len(report['recommendations'])}条):")
            for i, rec in enumerate(report['recommendations'], 1):
                print(f" {i}. [{rec['priority']}] {rec['description']}")
def main():
    """Command-line entry point.

    Usage: ``quality_monitor.py <chapter file> [config file]``.  Analyses the
    chapter, writes ``quality_report_chapter<N>.json`` to the current
    directory and prints a summary.

    Exit codes: 0 when no high-severity problem was found, 1 on a usage
    error or when at least one high-severity problem was found, 2 when the
    chapter file could not be analysed.
    """
    if len(sys.argv) < 2:
        print("用法: python quality_monitor.py <章节文件路径> [配置文件路径]")
        sys.exit(1)
    chapter_path = sys.argv[1]
    config_path = sys.argv[2] if len(sys.argv) > 2 else None

    monitor = QualityMonitor(config_path)
    results = monitor.analyze_chapter(chapter_path)
    # analyze_chapter() signals failure with an {"error": ...} dict that has
    # no "chapter" key; stop here instead of failing with a KeyError below.
    if "error" in results:
        print(f"错误: {results['error']}: {chapter_path}", file=sys.stderr)
        sys.exit(2)

    # Write the full report next to the current working directory.
    report_file = f"quality_report_chapter{results['chapter']}.json"
    report = monitor.generate_report(results, report_file)

    monitor.print_summary(report)
    print(f"\n📄 完整报告已保存到:{report_file}")

    # A non-zero exit code lets calling scripts detect serious problems.
    has_high = any(p["severity"] == "high" for p in results["problems"])
    sys.exit(1 if has_high else 0)


if __name__ == "__main__":
    main()