jianzhihuixiang/skills/character-profile-cn/scripts/analyze_profile.py

401 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
人物档案分析脚本
分析markdown格式的人物档案完整性
"""
import os
import re
import sys
from pathlib import Path
class ProfileAnalyzer:
"""人物档案分析器"""
def __init__(self):
self.placeholder_patterns = [
r'待填写',
r'TODO',
r'待补充',
r'待完善',
r'\[.*\]',
r'\{.*\}',
r'未指定',
r'未知',
r'待定'
]
# 检测平台Windows上使用简单符号
self.is_windows = sys.platform.startswith('win')
# 符号定义
if self.is_windows:
self.symbols = {
'chart': '[统计]',
'trophy': '[评估]',
'file': '[章节]',
'warning': '[注意]',
'bulb': '[建议]',
'check': '[OK]',
'arrow': '->',
'cross': '[X]',
'green_circle': '[良好]',
'yellow_circle': '[一般]',
'red_circle': '[需改进]',
'bullet': '-'
}
else:
self.symbols = {
'chart': '📊',
'trophy': '🏆',
'file': '📑',
'warning': '⚠️',
'bulb': '💡',
'check': '',
'arrow': '',
'cross': '',
'green_circle': '🟢',
'yellow_circle': '🟡',
'red_circle': '🔴',
'bullet': ''
}
def analyze_markdown(self, filepath):
"""分析markdown档案
Args:
filepath: markdown文件路径
Returns:
分析结果字典
"""
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
# 基本统计
lines = content.split('\n')
total_lines = len(lines)
non_empty_lines = len([line for line in lines if line.strip()])
# 章节分析
sections = self._extract_sections(content)
# 占位符检测
placeholder_count = 0
placeholder_details = []
for i, line in enumerate(lines, 1):
for pattern in self.placeholder_patterns:
if re.search(pattern, line):
placeholder_count += 1
placeholder_details.append({
'line': i,
'content': line.strip()[:50] + '...' if len(line.strip()) > 50 else line.strip()
})
break
# 完整性评分
completeness_score = self._calculate_completeness_score(
non_empty_lines, placeholder_count, len(sections)
)
return {
'filepath': filepath,
'total_lines': total_lines,
'non_empty_lines': non_empty_lines,
'sections_count': len(sections),
'sections': sections,
'placeholder_count': placeholder_count,
'placeholder_details': placeholder_details[:10], # 只显示前10个
'completeness_score': completeness_score,
'completeness_level': self._get_completeness_level(completeness_score)
}
def _extract_sections(self, content):
"""提取章节信息"""
sections = []
# 匹配各级标题
header_pattern = r'^(#{1,3})\s+(.+?)$'
lines = content.split('\n')
current_section = None
for i, line in enumerate(lines):
match = re.match(header_pattern, line.strip())
if match:
level = len(match.group(1))
title = match.group(2).strip()
# 计算章节内容行数(直到下一个标题)
content_lines = 0
for j in range(i + 1, len(lines)):
if re.match(r'^#{1,3}\s+', lines[j].strip()):
break
if lines[j].strip():
content_lines += 1
sections.append({
'level': level,
'title': title,
'start_line': i + 1,
'content_lines': content_lines
})
return sections
def _calculate_completeness_score(self, non_empty_lines, placeholder_count, sections_count):
"""计算完整性评分0-100"""
if non_empty_lines == 0:
return 0
# 基础分基于非空行数假设完整档案至少50行
base_score = min(100, (non_empty_lines / 50) * 60)
# 占位符扣分每个占位符扣2分最多扣30分
placeholder_penalty = min(30, placeholder_count * 2)
# 章节加分每节加5分最多加20分
section_bonus = min(20, sections_count * 5)
score = base_score - placeholder_penalty + section_bonus
return max(0, min(100, score))
def _get_completeness_level(self, score):
"""获取完整性等级"""
if score >= 90:
return "优秀"
elif score >= 75:
return "良好"
elif score >= 60:
return "一般"
elif score >= 40:
return "待完善"
else:
return "草稿"
def generate_report(self, analysis_result, output_format='text'):
"""生成分析报告"""
result = analysis_result
if output_format == 'text':
report_lines = []
report_lines.append("=" * 60)
report_lines.append(f"人物档案分析报告")
report_lines.append(f"文件: {result['filepath']}")
report_lines.append("=" * 60)
report_lines.append("")
# 基本信息
report_lines.append(f"{self.symbols['chart']} 基本信息")
report_lines.append(f" 总行数: {result['total_lines']}")
report_lines.append(f" 非空行数: {result['non_empty_lines']}")
report_lines.append(f" 章节数: {result['sections_count']}")
report_lines.append(f" 占位符数量: {result['placeholder_count']}")
report_lines.append("")
# 完整性评分
report_lines.append(f"{self.symbols['trophy']} 完整性评估")
report_lines.append(f" 评分: {result['completeness_score']:.1f}/100")
report_lines.append(f" 等级: {result['completeness_level']}")
report_lines.append("")
# 章节详情
if result['sections']:
report_lines.append(f"{self.symbols['file']} 章节详情")
for section in result['sections']:
level_indent = " " * (section['level'] - 1)
report_lines.append(f"{level_indent}{self.symbols['bullet']} {section['title']} (行 {section['start_line']}, {section['content_lines']} 行)")
report_lines.append("")
# 占位符详情
if result['placeholder_details']:
report_lines.append(f"{self.symbols['warning']} 需要完善的部分")
for detail in result['placeholder_details']:
report_lines.append(f"{detail['line']} 行: {detail['content']}")
if result['placeholder_count'] > 10:
report_lines.append(f" ... 还有 {result['placeholder_count'] - 10} 个占位符未显示")
report_lines.append("")
# 建议
report_lines.append(f"{self.symbols['bulb']} 建议")
if result['completeness_score'] >= 80:
report_lines.append(f" {self.symbols['check']} 档案比较完整,可以开始用于创作")
report_lines.append(f" {self.symbols['arrow']} 可以考虑添加更多细节和情感描写")
elif result['completeness_score'] >= 60:
report_lines.append(f" {self.symbols['warning']} 档案基本完整,但还有完善空间")
report_lines.append(f" {self.symbols['arrow']} 建议完善 {result['placeholder_count']} 处占位符")
else:
report_lines.append(f" {self.symbols['cross']} 档案还处于草稿阶段")
report_lines.append(f" {self.symbols['arrow']} 需要补充大量内容,建议逐个章节完善")
report_lines.append("")
report_lines.append("=" * 60)
return "\n".join(report_lines)
elif output_format == 'json':
import json
return json.dumps(result, ensure_ascii=False, indent=2)
else:
raise ValueError(f"不支持的输出格式: {output_format}")
def analyze_directory(self, directory_path, recursive=True):
"""分析目录下的所有markdown档案"""
directory = Path(directory_path)
if not directory.exists():
raise FileNotFoundError(f"目录不存在: {directory_path}")
# 查找markdown文件
md_files = []
if recursive:
md_files = list(directory.rglob("*.md"))
else:
md_files = list(directory.glob("*.md"))
if not md_files:
return {"message": "未找到markdown文件", "files": []}
# 分析每个文件
results = []
for md_file in md_files:
try:
analysis = self.analyze_markdown(str(md_file))
results.append(analysis)
except Exception as e:
results.append({
'filepath': str(md_file),
'error': str(e)
})
# 按完整性评分排序
valid_results = [r for r in results if 'completeness_score' in r]
sorted_results = sorted(valid_results, key=lambda x: x['completeness_score'], reverse=True)
return {
'total_files': len(md_files),
'successful_analysis': len(valid_results),
'failed_analysis': len(results) - len(valid_results),
'results': sorted_results
}
def generate_directory_report(self, analysis_results, output_format='text'):
"""生成目录分析报告"""
if output_format == 'text':
report_lines = []
report_lines.append("=" * 60)
report_lines.append(f"人物档案目录分析报告")
report_lines.append(f"分析文件数: {analysis_results['total_files']}")
report_lines.append(f"成功分析: {analysis_results['successful_analysis']}")
if analysis_results['failed_analysis'] > 0:
report_lines.append(f"分析失败: {analysis_results['failed_analysis']}")
report_lines.append("=" * 60)
report_lines.append("")
# 文件列表(按评分排序)
if analysis_results['results']:
report_lines.append(f"{self.symbols['file']} 文件完整性排名")
for i, result in enumerate(analysis_results['results'], 1):
score = result['completeness_score']
level = result['completeness_level']
filename = os.path.basename(result['filepath'])
# 使用符号表示等级
if score >= 80:
icon = self.symbols['green_circle']
elif score >= 60:
icon = self.symbols['yellow_circle']
else:
icon = self.symbols['red_circle']
report_lines.append(f"{icon} {i:2d}. {filename:<40} {score:5.1f}分 ({level})")
report_lines.append("")
# 统计信息
avg_score = sum(r['completeness_score'] for r in analysis_results['results']) / len(analysis_results['results'])
max_score = max(r['completeness_score'] for r in analysis_results['results'])
min_score = min(r['completeness_score'] for r in analysis_results['results'])
report_lines.append(f"{self.symbols['chart']} 统计信息")
report_lines.append(f" 平均分: {avg_score:.1f}")
report_lines.append(f" 最高分: {max_score:.1f}")
report_lines.append(f" 最低分: {min_score:.1f}")
report_lines.append("")
# 建议
report_lines.append(f"{self.symbols['bulb']} 整体建议")
if avg_score >= 75:
report_lines.append(f" {self.symbols['check']} 整体完成度良好")
report_lines.append(f" {self.symbols['arrow']} 可以考虑开始故事创作")
elif avg_score >= 50:
report_lines.append(f" {self.symbols['warning']} 整体完成度一般")
report_lines.append(f" {self.symbols['arrow']} 建议继续完善人物档案")
else:
report_lines.append(f" {self.symbols['cross']} 整体完成度较低")
report_lines.append(f" {self.symbols['arrow']} 需要重点完善主要角色的档案")
report_lines.append("")
report_lines.append("=" * 60)
return "\n".join(report_lines)
else:
import json
return json.dumps(analysis_results, ensure_ascii=False, indent=2)
def main():
"""主函数"""
import argparse
parser = argparse.ArgumentParser(description='分析人物档案markdown文件')
parser.add_argument('path', help='要分析的markdown文件或目录路径')
parser.add_argument('--recursive', '-r', action='store_true', help='递归分析目录')
parser.add_argument('--format', '-f', choices=['text', 'json'], default='text', help='输出格式')
parser.add_argument('--output', '-o', help='输出文件路径')
args = parser.parse_args()
analyzer = ProfileAnalyzer()
path = Path(args.path)
try:
if path.is_file():
# 分析单个文件
if path.suffix.lower() != '.md':
print("错误: 文件必须是.md格式")
return 1
analysis = analyzer.analyze_markdown(str(path))
report = analyzer.generate_report(analysis, args.format)
elif path.is_dir():
# 分析目录
analysis_results = analyzer.analyze_directory(str(path), args.recursive)
report = analyzer.generate_directory_report(analysis_results, args.format)
else:
print(f"错误: 路径不存在: {args.path}")
return 1
# 输出结果
if args.output:
with open(args.output, 'w', encoding='utf-8') as f:
f.write(report)
print(f"报告已保存到: {args.output}")
else:
print(report)
return 0
except Exception as e:
print(f"分析失败: {e}")
return 1
if __name__ == "__main__":
sys.exit(main())