jianzhihuixiang/skills/character-profile-cn/scripts/analyze_profile.py

401 lines
15 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
人物档案分析脚本
分析markdown格式的人物档案完整性
"""
import os
import re
import sys
from pathlib import Path
class ProfileAnalyzer:
    """Score Markdown character profiles for completeness.

    A profile is read as plain text: headings of level 1-3 become sections,
    lines matching any entry of ``placeholder_patterns`` count as unfinished,
    and both feed a 0-100 completeness score. User-facing report strings are
    intentionally kept in Chinese.
    """

    # Upper bound on placeholder hits stored in (and printed from) a result.
    MAX_PLACEHOLDER_DETAILS = 10
    # Placeholder excerpts longer than this are cut and suffixed with '...'.
    MAX_EXCERPT_LENGTH = 50

    # Level 1-3 ATX heading, applied to an already stripped line.
    _HEADING_RE = re.compile(r'^(#{1,3})\s+(.+?)$')
    # Markers that open/close a fenced code block.
    _FENCE_MARKERS = ('```', '~~~')

    def __init__(self):
        """Set up the placeholder patterns and the platform-specific symbols."""
        # Regexes; a line matching any of them counts as one placeholder.
        # NOTE(review): the bracket/brace patterns also match Markdown links
        # and checkboxes -- confirm this is intended before tightening them.
        self.placeholder_patterns = [
            r'待填写',
            r'TODO',
            r'待补充',
            r'待完善',
            r'\[.*\]',
            r'\{.*\}',
            r'未指定',
            r'未知',
            r'待定',
        ]
        # Plain ASCII-style markers are used on Windows instead of emoji.
        self.is_windows = sys.platform.startswith('win')
        if self.is_windows:
            self.symbols = {
                'chart': '[统计]',
                'trophy': '[评估]',
                'file': '[章节]',
                'warning': '[注意]',
                'bulb': '[建议]',
                'check': '[OK]',
                'arrow': '->',
                'cross': '[X]',
                'green_circle': '[良好]',
                'yellow_circle': '[一般]',
                'red_circle': '[需改进]',
                'bullet': '-',
            }
        else:
            # 'check', 'arrow', 'cross' and 'bullet' were empty strings, which
            # left the report lines without any marker; every key now has a
            # visible glyph, mirroring the Windows table.
            self.symbols = {
                'chart': '📊',
                'trophy': '🏆',
                'file': '📑',
                'warning': '⚠️',
                'bulb': '💡',
                'check': '✅',
                'arrow': '→',
                'cross': '❌',
                'green_circle': '🟢',
                'yellow_circle': '🟡',
                'red_circle': '🔴',
                'bullet': '•',
            }

    def analyze_markdown(self, filepath):
        """Analyze a single Markdown profile.

        Args:
            filepath: Path of the Markdown file, read as UTF-8.

        Returns:
            A dict with the keys ``filepath``, ``total_lines``,
            ``non_empty_lines``, ``sections_count``, ``sections``,
            ``placeholder_count``, ``placeholder_details`` (at most
            ``MAX_PLACEHOLDER_DETAILS`` entries), ``completeness_score`` and
            ``completeness_level``.

        Raises:
            OSError: If the file cannot be opened or read.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        lines = content.split('\n')
        # A trailing newline leaves an empty last element that is not a real
        # line; dropping it keeps the line count from being one too high.
        if lines and lines[-1] == '':
            lines.pop()

        total_lines = len(lines)
        non_empty_lines = sum(1 for line in lines if line.strip())
        sections = self._extract_sections(content)

        # Compile once per file rather than once per (line, pattern) pair.
        compiled = [re.compile(p) for p in self.placeholder_patterns]
        placeholder_count = 0
        placeholder_details = []
        for line_no, line in enumerate(lines, 1):
            if not any(p.search(line) for p in compiled):
                continue
            placeholder_count += 1
            # Every hit is counted, but only the first few are kept.
            if len(placeholder_details) < self.MAX_PLACEHOLDER_DETAILS:
                excerpt = line.strip()
                if len(excerpt) > self.MAX_EXCERPT_LENGTH:
                    excerpt = excerpt[:self.MAX_EXCERPT_LENGTH] + '...'
                placeholder_details.append({'line': line_no, 'content': excerpt})

        completeness_score = self._calculate_completeness_score(
            non_empty_lines, placeholder_count, len(sections)
        )
        return {
            'filepath': filepath,
            'total_lines': total_lines,
            'non_empty_lines': non_empty_lines,
            'sections_count': len(sections),
            'sections': sections,
            'placeholder_count': placeholder_count,
            'placeholder_details': placeholder_details,
            'completeness_score': completeness_score,
            'completeness_level': self._get_completeness_level(completeness_score),
        }

    def _extract_sections(self, content):
        """Return one dict per level 1-3 heading found in *content*.

        Each dict holds ``level``, ``title``, ``start_line`` (1-based) and
        ``content_lines``: the number of non-empty lines between the heading
        and the next level 1-3 heading. Lines inside fenced code blocks are
        never treated as headings, so a ``# comment`` in a code sample does
        not start a bogus section.
        """
        sections = []
        current = None      # section that subsequent content lines belong to
        open_fence = None   # marker of the fence we are inside, if any
        for index, raw_line in enumerate(content.split('\n')):
            text = raw_line.strip()
            if open_fence is not None:
                if text.startswith(open_fence):
                    open_fence = None
            elif text.startswith(self._FENCE_MARKERS):
                open_fence = text[:3]
            else:
                heading = self._HEADING_RE.match(text)
                if heading:
                    current = {
                        'level': len(heading.group(1)),
                        'title': heading.group(2).strip(),
                        'start_line': index + 1,
                        'content_lines': 0,
                    }
                    sections.append(current)
                    continue
            # Text before the first heading belongs to no section.
            if text and current is not None:
                current['content_lines'] += 1
        return sections

    def _calculate_completeness_score(self, non_empty_lines, placeholder_count, sections_count):
        """Compute the completeness score, clamped to the range 0-100.

        The score is a base of 60 points per 50 non-empty lines (capped at
        100), minus 2 points per placeholder (capped at 30), plus 5 points
        per section (capped at 20). An empty file scores 0.
        """
        if non_empty_lines == 0:
            return 0
        base_score = min(100, (non_empty_lines / 50) * 60)
        placeholder_penalty = min(30, placeholder_count * 2)
        section_bonus = min(20, sections_count * 5)
        score = base_score - placeholder_penalty + section_bonus
        return max(0, min(100, score))

    def _get_completeness_level(self, score):
        """Map a numeric score to its completeness label."""
        if score >= 90:
            return "优秀"
        elif score >= 75:
            return "良好"
        elif score >= 60:
            return "一般"
        elif score >= 40:
            return "待完善"
        else:
            return "草稿"

    def _score_icon(self, score):
        """Return the traffic-light symbol used for *score* in rankings."""
        if score >= 80:
            return self.symbols['green_circle']
        if score >= 60:
            return self.symbols['yellow_circle']
        return self.symbols['red_circle']

    def generate_report(self, analysis_result, output_format='text'):
        """Render the result of ``analyze_markdown`` as a report string.

        Args:
            analysis_result: Dict as returned by ``analyze_markdown``.
            output_format: ``'text'`` for the human-readable report or
                ``'json'`` for a JSON dump of the dict.

        Returns:
            The report as a single string.

        Raises:
            ValueError: If *output_format* is neither ``'text'`` nor ``'json'``.
        """
        result = analysis_result
        if output_format == 'json':
            import json
            return json.dumps(result, ensure_ascii=False, indent=2)
        if output_format != 'text':
            raise ValueError(f"不支持的输出格式: {output_format}")

        symbols = self.symbols
        out = [
            "=" * 60,
            "人物档案分析报告",
            f"文件: {result['filepath']}",
            "=" * 60,
            "",
            # Basic statistics
            f"{symbols['chart']} 基本信息",
            f" 总行数: {result['total_lines']}",
            f" 非空行数: {result['non_empty_lines']}",
            f" 章节数: {result['sections_count']}",
            f" 占位符数量: {result['placeholder_count']}",
            "",
            # Completeness rating
            f"{symbols['trophy']} 完整性评估",
            f" 评分: {result['completeness_score']:.1f}/100",
            f" 等级: {result['completeness_level']}",
            "",
        ]

        # Section list, indented by heading level.
        if result['sections']:
            out.append(f"{symbols['file']} 章节详情")
            for section in result['sections']:
                level_indent = " " * (section['level'] - 1)
                out.append(
                    f"{level_indent}{symbols['bullet']} {section['title']} "
                    f"(行 {section['start_line']}, {section['content_lines']} 行)"
                )
            out.append("")

        # Placeholder hits that still need to be filled in.
        details = result['placeholder_details']
        if details:
            out.append(f"{symbols['warning']} 需要完善的部分")
            for detail in details:
                out.append(f"{detail['line']} 行: {detail['content']}")
            # Derived from what is actually listed, not a second magic number.
            hidden = result['placeholder_count'] - len(details)
            if hidden > 0:
                out.append(f" ... 还有 {hidden} 个占位符未显示")
            out.append("")

        # Advice based on the score.
        out.append(f"{symbols['bulb']} 建议")
        if result['completeness_score'] >= 80:
            out.append(f" {symbols['check']} 档案比较完整,可以开始用于创作")
            out.append(f" {symbols['arrow']} 可以考虑添加更多细节和情感描写")
        elif result['completeness_score'] >= 60:
            out.append(f" {symbols['warning']} 档案基本完整,但还有完善空间")
            out.append(f" {symbols['arrow']} 建议完善 {result['placeholder_count']} 处占位符")
        else:
            out.append(f" {symbols['cross']} 档案还处于草稿阶段")
            out.append(f" {symbols['arrow']} 需要补充大量内容,建议逐个章节完善")
        out.append("")
        out.append("=" * 60)
        return "\n".join(out)

    def analyze_directory(self, directory_path, recursive=True):
        """Analyze every ``*.md`` file below a directory.

        Args:
            directory_path: Directory to scan.
            recursive: Scan subdirectories too when true.

        Returns:
            A dict with ``total_files``, ``successful_analysis``,
            ``failed_analysis``, ``results`` (successful analyses, highest
            score first) and ``errors`` (``{'filepath', 'error'}`` per file
            that could not be read). When no Markdown file is found the dict
            additionally carries ``message`` and an empty ``files`` list.

        Raises:
            FileNotFoundError: If *directory_path* does not exist.
        """
        directory = Path(directory_path)
        if not directory.exists():
            raise FileNotFoundError(f"目录不存在: {directory_path}")

        finder = directory.rglob if recursive else directory.glob
        # Sorted so that files with equal scores are ranked deterministically.
        md_files = sorted(finder("*.md"))

        if not md_files:
            # Same shape as the regular result so report generation works;
            # 'message' and 'files' are kept for existing callers.
            return {
                'message': "未找到markdown文件",
                'files': [],
                'total_files': 0,
                'successful_analysis': 0,
                'failed_analysis': 0,
                'results': [],
                'errors': [],
            }

        analyses = []
        errors = []
        for md_file in md_files:
            try:
                analyses.append(self.analyze_markdown(str(md_file)))
            except (OSError, UnicodeError) as exc:
                # Best effort: one unreadable file must not abort the scan,
                # but the reason is kept instead of being thrown away.
                errors.append({'filepath': str(md_file), 'error': str(exc)})

        ranked = sorted(analyses, key=lambda item: item['completeness_score'], reverse=True)
        return {
            'total_files': len(md_files),
            'successful_analysis': len(analyses),
            'failed_analysis': len(errors),
            'results': ranked,
            'errors': errors,
        }

    def generate_directory_report(self, analysis_results, output_format='text'):
        """Render the result of ``analyze_directory`` as a report string.

        Args:
            analysis_results: Dict as returned by ``analyze_directory``.
            output_format: ``'text'`` or ``'json'``.

        Returns:
            The report as a single string.

        Raises:
            ValueError: If *output_format* is neither ``'text'`` nor
                ``'json'`` (consistent with ``generate_report``).
        """
        if output_format == 'json':
            import json
            return json.dumps(analysis_results, ensure_ascii=False, indent=2)
        if output_format != 'text':
            raise ValueError(f"不支持的输出格式: {output_format}")

        symbols = self.symbols
        # .get() keeps the "no files found" result renderable.
        results = analysis_results.get('results', [])
        errors = analysis_results.get('errors', [])
        failed = analysis_results.get('failed_analysis', 0)

        out = [
            "=" * 60,
            "人物档案目录分析报告",
            f"分析文件数: {analysis_results.get('total_files', 0)}",
            f"成功分析: {analysis_results.get('successful_analysis', 0)}",
        ]
        if failed > 0:
            out.append(f"分析失败: {failed}")
        out.append("=" * 60)
        out.append("")

        if analysis_results.get('message'):
            out.append(analysis_results['message'])
            out.append("")

        if results:
            # Ranking, already sorted by score.
            out.append(f"{symbols['file']} 文件完整性排名")
            for i, result in enumerate(results, 1):
                score = result['completeness_score']
                level = result['completeness_level']
                filename = os.path.basename(result['filepath'])
                icon = self._score_icon(score)
                out.append(f"{icon} {i:2d}. {filename:<40} {score:5.1f}分 ({level})")
            out.append("")

            # Aggregate statistics.
            scores = [r['completeness_score'] for r in results]
            avg_score = sum(scores) / len(scores)
            out.append(f"{symbols['chart']} 统计信息")
            out.append(f" 平均分: {avg_score:.1f}")
            out.append(f" 最高分: {max(scores):.1f}")
            out.append(f" 最低分: {min(scores):.1f}")
            out.append("")

            # Overall advice.
            out.append(f"{symbols['bulb']} 整体建议")
            if avg_score >= 75:
                out.append(f" {symbols['check']} 整体完成度良好")
                out.append(f" {symbols['arrow']} 可以考虑开始故事创作")
            elif avg_score >= 50:
                out.append(f" {symbols['warning']} 整体完成度一般")
                out.append(f" {symbols['arrow']} 建议继续完善人物档案")
            else:
                out.append(f" {symbols['cross']} 整体完成度较低")
                out.append(f" {symbols['arrow']} 需要重点完善主要角色的档案")
            out.append("")

        if errors:
            # Files that could not be analyzed, with the reason.
            out.append(f"{symbols['warning']} 分析失败的文件")
            for failure in errors:
                name = os.path.basename(failure['filepath'])
                out.append(f" {symbols['bullet']} {name}: {failure['error']}")
            out.append("")

        out.append("=" * 60)
        return "\n".join(out)
def main(argv=None):
    """Command-line entry point: analyze one profile or a directory of them.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so calling ``main()`` behaves as before.

    Returns:
        Process exit status: 0 on success, 1 on any error. The report goes
        to stdout (or to ``--output``); error messages go to stderr so they
        never mix with a piped or JSON report.
    """
    import argparse

    parser = argparse.ArgumentParser(description='分析人物档案markdown文件')
    parser.add_argument('path', help='要分析的markdown文件或目录路径')
    parser.add_argument('--recursive', '-r', action='store_true', help='递归分析目录')
    parser.add_argument('--format', '-f', choices=['text', 'json'], default='text', help='输出格式')
    parser.add_argument('--output', '-o', help='输出文件路径')
    args = parser.parse_args(argv)

    path = Path(args.path)
    try:
        # Validate the target before doing any work.
        single_file = path.is_file()
        if single_file:
            if path.suffix.lower() != '.md':
                print("错误: 文件必须是.md格式", file=sys.stderr)
                return 1
        elif not path.is_dir():
            print(f"错误: 路径不存在: {args.path}", file=sys.stderr)
            return 1

        analyzer = ProfileAnalyzer()
        if single_file:
            analysis = analyzer.analyze_markdown(str(path))
            report = analyzer.generate_report(analysis, args.format)
        else:
            analysis_results = analyzer.analyze_directory(str(path), args.recursive)
            report = analyzer.generate_directory_report(analysis_results, args.format)

        # Emit the report.
        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(report)
            print(f"报告已保存到: {args.output}")
        else:
            print(report)
        return 0
    except Exception as e:
        # Top-level boundary: turn any failure into a message and exit code.
        print(f"分析失败: {e}", file=sys.stderr)
        return 1
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())