401 lines
15 KiB
Python
401 lines
15 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
人物档案分析脚本
|
||
分析markdown格式的人物档案完整性
|
||
"""
|
||
|
||
import os
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
class ProfileAnalyzer:
|
||
"""人物档案分析器"""
|
||
|
||
def __init__(self):
|
||
self.placeholder_patterns = [
|
||
r'待填写',
|
||
r'TODO',
|
||
r'待补充',
|
||
r'待完善',
|
||
r'\[.*\]',
|
||
r'\{.*\}',
|
||
r'未指定',
|
||
r'未知',
|
||
r'待定'
|
||
]
|
||
|
||
# 检测平台,Windows上使用简单符号
|
||
self.is_windows = sys.platform.startswith('win')
|
||
|
||
# 符号定义
|
||
if self.is_windows:
|
||
self.symbols = {
|
||
'chart': '[统计]',
|
||
'trophy': '[评估]',
|
||
'file': '[章节]',
|
||
'warning': '[注意]',
|
||
'bulb': '[建议]',
|
||
'check': '[OK]',
|
||
'arrow': '->',
|
||
'cross': '[X]',
|
||
'green_circle': '[良好]',
|
||
'yellow_circle': '[一般]',
|
||
'red_circle': '[需改进]',
|
||
'bullet': '-'
|
||
}
|
||
else:
|
||
self.symbols = {
|
||
'chart': '📊',
|
||
'trophy': '🏆',
|
||
'file': '📑',
|
||
'warning': '⚠️',
|
||
'bulb': '💡',
|
||
'check': '✓',
|
||
'arrow': '→',
|
||
'cross': '✗',
|
||
'green_circle': '🟢',
|
||
'yellow_circle': '🟡',
|
||
'red_circle': '🔴',
|
||
'bullet': '•'
|
||
}
|
||
|
||
def analyze_markdown(self, filepath):
|
||
"""分析markdown档案
|
||
|
||
Args:
|
||
filepath: markdown文件路径
|
||
|
||
Returns:
|
||
分析结果字典
|
||
"""
|
||
with open(filepath, 'r', encoding='utf-8') as f:
|
||
content = f.read()
|
||
|
||
# 基本统计
|
||
lines = content.split('\n')
|
||
total_lines = len(lines)
|
||
non_empty_lines = len([line for line in lines if line.strip()])
|
||
|
||
# 章节分析
|
||
sections = self._extract_sections(content)
|
||
|
||
# 占位符检测
|
||
placeholder_count = 0
|
||
placeholder_details = []
|
||
|
||
for i, line in enumerate(lines, 1):
|
||
for pattern in self.placeholder_patterns:
|
||
if re.search(pattern, line):
|
||
placeholder_count += 1
|
||
placeholder_details.append({
|
||
'line': i,
|
||
'content': line.strip()[:50] + '...' if len(line.strip()) > 50 else line.strip()
|
||
})
|
||
break
|
||
|
||
# 完整性评分
|
||
completeness_score = self._calculate_completeness_score(
|
||
non_empty_lines, placeholder_count, len(sections)
|
||
)
|
||
|
||
return {
|
||
'filepath': filepath,
|
||
'total_lines': total_lines,
|
||
'non_empty_lines': non_empty_lines,
|
||
'sections_count': len(sections),
|
||
'sections': sections,
|
||
'placeholder_count': placeholder_count,
|
||
'placeholder_details': placeholder_details[:10], # 只显示前10个
|
||
'completeness_score': completeness_score,
|
||
'completeness_level': self._get_completeness_level(completeness_score)
|
||
}
|
||
|
||
def _extract_sections(self, content):
|
||
"""提取章节信息"""
|
||
sections = []
|
||
|
||
# 匹配各级标题
|
||
header_pattern = r'^(#{1,3})\s+(.+?)$'
|
||
|
||
lines = content.split('\n')
|
||
current_section = None
|
||
|
||
for i, line in enumerate(lines):
|
||
match = re.match(header_pattern, line.strip())
|
||
if match:
|
||
level = len(match.group(1))
|
||
title = match.group(2).strip()
|
||
|
||
# 计算章节内容行数(直到下一个标题)
|
||
content_lines = 0
|
||
for j in range(i + 1, len(lines)):
|
||
if re.match(r'^#{1,3}\s+', lines[j].strip()):
|
||
break
|
||
if lines[j].strip():
|
||
content_lines += 1
|
||
|
||
sections.append({
|
||
'level': level,
|
||
'title': title,
|
||
'start_line': i + 1,
|
||
'content_lines': content_lines
|
||
})
|
||
|
||
return sections
|
||
|
||
def _calculate_completeness_score(self, non_empty_lines, placeholder_count, sections_count):
|
||
"""计算完整性评分(0-100)"""
|
||
if non_empty_lines == 0:
|
||
return 0
|
||
|
||
# 基础分:基于非空行数(假设完整档案至少50行)
|
||
base_score = min(100, (non_empty_lines / 50) * 60)
|
||
|
||
# 占位符扣分:每个占位符扣2分,最多扣30分
|
||
placeholder_penalty = min(30, placeholder_count * 2)
|
||
|
||
# 章节加分:每节加5分,最多加20分
|
||
section_bonus = min(20, sections_count * 5)
|
||
|
||
score = base_score - placeholder_penalty + section_bonus
|
||
return max(0, min(100, score))
|
||
|
||
def _get_completeness_level(self, score):
|
||
"""获取完整性等级"""
|
||
if score >= 90:
|
||
return "优秀"
|
||
elif score >= 75:
|
||
return "良好"
|
||
elif score >= 60:
|
||
return "一般"
|
||
elif score >= 40:
|
||
return "待完善"
|
||
else:
|
||
return "草稿"
|
||
|
||
def generate_report(self, analysis_result, output_format='text'):
|
||
"""生成分析报告"""
|
||
result = analysis_result
|
||
|
||
if output_format == 'text':
|
||
report_lines = []
|
||
report_lines.append("=" * 60)
|
||
report_lines.append(f"人物档案分析报告")
|
||
report_lines.append(f"文件: {result['filepath']}")
|
||
report_lines.append("=" * 60)
|
||
report_lines.append("")
|
||
|
||
# 基本信息
|
||
report_lines.append(f"{self.symbols['chart']} 基本信息")
|
||
report_lines.append(f" 总行数: {result['total_lines']}")
|
||
report_lines.append(f" 非空行数: {result['non_empty_lines']}")
|
||
report_lines.append(f" 章节数: {result['sections_count']}")
|
||
report_lines.append(f" 占位符数量: {result['placeholder_count']}")
|
||
report_lines.append("")
|
||
|
||
# 完整性评分
|
||
report_lines.append(f"{self.symbols['trophy']} 完整性评估")
|
||
report_lines.append(f" 评分: {result['completeness_score']:.1f}/100")
|
||
report_lines.append(f" 等级: {result['completeness_level']}")
|
||
report_lines.append("")
|
||
|
||
# 章节详情
|
||
if result['sections']:
|
||
report_lines.append(f"{self.symbols['file']} 章节详情")
|
||
for section in result['sections']:
|
||
level_indent = " " * (section['level'] - 1)
|
||
report_lines.append(f"{level_indent}{self.symbols['bullet']} {section['title']} (行 {section['start_line']}, {section['content_lines']} 行)")
|
||
report_lines.append("")
|
||
|
||
# 占位符详情
|
||
if result['placeholder_details']:
|
||
report_lines.append(f"{self.symbols['warning']} 需要完善的部分")
|
||
for detail in result['placeholder_details']:
|
||
report_lines.append(f" 第 {detail['line']} 行: {detail['content']}")
|
||
|
||
if result['placeholder_count'] > 10:
|
||
report_lines.append(f" ... 还有 {result['placeholder_count'] - 10} 个占位符未显示")
|
||
report_lines.append("")
|
||
|
||
# 建议
|
||
report_lines.append(f"{self.symbols['bulb']} 建议")
|
||
if result['completeness_score'] >= 80:
|
||
report_lines.append(f" {self.symbols['check']} 档案比较完整,可以开始用于创作")
|
||
report_lines.append(f" {self.symbols['arrow']} 可以考虑添加更多细节和情感描写")
|
||
elif result['completeness_score'] >= 60:
|
||
report_lines.append(f" {self.symbols['warning']} 档案基本完整,但还有完善空间")
|
||
report_lines.append(f" {self.symbols['arrow']} 建议完善 {result['placeholder_count']} 处占位符")
|
||
else:
|
||
report_lines.append(f" {self.symbols['cross']} 档案还处于草稿阶段")
|
||
report_lines.append(f" {self.symbols['arrow']} 需要补充大量内容,建议逐个章节完善")
|
||
|
||
report_lines.append("")
|
||
report_lines.append("=" * 60)
|
||
|
||
return "\n".join(report_lines)
|
||
|
||
elif output_format == 'json':
|
||
import json
|
||
return json.dumps(result, ensure_ascii=False, indent=2)
|
||
|
||
else:
|
||
raise ValueError(f"不支持的输出格式: {output_format}")
|
||
|
||
def analyze_directory(self, directory_path, recursive=True):
|
||
"""分析目录下的所有markdown档案"""
|
||
directory = Path(directory_path)
|
||
|
||
if not directory.exists():
|
||
raise FileNotFoundError(f"目录不存在: {directory_path}")
|
||
|
||
# 查找markdown文件
|
||
md_files = []
|
||
if recursive:
|
||
md_files = list(directory.rglob("*.md"))
|
||
else:
|
||
md_files = list(directory.glob("*.md"))
|
||
|
||
if not md_files:
|
||
return {"message": "未找到markdown文件", "files": []}
|
||
|
||
# 分析每个文件
|
||
results = []
|
||
for md_file in md_files:
|
||
try:
|
||
analysis = self.analyze_markdown(str(md_file))
|
||
results.append(analysis)
|
||
except Exception as e:
|
||
results.append({
|
||
'filepath': str(md_file),
|
||
'error': str(e)
|
||
})
|
||
|
||
# 按完整性评分排序
|
||
valid_results = [r for r in results if 'completeness_score' in r]
|
||
sorted_results = sorted(valid_results, key=lambda x: x['completeness_score'], reverse=True)
|
||
|
||
return {
|
||
'total_files': len(md_files),
|
||
'successful_analysis': len(valid_results),
|
||
'failed_analysis': len(results) - len(valid_results),
|
||
'results': sorted_results
|
||
}
|
||
|
||
def generate_directory_report(self, analysis_results, output_format='text'):
|
||
"""生成目录分析报告"""
|
||
if output_format == 'text':
|
||
report_lines = []
|
||
report_lines.append("=" * 60)
|
||
report_lines.append(f"人物档案目录分析报告")
|
||
report_lines.append(f"分析文件数: {analysis_results['total_files']}")
|
||
report_lines.append(f"成功分析: {analysis_results['successful_analysis']}")
|
||
if analysis_results['failed_analysis'] > 0:
|
||
report_lines.append(f"分析失败: {analysis_results['failed_analysis']}")
|
||
report_lines.append("=" * 60)
|
||
report_lines.append("")
|
||
|
||
# 文件列表(按评分排序)
|
||
if analysis_results['results']:
|
||
report_lines.append(f"{self.symbols['file']} 文件完整性排名")
|
||
for i, result in enumerate(analysis_results['results'], 1):
|
||
score = result['completeness_score']
|
||
level = result['completeness_level']
|
||
filename = os.path.basename(result['filepath'])
|
||
|
||
# 使用符号表示等级
|
||
if score >= 80:
|
||
icon = self.symbols['green_circle']
|
||
elif score >= 60:
|
||
icon = self.symbols['yellow_circle']
|
||
else:
|
||
icon = self.symbols['red_circle']
|
||
|
||
report_lines.append(f"{icon} {i:2d}. {filename:<40} {score:5.1f}分 ({level})")
|
||
|
||
report_lines.append("")
|
||
|
||
# 统计信息
|
||
avg_score = sum(r['completeness_score'] for r in analysis_results['results']) / len(analysis_results['results'])
|
||
max_score = max(r['completeness_score'] for r in analysis_results['results'])
|
||
min_score = min(r['completeness_score'] for r in analysis_results['results'])
|
||
|
||
report_lines.append(f"{self.symbols['chart']} 统计信息")
|
||
report_lines.append(f" 平均分: {avg_score:.1f}")
|
||
report_lines.append(f" 最高分: {max_score:.1f}")
|
||
report_lines.append(f" 最低分: {min_score:.1f}")
|
||
report_lines.append("")
|
||
|
||
# 建议
|
||
report_lines.append(f"{self.symbols['bulb']} 整体建议")
|
||
if avg_score >= 75:
|
||
report_lines.append(f" {self.symbols['check']} 整体完成度良好")
|
||
report_lines.append(f" {self.symbols['arrow']} 可以考虑开始故事创作")
|
||
elif avg_score >= 50:
|
||
report_lines.append(f" {self.symbols['warning']} 整体完成度一般")
|
||
report_lines.append(f" {self.symbols['arrow']} 建议继续完善人物档案")
|
||
else:
|
||
report_lines.append(f" {self.symbols['cross']} 整体完成度较低")
|
||
report_lines.append(f" {self.symbols['arrow']} 需要重点完善主要角色的档案")
|
||
|
||
report_lines.append("")
|
||
report_lines.append("=" * 60)
|
||
|
||
return "\n".join(report_lines)
|
||
|
||
else:
|
||
import json
|
||
return json.dumps(analysis_results, ensure_ascii=False, indent=2)
|
||
|
||
|
||
def main():
|
||
"""主函数"""
|
||
import argparse
|
||
|
||
parser = argparse.ArgumentParser(description='分析人物档案markdown文件')
|
||
parser.add_argument('path', help='要分析的markdown文件或目录路径')
|
||
parser.add_argument('--recursive', '-r', action='store_true', help='递归分析目录')
|
||
parser.add_argument('--format', '-f', choices=['text', 'json'], default='text', help='输出格式')
|
||
parser.add_argument('--output', '-o', help='输出文件路径')
|
||
|
||
args = parser.parse_args()
|
||
|
||
analyzer = ProfileAnalyzer()
|
||
path = Path(args.path)
|
||
|
||
try:
|
||
if path.is_file():
|
||
# 分析单个文件
|
||
if path.suffix.lower() != '.md':
|
||
print("错误: 文件必须是.md格式")
|
||
return 1
|
||
|
||
analysis = analyzer.analyze_markdown(str(path))
|
||
report = analyzer.generate_report(analysis, args.format)
|
||
|
||
elif path.is_dir():
|
||
# 分析目录
|
||
analysis_results = analyzer.analyze_directory(str(path), args.recursive)
|
||
report = analyzer.generate_directory_report(analysis_results, args.format)
|
||
|
||
else:
|
||
print(f"错误: 路径不存在: {args.path}")
|
||
return 1
|
||
|
||
# 输出结果
|
||
if args.output:
|
||
with open(args.output, 'w', encoding='utf-8') as f:
|
||
f.write(report)
|
||
print(f"报告已保存到: {args.output}")
|
||
else:
|
||
print(report)
|
||
|
||
return 0
|
||
|
||
except Exception as e:
|
||
print(f"分析失败: {e}")
|
||
return 1
|
||
|
||
|
||
if __name__ == "__main__":
|
||
sys.exit(main()) |