jianzhihuixiang/skills/character-profile-cn/scripts/validate_profile.py

522 lines
21 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
人物档案验证脚本
验证markdown格式的人物档案结构完整性
"""
import os
import re
import sys
from pathlib import Path
class ProfileValidator:
    """Structural validator for Markdown character profiles.

    A profile is a Markdown document whose level-2 / level-3 headings name
    sections and whose fields are written as bold labels (``**姓名**``).
    The validator checks that the sections required for the profile type are
    present, that every present section carries its required fields, and
    condenses the findings into a 0-100 structure score.
    """

    # Section headings that must exist, keyed by profile type.
    REQUIRED_SECTIONS = {
        'protagonist': [
            '基本信息', '外貌特征', '性格特点', '背景故事',
            '动机层次', '人物关系', '故事发展'
        ],
        'antagonist': [
            '基本信息', '外貌特征', '性格特点', '核心理念',
            '动机发展', '镜像对比', '资源能力', '故事发展'
        ],
        'supporting': [
            '基本定位', '独立身份', '功能性设计', '关系发展',
            '发展可能性'
        ],
        'standard': [
            '基本信息', '外貌特征', '性格特点', '背景故事',
            '人物关系', '故事发展'
        ]
    }

    # Bold field labels that must appear inside a section (per the templates).
    # Sections without an entry here have no field requirements.
    REQUIRED_FIELDS = {
        '基本信息': ['姓名', '年龄', '性别', '职业/身份', '故事中的角色'],
        '外貌特征': ['整体印象', '面部特征', '身材体型', '着装风格'],
        '性格特点': ['核心性格', '优点', '缺点', '价值观'],
        '背景故事': ['出身背景', '关键经历', '转折点'],
        '人物关系': ['与主角关系', '重要关系人'],
        '故事发展': ['角色目标', '内在冲突', '外在冲突', '发展弧线']
    }

    # A level-2 or level-3 ATX heading: group 1 = the '#' run, group 2 = title.
    _HEADING_RE = re.compile(r'^(#{2,3})\s+(.+?)$')
    # An inline Markdown link such as "[text](target)".
    _INLINE_LINK_RE = re.compile(r'\[.*?\]\(.*?\)')

    # Plain-text markers used on Windows.
    _ASCII_SYMBOLS = {
        'building': '[结构]',
        'cross_mark': '[缺失]',
        'warning': '[注意]',
        'check': '[通过]',
        'wrench': '[修复]',
        'check_mark': '[OK]',
        'arrow': '->',
        'green_circle': '[良好]',
        'yellow_circle': '[一般]',
        'red_circle': '[需改进]',
        'file': '[文件]',
        'chart': '[统计]',
        'chart2': '[分布]',
        'bulb': '[建议]',
        'bullet': '-',
        'dash': '-'
    }

    # Emoji markers used everywhere else. 'cross_mark', 'check', 'check_mark',
    # 'arrow' and 'bullet' used to be empty strings, which left report lines
    # starting with a blank where the marker should be.
    _EMOJI_SYMBOLS = {
        'building': '🏗️',
        'cross_mark': '❌',
        'warning': '⚠️',
        'check': '✅',
        'wrench': '🔧',
        'check_mark': '✓',
        'arrow': '→',
        'green_circle': '🟢',
        'yellow_circle': '🟡',
        'red_circle': '🔴',
        'file': '📋',
        'chart': '📊',
        'chart2': '📈',
        'bulb': '💡',
        'bullet': '•',
        'dash': '-'
    }

    def __init__(self, profile_type='auto'):
        """Create a validator.

        Args:
            profile_type: One of 'protagonist', 'antagonist', 'supporting',
                'standard', or 'auto' (detect the type from the content).
        """
        self.profile_type = profile_type
        # Windows gets the ASCII marker set, every other platform the emoji set.
        self.is_windows = sys.platform.startswith('win')
        table = self._ASCII_SYMBOLS if self.is_windows else self._EMOJI_SYMBOLS
        # Copy so per-instance tweaks never leak into the class-level tables.
        self.symbols = dict(table)

    def detect_profile_type(self, content):
        """Guess the profile type from marker keywords found in the text.

        The checks run in priority order: antagonist markers win over
        supporting markers, which win over protagonist markers; 'standard'
        is the fallback when no marker is present.

        Args:
            content: Full text of the profile.

        Returns:
            'antagonist', 'supporting', 'protagonist' or 'standard'.
        """
        def mentions(*keywords):
            return any(keyword in content for keyword in keywords)

        if mentions('镜像对比', '核心理念', '资源能力'):
            return 'antagonist'
        if mentions('功能性设计', '独立身份'):
            return 'supporting'
        if mentions('动机层次', '核心身份'):
            return 'protagonist'
        return 'standard'

    def validate_structure(self, filepath):
        """Validate the structure of a single profile file.

        Args:
            filepath: Path of the Markdown file (read as UTF-8).

        Returns:
            A dict with the keys 'filepath', 'detected_type',
            'total_sections_found', 'required_sections', 'present_sections',
            'missing_sections', 'section_field_violations' (section title ->
            list of missing field names), 'structure_score' and
            'structure_level'.

        Raises:
            OSError: The file cannot be opened.
            UnicodeDecodeError: The file is not valid UTF-8.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        if self.profile_type == 'auto':
            detected_type = self.detect_profile_type(content)
        else:
            detected_type = self.profile_type

        sections = self._extract_sections(content)
        found_titles = set(sections)

        # An unknown type has no requirements and therefore scores 100.
        required_sections = self.REQUIRED_SECTIONS.get(detected_type, [])
        present_sections = [s for s in required_sections if s in found_titles]
        missing_sections = [s for s in required_sections if s not in found_titles]

        # Fields can only be checked in sections that actually exist.
        section_field_violations = {}
        for section_title in present_sections:
            section_content = self._get_section_content(content, section_title)
            missing_fields = self._check_required_fields(section_title, section_content)
            if missing_fields:
                section_field_violations[section_title] = missing_fields

        structure_score = self._calculate_structure_score(
            len(required_sections), len(missing_sections), section_field_violations
        )
        return {
            'filepath': filepath,
            'detected_type': detected_type,
            'total_sections_found': len(sections),
            'required_sections': required_sections,
            'present_sections': present_sections,
            'missing_sections': missing_sections,
            'section_field_violations': section_field_violations,
            'structure_score': structure_score,
            'structure_level': self._get_structure_level(structure_score)
        }

    @classmethod
    def _parse_heading(cls, line):
        """Parse one line as a level-2/3 heading.

        Returns:
            ``(level, title)`` where the title has inline links removed and
            surrounding whitespace stripped, or ``None`` when the line is not
            a level-2 or level-3 heading.
        """
        match = cls._HEADING_RE.match(line.strip())
        if match is None:
            return None
        # Strip again after dropping links: "基本信息 [↑](#top)" would
        # otherwise keep a trailing space and never equal "基本信息".
        title = cls._INLINE_LINK_RE.sub('', match.group(2)).strip()
        return len(match.group(1)), title

    def _extract_sections(self, content):
        """Return the titles of all level-2 and level-3 headings, in order."""
        parsed = (self._parse_heading(line) for line in content.split('\n'))
        return [heading[1] for heading in parsed if heading is not None]

    def _get_section_content(self, content, section_title):
        """Return the body text of the first section titled *section_title*.

        The body runs from the line after the heading up to the next heading
        of the same or a higher level, so the level-3 sub-sections of a
        level-2 section belong to that section. Headings are compared after
        the same normalisation ``_extract_sections`` applies, so any section
        it reports can also be located here.

        Returns:
            The section body, or '' when the section does not exist.
        """
        section_level = None
        collected = []
        for line in content.split('\n'):
            heading = self._parse_heading(line)
            if section_level is None:
                # Still searching for the opening heading.
                if heading is not None and heading[1] == section_title:
                    section_level = heading[0]
                continue
            if heading is not None and heading[0] <= section_level:
                break
            collected.append(line)
        return '\n'.join(collected)

    def _check_required_fields(self, section_title, section_content):
        """Return the required fields of a section missing from its body.

        A field counts as present when its name appears as a bold label.
        Both ``**姓名**`` and ``**姓名：**`` (ASCII or full-width colon inside
        the bold markers) are accepted.
        """
        missing_fields = []
        for field in self.REQUIRED_FIELDS.get(section_title, []):
            pattern = rf'\*\*{re.escape(field)}\s*[:：]?\s*\*\*'
            if not re.search(pattern, section_content):
                missing_fields.append(field)
        return missing_fields

    def _calculate_structure_score(self, total_required, missing_sections_count, field_violations):
        """Compute the 0-100 structure score.

        Up to 70 points are awarded in proportion to the required sections
        present; the remaining 30 points lose 3 per missing field, with the
        deduction capped at 30.

        NOTE(review): fields of sections that are missing altogether are not
        counted, so a file with no required sections still keeps the full 30
        field points. Left unchanged to keep existing scores stable.
        """
        if total_required == 0:
            return 100

        section_score = ((total_required - missing_sections_count) / total_required) * 70

        field_score = 30
        if field_violations:
            missing_field_total = sum(len(fields) for fields in field_violations.values())
            field_score -= min(30, missing_field_total * 3)

        return max(0, min(100, section_score + field_score))

    def _get_structure_level(self, score):
        """Map a structure score to its textual grade."""
        grades = (
            (90, "优秀"),
            (75, "良好"),
            (60, "一般"),
            (40, "不完整"),
        )
        for threshold, label in grades:
            if score >= threshold:
                return label
        return "结构缺失"

    def generate_validation_report(self, validation_result, output_format='text'):
        """Render the result of ``validate_structure``.

        Args:
            validation_result: Dict returned by ``validate_structure``.
            output_format: 'text' for a human-readable report, 'json' for a
                JSON dump of the result dict.

        Returns:
            The report as a string.

        Raises:
            ValueError: *output_format* is neither 'text' nor 'json'.
        """
        result = validation_result

        if output_format == 'json':
            import json
            return json.dumps(result, ensure_ascii=False, indent=2)
        if output_format != 'text':
            raise ValueError(f"不支持的输出格式: {output_format}")

        sym = self.symbols
        separator = "=" * 60
        violations = result['section_field_violations']

        report_lines = [
            separator,
            "人物档案结构验证报告",
            f"文件: {result['filepath']}",
            f"检测类型: {result['detected_type']}",
            separator,
            "",
            f"{sym['building']} 结构完整性",
            f" 评分: {result['structure_score']:.1f}/100",
            f" 等级: {result['structure_level']}",
            f" 发现章节: {result['total_sections_found']}",
            "",
        ]

        # Missing sections.
        if result['missing_sections']:
            report_lines.append(f"{sym['cross_mark']} 缺失的必需章节")
            for section in result['missing_sections']:
                report_lines.append(f" {sym['bullet']} {section}")
            report_lines.append("")

        # Missing fields, grouped by section.
        if violations:
            report_lines.append(f"{sym['warning']} 章节内缺失字段")
            for section, fields in violations.items():
                report_lines.append(f" {sym['bullet']} {section}:")
                for field in fields:
                    report_lines.append(f" {sym['dash']} {field}")
            report_lines.append("")

        # Passed checks. Field totals only cover sections that are present.
        report_lines.append(f"{sym['check']} 通过的检查")
        report_lines.append(
            f" {sym['bullet']} 必需章节: {len(result['present_sections'])}/{len(result['required_sections'])}"
        )
        present_field_count = 0
        total_field_count = 0
        for section in result['present_sections']:
            required_fields = self.REQUIRED_FIELDS.get(section, [])
            total_field_count += len(required_fields)
            present_field_count += len(required_fields) - len(violations.get(section, []))
        if total_field_count > 0:
            report_lines.append(f" {sym['bullet']} 必需字段: {present_field_count}/{total_field_count}")
        report_lines.append("")

        # Fix suggestions.
        report_lines.append(f"{sym['wrench']} 修复建议")
        if result['structure_score'] >= 80:
            report_lines.append(f" {sym['check_mark']} 结构完整,可以继续完善内容细节")
        elif result['structure_score'] >= 60:
            if result['missing_sections']:
                report_lines.append(
                    f" {sym['arrow']} 添加缺失的章节: {', '.join(result['missing_sections'][:3])}"
                )
            if violations:
                # Only the first gap is shown as a concrete example.
                first_section = next(iter(violations))
                first_field = violations[first_section][0]
                report_lines.append(f" {sym['arrow']} 补充字段: {first_section} → **{first_field}**")
        else:
            report_lines.append(f" {sym['arrow']} 需要补充基本的结构框架")
            report_lines.append(f" {sym['arrow']} 建议使用'{result['detected_type']}'模板重新整理")
        report_lines.append("")
        report_lines.append(separator)
        return "\n".join(report_lines)

    def validate_directory(self, directory_path, recursive=True):
        """Validate every ``*.md`` file in a directory.

        Args:
            directory_path: Directory to scan.
            recursive: Also scan sub-directories when true.

        Returns:
            A dict with 'total_files', 'successful_validation',
            'failed_validation', 'results' (per-file results sorted by score,
            best first) and 'errors' (``{'filepath', 'error'}`` entries for
            files that could not be read). When no Markdown file is found the
            same keys are returned with zero counts, plus the legacy
            'message' and 'files' keys.

        Raises:
            FileNotFoundError: *directory_path* does not exist.
        """
        directory = Path(directory_path)
        if not directory.exists():
            raise FileNotFoundError(f"目录不存在: {directory_path}")

        matches = directory.rglob("*.md") if recursive else directory.glob("*.md")
        # Sorted so the output, including the order of tied scores, does not
        # depend on filesystem enumeration order.
        md_files = sorted(matches)

        if not md_files:
            # Same shape as the normal result so the report generator can
            # render it; 'message' and 'files' are kept for existing callers.
            return {
                'total_files': 0,
                'successful_validation': 0,
                'failed_validation': 0,
                'results': [],
                'errors': [],
                'message': "未找到markdown文件",
                'files': []
            }

        valid_results = []
        errors = []
        for md_file in md_files:
            try:
                valid_results.append(self.validate_structure(str(md_file)))
            except (OSError, UnicodeError) as e:
                # An unreadable file must not abort the whole run; keep the
                # reason so it can be reported instead of only counted.
                errors.append({'filepath': str(md_file), 'error': str(e)})

        sorted_results = sorted(valid_results, key=lambda x: x['structure_score'], reverse=True)
        return {
            'total_files': len(md_files),
            'successful_validation': len(valid_results),
            'failed_validation': len(errors),
            'results': sorted_results,
            'errors': errors
        }

    def generate_directory_validation_report(self, validation_results, output_format='text'):
        """Render the result of ``validate_directory``.

        Args:
            validation_results: Dict returned by ``validate_directory``.
            output_format: 'text' or 'json'.

        Returns:
            The report as a string.

        Raises:
            ValueError: *output_format* is neither 'text' nor 'json'
                (consistent with ``generate_validation_report``).
        """
        if output_format == 'json':
            import json
            return json.dumps(validation_results, ensure_ascii=False, indent=2)
        if output_format != 'text':
            raise ValueError(f"不支持的输出格式: {output_format}")

        sym = self.symbols
        separator = "=" * 60
        # .get() keeps the report usable for the "no files found" result and
        # for result dicts produced without the newer 'errors' key.
        results = validation_results.get('results', [])
        errors = validation_results.get('errors', [])
        failed = validation_results.get('failed_validation', 0)

        report_lines = [
            separator,
            "人物档案结构验证报告(目录)",
            f"分析文件数: {validation_results.get('total_files', 0)}",
            f"成功验证: {validation_results.get('successful_validation', 0)}",
        ]
        if failed > 0:
            report_lines.append(f"验证失败: {failed}")
        report_lines.append(separator)
        report_lines.append("")

        if validation_results.get('message'):
            report_lines.append(validation_results['message'])
            report_lines.append("")

        # Ranking, statistics and advice all need at least one valid result.
        if results:
            report_lines.append(f"{sym['file']} 文件结构完整性排名")
            for i, result in enumerate(results, 1):
                score = result['structure_score']
                level = result['structure_level']
                filename = os.path.basename(result['filepath'])
                profile_type = result.get('detected_type', '未知')
                if score >= 80:
                    icon = sym['green_circle']
                elif score >= 60:
                    icon = sym['yellow_circle']
                else:
                    icon = sym['red_circle']
                report_lines.append(
                    f"{icon} {i:2d}. {filename:<35} {score:5.1f}分 ({level}, {profile_type})"
                )
            report_lines.append("")

            scores = [r['structure_score'] for r in results]
            avg_score = sum(scores) / len(scores)

            type_distribution = {}
            for result in results:
                profile_type = result.get('detected_type', '未知')
                type_distribution[profile_type] = type_distribution.get(profile_type, 0) + 1

            report_lines.append(f"{sym['chart']} 统计信息")
            report_lines.append(f" 平均结构分: {avg_score:.1f}")
            report_lines.append(f" 最高分: {max(scores):.1f}")
            report_lines.append(f" 最低分: {min(scores):.1f}")
            report_lines.append("")

            report_lines.append(f"{sym['chart2']} 类型分布")
            for profile_type, count in type_distribution.items():
                percentage = (count / len(results)) * 100
                report_lines.append(f" {profile_type}: {count}个 ({percentage:.1f}%)")
            report_lines.append("")

            report_lines.append(f"{sym['bulb']} 整体建议")
            if avg_score >= 75:
                report_lines.append(f" {sym['check_mark']} 整体结构良好")
                report_lines.append(f" {sym['arrow']} 可以开始关注内容深度和细节")
            elif avg_score >= 50:
                report_lines.append(f" {sym['warning']} 结构基本完整但有缺失")
                report_lines.append(f" {sym['arrow']} 建议补充缺失章节和字段")
            else:
                report_lines.append(f" {sym['cross_mark']} 结构完整性不足")
                report_lines.append(f" {sym['arrow']} 需要重新整理档案结构框架")
            report_lines.append("")

        # Name the files that could not be validated, not just their count.
        if errors:
            report_lines.append(f"{sym['cross_mark']} 验证失败的文件")
            for failure in errors:
                report_lines.append(
                    f" {sym['bullet']} {os.path.basename(failure['filepath'])}: {failure['error']}"
                )
            report_lines.append("")

        report_lines.append(separator)
        return "\n".join(report_lines)
def main():
    """Command-line entry point.

    Validates a single ``.md`` profile or every ``.md`` file in a directory
    and prints the report, or writes it to the file given with ``--output``.

    Returns:
        Process exit code: 0 on success, 1 when the path is invalid or the
        validation fails.
    """
    import argparse

    parser = argparse.ArgumentParser(description='验证人物档案markdown文件结构')
    parser.add_argument('path', help='要验证的markdown文件或目录路径')
    parser.add_argument('--type', '-t', choices=['protagonist', 'antagonist', 'supporting', 'standard', 'auto'],
                        default='auto', help='档案类型(默认为自动检测)')
    parser.add_argument('--recursive', '-r', action='store_true', help='递归验证目录')
    parser.add_argument('--format', '-f', choices=['text', 'json'], default='text', help='输出格式')
    parser.add_argument('--output', '-o', help='输出文件路径')
    args = parser.parse_args()

    path = Path(args.path)

    # Reject unusable paths before doing any work. Diagnostics go to stderr
    # so that a piped report (e.g. --format json) on stdout stays clean.
    if path.is_file():
        if path.suffix.lower() != '.md':
            print("错误: 文件必须是.md格式", file=sys.stderr)
            return 1
    elif not path.is_dir():
        print(f"错误: 路径不存在: {args.path}", file=sys.stderr)
        return 1

    try:
        validator = ProfileValidator(args.type)
        if path.is_file():
            validation = validator.validate_structure(str(path))
            report = validator.generate_validation_report(validation, args.format)
        else:
            validation_results = validator.validate_directory(str(path), args.recursive)
            report = validator.generate_directory_validation_report(validation_results, args.format)

        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(report)
            print(f"报告已保存到: {args.output}")
        else:
            print(report)
        return 0
    except Exception as e:
        # Top-level boundary: turn any failure into a message and exit code.
        print(f"验证失败: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    sys.exit(main())