jianzhihuixiang/skills/character-profile-cn/scripts/validate_profile.py

522 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
人物档案验证脚本
验证markdown格式的人物档案结构完整性
"""
import os
import re
import sys
from pathlib import Path
class ProfileValidator:
    """Structural validator for Markdown character profiles.

    A profile is a Markdown document whose level-2/level-3 headings name
    sections (e.g. ``## 基本信息``) and whose fields are written as bold labels
    (e.g. ``**姓名**``).  The validator checks that the sections required for
    the profile type are present, that each present section contains its
    required bold fields, and turns the result into a 0-100 score.
    """

    # Section headings required for each profile type.
    REQUIRED_SECTIONS = {
        'protagonist': [
            '基本信息', '外貌特征', '性格特点', '背景故事',
            '动机层次', '人物关系', '故事发展'
        ],
        'antagonist': [
            '基本信息', '外貌特征', '性格特点', '核心理念',
            '动机发展', '镜像对比', '资源能力', '故事发展'
        ],
        'supporting': [
            '基本定位', '独立身份', '功能性设计', '关系发展',
            '发展可能性'
        ],
        'standard': [
            '基本信息', '外貌特征', '性格特点', '背景故事',
            '人物关系', '故事发展'
        ]
    }

    # Bold field labels required inside a section (taken from the templates).
    REQUIRED_FIELDS = {
        '基本信息': ['姓名', '年龄', '性别', '职业/身份', '故事中的角色'],
        '外貌特征': ['整体印象', '面部特征', '身材体型', '着装风格'],
        '性格特点': ['核心性格', '优点', '缺点', '价值观'],
        '背景故事': ['出身背景', '关键经历', '转折点'],
        '人物关系': ['与主角关系', '重要关系人'],
        '故事发展': ['角色目标', '内在冲突', '外在冲突', '发展弧线']
    }

    # Level-2 / level-3 Markdown heading, matched against a stripped line.
    _HEADING_RE = re.compile(r'^(#{2,3})\s+(.+?)$')
    # Inline Markdown link ``[label](target)``.
    _LINK_RE = re.compile(r'\[(.*?)\]\(.*?\)')

    # Plain-text markers used on Windows.
    _ASCII_SYMBOLS = {
        'building': '[结构]',
        'cross_mark': '[缺失]',
        'warning': '[注意]',
        'check': '[通过]',
        'wrench': '[修复]',
        'check_mark': '[OK]',
        'arrow': '->',
        'green_circle': '[良好]',
        'yellow_circle': '[一般]',
        'red_circle': '[需改进]',
        'file': '[文件]',
        'chart': '[统计]',
        'chart2': '[分布]',
        'bulb': '[建议]',
        'bullet': '-',
        'dash': '-'
    }

    # Unicode markers used everywhere else.  Several of these had been reduced
    # to empty strings, which left report lines without any marker.
    _EMOJI_SYMBOLS = {
        'building': '🏗️',
        'cross_mark': '❌',
        'warning': '⚠️',
        'check': '✅',
        'wrench': '🔧',
        'check_mark': '✓',
        'arrow': '→',
        'green_circle': '🟢',
        'yellow_circle': '🟡',
        'red_circle': '🔴',
        'file': '📋',
        'chart': '📊',
        'chart2': '📈',
        'bulb': '💡',
        'bullet': '•',
        'dash': '-'
    }

    def __init__(self, profile_type='auto'):
        """Create a validator.

        Args:
            profile_type: One of 'protagonist', 'antagonist', 'supporting',
                'standard', or 'auto' (detect the type from the content).
        """
        self.profile_type = profile_type
        # Windows gets the plain-text markers; other platforms get emoji.
        self.is_windows = sys.platform.startswith('win')
        source = self._ASCII_SYMBOLS if self.is_windows else self._EMOJI_SYMBOLS
        # Copy so per-instance tweaks never leak into the class-level tables.
        self.symbols = dict(source)

    def detect_profile_type(self, content):
        """Guess the profile type from marker phrases found in the text.

        Antagonist markers are checked first, then supporting, then
        protagonist; 'standard' is returned when none is present.
        """
        def mentions(*markers):
            return any(marker in content for marker in markers)

        if mentions('镜像对比', '核心理念', '资源能力'):
            return 'antagonist'
        if mentions('功能性设计', '独立身份'):
            return 'supporting'
        if mentions('动机层次', '核心身份'):
            return 'protagonist'
        return 'standard'

    def validate_structure(self, filepath):
        """Validate the structure of one profile file.

        Args:
            filepath: Path of the Markdown file.

        Returns:
            A dict with the detected type, the required/present/missing
            sections, the missing fields per section, the numeric score and
            its level label.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        # utf-8-sig also accepts plain UTF-8 but drops a leading BOM, which
        # would otherwise hide a heading on the very first line.
        with open(filepath, 'r', encoding='utf-8-sig') as f:
            content = f.read()

        if self.profile_type == 'auto':
            detected_type = self.detect_profile_type(content)
        else:
            detected_type = self.profile_type

        sections = self._extract_sections(content)
        found = set(sections)

        required_sections = self.REQUIRED_SECTIONS.get(detected_type, [])
        present_sections = [s for s in required_sections if s in found]
        missing_sections = [s for s in required_sections if s not in found]

        # Only sections that exist can be checked for their fields.
        section_field_violations = {}
        for section_title in present_sections:
            section_content = self._get_section_content(content, section_title)
            missing_fields = self._check_required_fields(section_title, section_content)
            if missing_fields:
                section_field_violations[section_title] = missing_fields

        structure_score = self._calculate_structure_score(
            len(required_sections), len(missing_sections), section_field_violations
        )
        return {
            'filepath': filepath,
            'detected_type': detected_type,
            'total_sections_found': len(sections),
            'required_sections': required_sections,
            'present_sections': present_sections,
            'missing_sections': missing_sections,
            'section_field_violations': section_field_violations,
            'structure_score': structure_score,
            'structure_level': self._get_structure_level(structure_score)
        }

    @classmethod
    def _parse_heading(cls, line):
        """Return ``(level, title)`` for a ``##``/``###`` heading, else None.

        Inline links are removed from the title (so a trailing anchor such as
        ``[top](#top)`` is ignored).  When the whole title is a link, its label
        is kept instead so the heading does not collapse to an empty string.
        """
        match = cls._HEADING_RE.match(line.strip())
        if match is None:
            return None
        raw_title = match.group(2).strip()
        title = cls._LINK_RE.sub('', raw_title).strip()
        if not title:
            title = cls._LINK_RE.sub(r'\1', raw_title).strip()
        return len(match.group(1)), title

    def _extract_sections(self, content):
        """Return the titles of all level-2 and level-3 headings, in order."""
        headings = (self._parse_heading(line) for line in content.split('\n'))
        return [heading[1] for heading in headings if heading is not None]

    def _get_section_content(self, content, section_title):
        """Return the body of the section titled ``section_title``.

        The body runs from the line after the heading up to the next heading
        of the same or a higher level, so a ``##`` section keeps its ``###``
        subsections.  An empty string is returned when the section is absent.
        """
        section_level = None
        collected = []
        for line in content.split('\n'):
            heading = self._parse_heading(line)
            if heading is not None:
                level, title = heading
                if title == section_title:
                    if section_level is None:
                        section_level = level
                    continue
                if section_level is not None and level <= section_level:
                    break
            if section_level is not None:
                collected.append(line)
        return '\n'.join(collected)

    def _check_required_fields(self, section_title, section_content):
        """Return the required fields of a section that are not present.

        A field counts as present when it appears as bold text
        (``**field**``) anywhere in ``section_content``.
        """
        required_fields = self.REQUIRED_FIELDS.get(section_title, [])
        return [
            field for field in required_fields
            if f'**{field}**' not in section_content
        ]

    def _calculate_structure_score(self, total_required, missing_sections_count, field_violations):
        """Compute the structure score in the range 0-100.

        Sections contribute up to 70 points in proportion to how many required
        sections are present.  Fields start at 30 points and lose 3 points per
        missing field, never dropping below 0.
        """
        if total_required == 0:
            return 100
        section_score = ((total_required - missing_sections_count) / total_required) * 70
        total_violations = sum(len(fields) for fields in field_violations.values())
        field_score = 30 - min(30, total_violations * 3)
        return max(0, min(100, section_score + field_score))

    def _get_structure_level(self, score):
        """Map a numeric score to its level label."""
        if score >= 90:
            return "优秀"
        elif score >= 75:
            return "良好"
        elif score >= 60:
            return "一般"
        elif score >= 40:
            return "不完整"
        else:
            return "结构缺失"

    def generate_validation_report(self, validation_result, output_format='text'):
        """Render the result of ``validate_structure`` as a report string.

        Args:
            validation_result: Dict returned by ``validate_structure``.
            output_format: 'text' for a human-readable report, 'json' for the
                result serialized as JSON.

        Raises:
            ValueError: If ``output_format`` is neither 'text' nor 'json'.
        """
        if output_format == 'json':
            import json
            return json.dumps(validation_result, ensure_ascii=False, indent=2)
        if output_format != 'text':
            raise ValueError(f"不支持的输出格式: {output_format}")

        result = validation_result
        sym = self.symbols
        rule = "=" * 60
        missing = result['missing_sections']
        violations = result['section_field_violations']
        present = result['present_sections']
        score = result['structure_score']

        lines = [
            rule,
            "人物档案结构验证报告",
            f"文件: {result['filepath']}",
            f"检测类型: {result['detected_type']}",
            rule,
            "",
            # Overall structure summary.
            f"{sym['building']} 结构完整性",
            f" 评分: {score:.1f}/100",
            f" 等级: {result['structure_level']}",
            f" 发现章节: {result['total_sections_found']}",
            "",
        ]

        # Missing required sections.
        if missing:
            lines.append(f"{sym['cross_mark']} 缺失的必需章节")
            lines.extend(f" {sym['bullet']} {section}" for section in missing)
            lines.append("")

        # Missing fields, grouped by section.
        if violations:
            lines.append(f"{sym['warning']} 章节内缺失字段")
            for section, fields in violations.items():
                lines.append(f" {sym['bullet']} {section}:")
                lines.extend(f" {sym['dash']} {field}" for field in fields)
            lines.append("")

        # Checks that passed.
        lines.append(f"{sym['check']} 通过的检查")
        lines.append(f" {sym['bullet']} 必需章节: {len(present)}/{len(result['required_sections'])}")
        total_field_count = sum(len(self.REQUIRED_FIELDS.get(s, [])) for s in present)
        missing_field_count = sum(len(violations.get(s, [])) for s in present)
        if total_field_count > 0:
            present_field_count = total_field_count - missing_field_count
            lines.append(f" {sym['bullet']} 必需字段: {present_field_count}/{total_field_count}")
        lines.append("")

        # Suggested fixes, depending on how complete the structure is.
        lines.append(f"{sym['wrench']} 修复建议")
        if score >= 80:
            lines.append(f" {sym['check_mark']} 结构完整,可以继续完善内容细节")
        elif score >= 60:
            if missing:
                lines.append(f" {sym['arrow']} 添加缺失的章节: {', '.join(missing[:3])}")
            if violations:
                # Point at the first missing field only, to keep the hint short.
                first_section = next(iter(violations))
                first_field = violations[first_section][0]
                lines.append(f" {sym['arrow']} 补充字段: {first_section} → **{first_field}**")
        else:
            lines.append(f" {sym['arrow']} 需要补充基本的结构框架")
            lines.append(f" {sym['arrow']} 建议使用'{result['detected_type']}'模板重新整理")
        lines.append("")
        lines.append(rule)
        return "\n".join(lines)

    def validate_directory(self, directory_path, recursive=True):
        """Validate every ``*.md`` file in a directory.

        Args:
            directory_path: Directory to scan.
            recursive: Also scan subdirectories when true.

        Returns:
            A dict with 'total_files', 'successful_validation',
            'failed_validation', 'results' (successful validations sorted by
            score, highest first) and 'errors' (one ``{'filepath', 'error'}``
            entry per file that could not be validated).  When no Markdown
            file is found the same keys are returned with zero counts, plus
            'message' and 'files'.

        Raises:
            FileNotFoundError: If ``directory_path`` does not exist.
        """
        directory = Path(directory_path)
        if not directory.exists():
            raise FileNotFoundError(f"目录不存在: {directory_path}")

        finder = directory.rglob if recursive else directory.glob
        # Sorted so that ties in the ranking do not depend on filesystem order;
        # directories that merely end in ".md" are not profiles.
        md_files = sorted(p for p in finder("*.md") if p.is_file())
        if not md_files:
            return {
                "message": "未找到markdown文件",
                "files": [],
                'total_files': 0,
                'successful_validation': 0,
                'failed_validation': 0,
                'results': [],
                'errors': []
            }

        validated = []
        errors = []
        for md_file in md_files:
            try:
                validated.append(self.validate_structure(str(md_file)))
            except Exception as e:
                # Best effort: one unreadable file must not abort the batch,
                # but its error is kept so callers can see what went wrong.
                errors.append({'filepath': str(md_file), 'error': str(e)})

        ranked = sorted(validated, key=lambda r: r['structure_score'], reverse=True)
        return {
            'total_files': len(md_files),
            'successful_validation': len(validated),
            'failed_validation': len(errors),
            'results': ranked,
            'errors': errors
        }

    def generate_directory_validation_report(self, validation_results, output_format='text'):
        """Render the result of ``validate_directory`` as a report string.

        Args:
            validation_results: Dict returned by ``validate_directory``.
            output_format: 'text' for a human-readable report; any other
                value yields the results serialized as JSON.
        """
        if output_format != 'text':
            import json
            return json.dumps(validation_results, ensure_ascii=False, indent=2)

        sym = self.symbols
        rule = "=" * 60
        # .get() keeps the report usable for a result that lacks the counters.
        results = validation_results.get('results', [])
        failed = validation_results.get('failed_validation', 0)

        lines = [
            rule,
            "人物档案结构验证报告(目录)",
            f"分析文件数: {validation_results.get('total_files', 0)}",
            f"成功验证: {validation_results.get('successful_validation', 0)}",
        ]
        if failed > 0:
            lines.append(f"验证失败: {failed}")
        lines.append(rule)
        lines.append("")

        message = validation_results.get('message')
        if message:
            lines.append(message)
            lines.append("")

        # Everything below needs at least one successful validation; the
        # statistics would otherwise divide by zero.
        if results:
            # Ranking, best score first.
            lines.append(f"{sym['file']} 文件结构完整性排名")
            for i, result in enumerate(results, 1):
                score = result['structure_score']
                level = result['structure_level']
                filename = os.path.basename(result['filepath'])
                profile_type = result.get('detected_type', '未知')
                if score >= 80:
                    icon = sym['green_circle']
                elif score >= 60:
                    icon = sym['yellow_circle']
                else:
                    icon = sym['red_circle']
                lines.append(f"{icon} {i:2d}. {filename:<35} {score:5.1f}分 ({level}, {profile_type})")
            lines.append("")

            # Score statistics.
            scores = [r['structure_score'] for r in results]
            avg_score = sum(scores) / len(scores)
            lines.append(f"{sym['chart']} 统计信息")
            lines.append(f" 平均结构分: {avg_score:.1f}")
            lines.append(f" 最高分: {max(scores):.1f}")
            lines.append(f" 最低分: {min(scores):.1f}")
            lines.append("")

            # Distribution of detected profile types.
            type_distribution = {}
            for result in results:
                profile_type = result.get('detected_type', '未知')
                type_distribution[profile_type] = type_distribution.get(profile_type, 0) + 1
            lines.append(f"{sym['chart2']} 类型分布")
            for profile_type, count in type_distribution.items():
                percentage = (count / len(results)) * 100
                lines.append(f" {profile_type}: {count}个 ({percentage:.1f}%)")
            lines.append("")

            # Overall advice based on the average score.
            lines.append(f"{sym['bulb']} 整体建议")
            if avg_score >= 75:
                lines.append(f" {sym['check_mark']} 整体结构良好")
                lines.append(f" {sym['arrow']} 可以开始关注内容深度和细节")
            elif avg_score >= 50:
                lines.append(f" {sym['warning']} 结构基本完整但有缺失")
                lines.append(f" {sym['arrow']} 建议补充缺失章节和字段")
            else:
                lines.append(f" {sym['cross_mark']} 结构完整性不足")
                lines.append(f" {sym['arrow']} 需要重新整理档案结构框架")
            lines.append("")

        lines.append(rule)
        return "\n".join(lines)
def main(argv=None):
    """Command-line entry point: validate one profile file or a directory.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]`` when None.

    Returns:
        Process exit code: 0 on success, 1 on any error.
    """
    import argparse

    parser = argparse.ArgumentParser(description='验证人物档案markdown文件结构')
    parser.add_argument('path', help='要验证的markdown文件或目录路径')
    parser.add_argument('--type', '-t', choices=['protagonist', 'antagonist', 'supporting', 'standard', 'auto'],
                        default='auto', help='档案类型(默认为自动检测)')
    parser.add_argument('--recursive', '-r', action='store_true', help='递归验证目录')
    parser.add_argument('--format', '-f', choices=['text', 'json'], default='text', help='输出格式')
    parser.add_argument('--output', '-o', help='输出文件路径')
    args = parser.parse_args(argv)

    path = Path(args.path)
    is_file = path.is_file()

    # Reject unusable paths up front.  Diagnostics go to stderr so that a
    # report piped from stdout is never mixed with error text.
    if not is_file and not path.is_dir():
        print(f"错误: 路径不存在: {args.path}", file=sys.stderr)
        return 1
    if is_file and path.suffix.lower() != '.md':
        print("错误: 文件必须是.md格式", file=sys.stderr)
        return 1

    validator = ProfileValidator(args.type)
    try:
        if is_file:
            # Single file.
            validation = validator.validate_structure(str(path))
            report = validator.generate_validation_report(validation, args.format)
        else:
            # Whole directory.
            validation_results = validator.validate_directory(str(path), args.recursive)
            report = validator.generate_directory_validation_report(validation_results, args.format)

        # Emit the report to the requested destination.
        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(report)
            print(f"报告已保存到: {args.output}")
        else:
            print(report)
        return 0
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"验证失败: {e}", file=sys.stderr)
        return 1


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    sys.exit(main())