# novel-doomsday-resurgence/novel/generate_graph.py

#!/usr/bin/env python3
"""
Novel Character Relationship Graph Generator (Corrected Names)

Extracts entities and relationships from novel character notes
and generates a visualizable relationship graph.
"""

import os
import re
import json
from pathlib import Path
from typing import Dict, List, Any

class NovelCharacterGraph:
    """Build a character relationship graph from markdown character notes.

    Every ``*.md`` file below ``characters_path`` is parsed as one character
    sheet: the first H1 heading is the character name, ``**label**: value``
    lines supply properties, and ``[[wiki links]]`` become relationship edges.

    Edges are always recorded from the sheet's owner (``from``) to the linked
    character (``to``). For family edges the ``type`` names the role the
    ``to`` character has on the owner's sheet, e.g. a ``**母亲**`` entry
    yields ``owner --[mother_of]--> mother``.
    """

    # Separator between a bold label and its value: an ASCII or full-width
    # colon followed by optional whitespace that must stay on the same line,
    # so an empty field never swallows the following line.
    _FIELD_SEPARATOR = r'[:：][^\S\n]*'

    # Raw text between the double brackets of a wiki link.
    _WIKI_LINK = r'\[\[([^\]]+)\]\]'

    # (property key, markdown label, value pattern) for single-line fields.
    _PROPERTY_FIELDS = (
        ('age', '年龄', r'(\d+)'),
        ('gender', '性别', r'([男女])'),
        ('occupation', '职业', r'([^\n]+)'),
        ('status', '状态', r'([^\n]+)'),
    )

    # (markdown label, edge type) for bullets inside the family section,
    # in the order the edges are emitted.
    _FAMILY_RELATIONS = (
        ('母亲', 'mother_of'),
        ('父亲', 'father_of'),
        ('儿子', 'son_of'),
        ('妻子', 'wife_of'),
        ('丈夫', 'husband_of'),
    )

    def __init__(self, characters_path: str):
        """Remember the notes directory and start with an empty graph.

        Args:
            characters_path: Directory that holds the character markdown
                files (searched recursively by ``load_characters``).
        """
        self.characters_path = Path(characters_path)
        # char_id -> {'id', 'type', 'properties'} node record.
        self.characters: Dict[str, Dict[str, Any]] = {}
        # Edge records with 'from', 'to' and 'type' keys, in discovery order.
        self.relationships: List[Dict[str, str]] = []

    def load_characters(self) -> None:
        """Load all character markdown files below ``characters_path``.

        Files are visited in sorted path order so that the resulting
        character and relationship order does not depend on the filesystem.
        """
        for md_file in sorted(self.characters_path.rglob('*.md')):
            self.load_character_file(md_file)

    def load_character_file(self, file_path: Path) -> None:
        """Parse one character sheet and add its node and edges to the graph.

        Args:
            file_path: Markdown file to read (UTF-8). It must live below the
                parent of ``characters_path`` because the stored ``file``
                property is relative to that parent.

        Files whose title contains ``关系图谱`` are treated as the
        relationship-overview note and skipped entirely.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # The character name is the first H1; fall back to the file name.
        title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
        name = title_match.group(1).strip() if title_match else file_path.stem

        # Skip relationship graph file
        if '关系图谱' in name:
            return

        char_id = self.generate_id(name)

        properties = {
            'name': name,
            'file': str(file_path.relative_to(self.characters_path.parent)),
        }
        for key, label, value_pattern in self._PROPERTY_FIELDS:
            field_match = re.search(
                r'\*\*' + label + r'\*\*' + self._FIELD_SEPARATOR + value_pattern,
                content,
            )
            if field_match:
                properties[key] = field_match.group(1).strip()

        # Every wiki link on the sheet becomes a generic "mentioned" edge.
        for link in re.findall(self._WIKI_LINK, content):
            link_id = self._link_to_id(link)
            if link_id:
                self.relationships.append({
                    'from': char_id,
                    'to': link_id,
                    'type': 'mentioned'
                })

        # Typed edges from the structured family section.
        self.extract_relationships_from_content(content, char_id)

        self.characters[char_id] = {
            'id': char_id,
            'type': 'Character',
            'properties': properties
        }

    def extract_relationships_from_content(self, content: str, char_id: str) -> None:
        """Extract typed family edges from the ``## 家庭关系`` section.

        Args:
            content: Full markdown text of the character sheet.
            char_id: ID of the character that owns the sheet; used as the
                ``from`` end of every edge.

        Each ``- **<label>**: [[Name]]`` bullet in the section appends one
        edge; repeated labels (for example several ``儿子`` bullets) each
        produce their own edge.
        """
        family_section = re.search(r'## 家庭关系\s*\n(.*?)(?=##|$)', content, re.DOTALL)
        if not family_section:
            return

        section_text = family_section.group(1)
        for label, rel_type in self._FAMILY_RELATIONS:
            bullet_pattern = (
                r'- \*\*' + label + r'\*\*' + self._FIELD_SEPARATOR + self._WIKI_LINK
            )
            for link in re.findall(bullet_pattern, section_text):
                relative_id = self._link_to_id(link)
                if relative_id:
                    self.relationships.append({
                        'from': char_id,
                        'to': relative_id,
                        'type': rel_type
                    })

    def _link_to_id(self, link: str) -> str:
        """Return the character ID a wiki link targets, or '' if it has none.

        Drops the ``|alias`` and ``#heading`` parts of a link so that
        ``[[Name|shown text]]`` and ``[[Name#section]]`` resolve to the same
        ID as ``[[Name]]``.
        """
        target = link.split('|', 1)[0].split('#', 1)[0].strip()
        return self.generate_id(target) if target else ''

    def generate_id(self, name: str) -> str:
        """Generate consistent ID from name.

        The name is lower-cased and every run of characters other than
        ``a-z``, ``0-9`` and CJK ideographs (U+4E00-U+9FFF) is collapsed to a
        single underscore; the result is prefixed with ``char_``.
        """
        normalized = re.sub(r'[^a-z0-9\u4e00-\u9fff]+', '_', name.lower()).strip('_')
        return f"char_{normalized}"

    def visualize_text(self) -> None:
        """Print the character list, the resolvable edges and the ASCII graph.

        Only edges whose both ends are loaded characters are printed; edges
        pointing at unknown IDs are skipped here (they stay in
        ``self.relationships``).
        """
        print("=" * 60)
        print("《杀了婆婆的我却无人追责？》人物关系图谱")
        print("=" * 60)

        # Print characters
        print("\n【人物列表】")
        for char in self.characters.values():
            props = char['properties']
            print(f"\n  {props.get('name', 'Unknown')}")
            if 'age' in props:
                print(f"    年龄: {props['age']}")
            if 'gender' in props:
                print(f"    性别: {props['gender']}")
            if 'occupation' in props:
                print(f"    职业: {props['occupation']}")
            if 'status' in props:
                print(f"    状态: {props['status']}")

        # Print relationships
        print("\n【人物关系】")
        for rel in self.relationships:
            from_char = self.characters.get(rel['from'])
            to_char = self.characters.get(rel['to'])
            if from_char and to_char:
                from_name = from_char['properties']['name']
                to_name = to_char['properties']['name']
                rel_type = rel['type']
                print(f"\n  {from_name} --[{rel_type}]--> {to_name}")

        # Print ASCII graph
        print("\n【ASCII 关系图】")
        self.print_ascii_graph()

    def print_ascii_graph(self) -> None:
        """Print simple ASCII relationship graph.

        The diagram is a fixed, hand-drawn picture; it is not derived from
        the loaded characters or relationships.
        """
        print("""
            ┌─────────────┐
            │  顾国强     │
            │  (已故)     │
            └──────┬──────┘
                   │
                   │ 夫妻
                   │
            ┌──────▼──────┐
            │  刘婉清     │
            │  (68岁)     │
            │  婆婆      │
            └──────┬──────┘
                   │
       ┌───────────┼───────────┐
       │           │           │
    母子       控制压迫      婆媳
       │           │           │
  ┌────▼────┐      │      ┌────▼────┐
  │  顾长风  │      │      │  叶知秋  │
  │  (35岁)  │      │      │  (28岁)  │
  │  丈夫    │      │      │  女主    │
  └────┬────┘      │      └────┬────┘
       │           │           │
    夫妻        想要        记忆
       │         遗产         混乱
       │           │           │
       └───────────┼───────────┘
                   │
                调查案件
                   │
           ┌──────▼──────┐
           │  张明远     │
           │  (所长)     │
           └─────────────┘
        """)

    def export_json(self, output_path: str) -> None:
        """Export to JSON for further processing.

        Args:
            output_path: File to write. It receives an object with a
                ``characters`` list (node records) and a ``relationships``
                list (edge records), encoded as UTF-8 with non-ASCII text
                left unescaped.
        """
        output = {
            'characters': list(self.characters.values()),
            'relationships': self.relationships
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(output, f, ensure_ascii=False, indent=2)

        print(f"\n✅ 导出到: {output_path}")


if __name__ == '__main__':
    # Command-line entry point: load the notes, print the text view and
    # optionally export the graph as JSON.
    import argparse

    parser = argparse.ArgumentParser(description='Novel Character Relationship Graph (Corrected Names)')
    parser.add_argument('--path', type=str,
                       default='/root/.openclaw/workspace/novel/characters',
                       help='Path to character notes')
    parser.add_argument('--export', type=str,
                       help='Export to JSON file')

    args = parser.parse_args()

    # A missing directory would otherwise produce an empty graph (and an
    # empty export) with no hint that the path was wrong.
    if not Path(args.path).is_dir():
        parser.error(f"character notes directory not found: {args.path}")

    # Create graph
    graph = NovelCharacterGraph(args.path)

    # Load characters
    graph.load_characters()

    # Visualize
    graph.visualize_text()

    # Export if requested
    if args.export:
        graph.export_json(args.export)