MyBlog/.codex/skills/myblog-blog-writer/scripts/plan_article.py

#!/usr/bin/env python3
"""Plan a MyBlog article path and frontmatter from a topic."""

from __future__ import annotations

import argparse
import datetime as dt
import json
import re
from dataclasses import dataclass
from pathlib import Path


# Repository root, taken as the directory four levels above the folder that
# contains this script (parents[0] is that folder, parents[4] the root).
REPO_ROOT = Path(__file__).resolve().parents[4]


@dataclass(frozen=True)
class Rule:
  """Keyword rule that maps a topic onto a fixed content directory.

  Instances are created positionally, so the field order below is part of the
  interface.
  """

  # Keywords tested as substrings of the lowercased topic when scoring.
  keywords: tuple[str, ...]
  # Target directory; joined onto REPO_ROOT when sampling existing files.
  directory: str
  # Emitted as the single entry of the frontmatter 'category' list.
  category: str
  # Emitted as the frontmatter 'tag' list.
  tags: tuple[str, ...]
  # Emitted as the frontmatter 'icon' value.
  icon: str
  # Copied into the plan's 'requires_sidebar_update' flag.
  requires_sidebar_update: bool = False


@dataclass(frozen=True)
class NewCategoryRule:
  """Keyword rule used to suggest a new category directory for a topic.

  Instances are created positionally, so the field order below is part of the
  interface.
  """

  # Keywords tested as substrings of the lowercased topic when scoring; the
  # matched ones are also used to derive the new directory's slug.
  keywords: tuple[str, ...]
  # Parent directory under which the suggested category directory is placed.
  top_directory: str
  # Emitted as the single entry of the frontmatter 'category' list.
  category: str
  # Emitted as the article frontmatter 'tag' list.
  tags: tuple[str, ...]
  # Emitted as the frontmatter 'icon' value.
  icon: str
  # Copied into the suggestion's 'requires_sidebar_update' flag.
  requires_sidebar_update: bool = False
  # Optional path segment inserted between top_directory and the inferred slug.
  nested_under_slug: str | None = None


# Rules for placing an article into a fixed directory.
# Positional fields: keywords, directory, category, tags, icon,
# requires_sidebar_update (optional, defaults to False).
# choose_rules() ranks matches by matched-keyword count, then by the total
# length of the matched keywords; the sort is stable, so rules that tie on both
# keep the order in which they are declared here.
RULES: tuple[Rule, ...] = (
  Rule(
    ('ai', 'llm', 'agent', 'codex', 'claude', 'opencode', 'chatgpt', 'openai', 'gemini', 'iflow', 'openclaw'),
    'src/ai',
    'AI',
    ('AI', '工具'),
    'fa6-solid:robot',
    True,  # requires_sidebar_update
  ),
  Rule(
    ('docker', '容器', '镜像', 'compose', 'docker-compose'),
    'src/programming/docker',
    'Docker',
    ('Docker', '容器'),
    'mdi:docker',
  ),
  Rule(
    ('java', 'spring', 'spring boot', 'maven', 'jdk', 'jar'),
    'src/programming/backend/java/功能整理',
    'Java',
    ('Java', '后端'),
    'mdi:language-java',
  ),
  Rule(
    ('gin', 'gorm'),
    'src/programming/backend/go/Web开发数据库',
    'Go',
    ('Go', '后端'),
    'mdi:language-go',
  ),
  Rule(
    ('go', 'golang', '并发', 'goroutine', 'channel'),
    'src/programming/backend/go/Go并发模型',
    'Go',
    ('Go', '并发'),
    'mdi:language-go',
  ),
  Rule(
    ('linux mint', 'mint'),
    'src/programming/linux/Linux_Mint',
    'Linux',
    ('Linux Mint', '系统配置'),
    'simple-icons:linuxmint',
  ),
  Rule(
    ('linux', 'ssh', 'nginx', 'vnc', '系统配置', '凝思'),
    'src/programming/linux/基础',
    'Linux',
    ('Linux', '运维'),
    'mdi:linux',
  ),
  Rule(
    ('vue', '前端'),
    'src/programming/frontend/vue',
    '前端',
    ('Vue', '前端'),
    'mdi:vuejs',
  ),
  Rule(
    ('css',),
    'src/programming/frontend/css',
    '前端',
    ('CSS', '前端'),
    'mdi:language-css3',
  ),
  Rule(
    ('html',),
    'src/programming/frontend/html',
    '前端',
    ('HTML', '前端'),
    'mdi:language-html5',
  ),
  Rule(
    ('vscode', 'cursor'),
    'src/programming/frontend/tools',
    '前端工具',
    ('工具', '前端'),
    'mdi:tools',
  ),
  Rule(
    ('自建', 'jellyfin', 'rustdesk', 'nas', '服务'),
    'src/apps',
    '应用',
    ('自建服务', '应用'),
    'mdi:apps',
  ),
  Rule(
    ('windows', 'scoop', 'wsl', 'mobaxterm', 'google', 'gitee', '工具'),
    'src/tools',
    '工具',
    ('工具',),
    'mdi:toolbox',
  ),
  Rule(
    ('工作总结', '项目交付', '权限', '业务记录', '项目总结'),
    'src/work/project-summary',
    '工作',
    ('工作记录', '项目总结'),
    'mdi:book-open-page-variant',
  ),
)


# Rules for suggesting a new category directory beneath a top-level section.
# Positional fields: keywords, top_directory, category, tags, icon,
# requires_sidebar_update (optional, defaults to False).
# choose_new_category_rule() keeps only the best-ranked rule; ranking is by
# matched-keyword count, then total matched-keyword length, and the stable sort
# leaves tied rules in the order they are declared here.
NEW_CATEGORY_RULES: tuple[NewCategoryRule, ...] = (
  NewCategoryRule(
    ('ai', 'llm', 'agent', 'agents', '模型', '大模型', '智能体', 'codex', 'claude', 'opencode', 'chatgpt', 'openai', 'gemini', 'iflow', 'openclaw'),
    'src/ai',
    'AI',
    ('AI', '工具'),
    'fa6-solid:robot',
    True,  # requires_sidebar_update
  ),
  NewCategoryRule(
    ('自建', '部署', '服务', '相册', '影视', 'nas', 'docker compose', 'compose', 'immich', 'jellyfin', 'rustdesk'),
    'src/apps',
    '应用',
    ('自建服务', '应用'),
    'mdi:apps',
  ),
  NewCategoryRule(
    ('工具', '效率', '客户端', '笔记', '浏览器', '插件', '工作流', 'obsidian', 'scoop', 'wsl', 'mobaxterm', 'google', 'gitee'),
    'src/tools',
    '工具',
    ('工具', '效率'),
    'mdi:toolbox',
  ),
  NewCategoryRule(
    ('工作', '业务', '交付', '记录', '合同', '审批', '流程', '项目总结', '客户', '需求'),
    'src/work/project-summary',
    '工作',
    ('工作记录', '项目总结'),
    'mdi:book-open-page-variant',
  ),
  NewCategoryRule(
    ('python', '数据分析', '框架', '开发', '编程', '后端', '前端', '语言', '环境搭建', '数据库', '算法'),
    'src/programming',
    '编程',
    ('编程',),
    'fa6-solid:code',
  ),
)


def normalize_topic(topic: str) -> str:
  """Return the topic with leading and trailing whitespace removed."""
  trimmed = topic.strip()
  return trimmed


def slugify(topic: str) -> str:
  """Turn a topic into a name usable as a file stem or directory segment.

  Steps: drop characters that are unsafe in paths or URLs, turn whitespace
  runs into '-', replace every remaining character outside ASCII
  alphanumerics, CJK ideographs and '._-' with '-', collapse repeated '-',
  then trim '.', '-' and '_' from both ends.

  The final trim covers dots and underscores as well as dashes. Replacing
  punctuation can expose them at the edges (for example '!..!' becomes
  '-..-'), and a slug of '.' or '..' would act as a relative path segment.

  Returns:
    The slug; lowercased unless it contains CJK ideographs, in which case
    the original letter case is kept. 'untitled' when nothing usable remains.
  """
  cleaned = re.sub(r'[\\/:*?"<>|#%{}[\]^`]+', '', topic.strip())
  cleaned = re.sub(r'\s+', '-', cleaned)
  cleaned = cleaned.strip('.-_')

  slug = re.sub(r'[^a-zA-Z0-9\u4e00-\u9fff._-]+', '-', cleaned)
  # Trim again after substitution: it may have uncovered edge dots/underscores.
  slug = re.sub(r'-{2,}', '-', slug).strip('.-_')

  if not slug:
    return 'untitled'

  if re.search(r'[\u4e00-\u9fff]', slug):
    return slug

  return slug.lower()


def score_rule(topic_lower: str, rule: Rule) -> int:
  """Count how many of the rule's keywords occur in the lowercased topic.

  Matching is a plain substring test, so a short keyword also counts when it
  appears inside a longer word.
  """
  hits = [keyword for keyword in rule.keywords if keyword.lower() in topic_lower]
  return len(hits)


def specificity_score(topic_lower: str, rule: Rule) -> int:
  """Sum the lengths of the rule's keywords found in the lowercased topic.

  Used as a tie-breaker: longer matched keywords rank a rule higher.
  """
  total = 0
  for keyword in rule.keywords:
    if keyword.lower() in topic_lower:
      total += len(keyword)
  return total


def score_new_category_rule(topic_lower: str, rule: NewCategoryRule) -> int:
  """Count the rule's keywords that appear as substrings of the lowercased topic."""
  lowered_keywords = (keyword.lower() for keyword in rule.keywords)
  return sum(candidate in topic_lower for candidate in lowered_keywords)


def new_category_specificity_score(topic_lower: str, rule: NewCategoryRule) -> int:
  """Sum the lengths of the rule's keywords found in the lowercased topic."""
  matched = filter(lambda keyword: keyword.lower() in topic_lower, rule.keywords)
  return sum(map(len, matched))


def choose_rules(topic: str) -> tuple[list[tuple[Rule, int]], bool]:
  """Rank RULES against a topic and report whether the top spot is contested.

  Returns:
    A pair ``(candidates, ambiguous)``. ``candidates`` holds up to three
    ``(rule, score)`` tuples, best first, ordered by keyword-match count and
    then by total matched-keyword length. ``ambiguous`` is True when more
    than one matching rule shares the best count and length. When no rule
    matches, five generic section rules are returned with score 0 and
    ``ambiguous`` is False.
  """
  lowered = topic.lower()

  def rank(entry: tuple[Rule, int]) -> tuple[int, int]:
    candidate, hits = entry
    return hits, specificity_score(lowered, candidate)

  ranked: list[tuple[Rule, int]] = []
  for rule in RULES:
    hits = score_rule(lowered, rule)
    if hits > 0:
      ranked.append((rule, hits))
  # Stable sort: rules with equal rank keep their declaration order.
  ranked.sort(key=rank, reverse=True)

  if not ranked:
    defaults = (
      Rule((), 'src/programming', '编程', ('编程',), 'fa6-solid:code'),
      Rule((), 'src/apps', '应用', ('自建服务', '应用'), 'mdi:apps'),
      Rule((), 'src/tools', '工具', ('工具',), 'mdi:toolbox'),
      Rule((), 'src/work/project-summary', '工作', ('工作记录',), 'mdi:book-open-page-variant'),
      Rule((), 'src/ai', 'AI', ('AI', '工具'), 'fa6-solid:robot', True),
    )
    return [(rule, 0) for rule in defaults], False

  best = rank(ranked[0])
  tied = sum(1 for entry in ranked if rank(entry) == best)
  return ranked[:3], tied > 1


def choose_new_category_rule(topic: str) -> tuple[NewCategoryRule, int, bool]:
  """Pick the best NEW_CATEGORY_RULES entry for a topic.

  Returns:
    A triple ``(rule, score, ambiguous)``. ``rule`` is the best match by
    keyword-match count, then by total matched-keyword length; ``score`` is
    its match count; ``ambiguous`` is True when several rules share that best
    count and length. When nothing matches, a generic 'src/programming' rule
    is returned with score 0 and ``ambiguous`` False.
  """
  lowered = topic.lower()

  ranking: list[tuple[int, int, NewCategoryRule]] = []
  for rule in NEW_CATEGORY_RULES:
    hits = score_new_category_rule(lowered, rule)
    if hits > 0:
      ranking.append((hits, new_category_specificity_score(lowered, rule), rule))

  if not ranking:
    generic = NewCategoryRule(
      (),
      'src/programming',
      '编程',
      ('编程',),
      'fa6-solid:code',
    )
    return generic, 0, False

  # Sort on the numeric pair only; stable, so ties keep declaration order.
  ranking.sort(key=lambda entry: entry[:2], reverse=True)
  best_hits, best_specificity, best_rule = ranking[0]
  tied = [entry for entry in ranking if entry[:2] == (best_hits, best_specificity)]
  return best_rule, best_hits, len(tied) > 1


def existing_files(directory: str) -> list[str]:
  """List up to twelve Markdown file names found directly in a directory.

  ``directory`` is resolved against REPO_ROOT. Names are sorted and only the
  first twelve are kept; a missing directory yields an empty list.
  """
  target = REPO_ROOT / directory
  if target.exists():
    names = [entry.name for entry in target.glob('*.md')]
    names.sort()
    return names[:12]
  return []


def infer_category_slug(topic: str, rule: NewCategoryRule) -> str:
  """Derive a directory slug for a new category from the topic and rule.

  Order of preference:
    1. The longest rule keyword containing an ASCII letter or digit that
       occurs (case-insensitively) in the topic.
    2. The fixed slug '合同审批' when the topic mentions both '合同' and '审批'.
    3. The first two matched keywords containing CJK ideographs, joined.
    4. The slug of the whole topic.
  """
  lowered = topic.lower()
  has_latin = re.compile(r'[a-zA-Z0-9]')
  has_cjk = re.compile(r'[\u4e00-\u9fff]')

  latin_hits = [
    kw for kw in rule.keywords if has_latin.search(kw) and kw.lower() in lowered
  ]
  if latin_hits:
    # max() keeps the first keyword among those of equal length.
    return slugify(max(latin_hits, key=len))

  if '合同' in topic and '审批' in topic:
    return '合同审批'

  cjk_hits = [kw for kw in rule.keywords if has_cjk.search(kw) and kw in topic]
  if not cjk_hits:
    return slugify(topic)
  return slugify(''.join(cjk_hits[:2]))


def frontmatter(topic: str, rule: Rule, date: str) -> dict[str, object]:
  """Build article frontmatter from a matched rule.

  Keys are inserted in the order title, icon, date, category, tag.
  """
  meta: dict[str, object] = {'title': topic, 'icon': rule.icon, 'date': date}
  meta['category'] = [rule.category]
  meta['tag'] = [*rule.tags]
  return meta


def new_category_readme_frontmatter(topic: str, rule: NewCategoryRule) -> dict[str, object]:
  """Build the frontmatter for the README of a suggested category directory.

  Keys are inserted in the order title, index, icon, category; 'index' is
  always False.
  """
  return dict(
    title=topic,
    index=False,
    icon=rule.icon,
    category=[rule.category],
  )


def article_frontmatter_from_new_category(
  topic: str,
  rule: NewCategoryRule,
  date: str,
) -> dict[str, object]:
  """Build article frontmatter from a new-category rule.

  Keys are inserted in the order title, icon, date, category, tag.
  """
  categories = [rule.category]
  tag_list = list(rule.tags)
  return {
    'title': topic,
    'icon': rule.icon,
    'date': date,
    'category': categories,
    'tag': tag_list,
  }


def build_new_category_suggestion(topic: str, date: str) -> tuple[dict[str, object], bool]:
  """Describe a new category directory, its README and the article inside it.

  Returns:
    A pair ``(suggestion, ambiguous)``. ``suggestion`` carries the target
    directory, README path/frontmatter/body, article path/frontmatter, the
    sidebar flag and the winning rule's score. ``ambiguous`` is the tie flag
    reported by choose_new_category_rule().
  """
  rule, score, ambiguous = choose_new_category_rule(topic)
  directory_slug = infer_category_slug(topic, rule)

  # top_directory[/nested_under_slug]/directory_slug
  segments = [rule.top_directory]
  if rule.nested_under_slug is not None:
    segments.append(rule.nested_under_slug)
  segments.append(directory_slug)
  directory = '/'.join(segments)

  article_name = f'{slugify(topic)}.md'
  suggestion: dict[str, object] = {
    'top_directory': rule.top_directory,
    'directory': directory,
    'directory_slug': directory_slug,
    'readme_path': f'{directory}/README.md',
    'readme_frontmatter': new_category_readme_frontmatter(topic, rule),
    'readme_body': '<Catalog />',
    'article_path': f'{directory}/{article_name}',
    'article_frontmatter': article_frontmatter_from_new_category(topic, rule, date),
    'requires_sidebar_update': rule.requires_sidebar_update,
    'score': score,
  }
  return suggestion, ambiguous


def build_plan(topic: str, date: str) -> dict[str, object]:
  """Assemble the full placement plan for an article topic.

  The plan recommends the new-category suggestion when no rule in RULES
  matched, or when the best rule points at one of the broad section
  directories and the suggested category directory does not exist under
  REPO_ROOT. Otherwise it recommends the best rule's directory.

  Returns:
    A JSON-serialisable dict with the recommended directory, filename, path,
    frontmatter, sidebar flag, the new-category suggestion, a confirmation
    flag and the ranked candidate rules with a sample of their existing files.
  """
  topic_text = normalize_topic(topic)
  ranked, rules_ambiguous = choose_rules(topic_text)
  suggestion, suggestion_ambiguous = build_new_category_suggestion(topic_text, date)
  best_rule, best_score = ranked[0]
  file_name = f'{slugify(topic_text)}.md'

  broad_directories = {
    'src/ai',
    'src/apps',
    'src/tools',
    'src/work/project-summary',
  }
  if best_score == 0:
    create_category = True
  elif best_rule.directory in broad_directories:
    # Only propose a new category when its directory is not there yet.
    create_category = not (REPO_ROOT / str(suggestion['directory'])).exists()
  else:
    create_category = False

  if create_category:
    target_directory = str(suggestion['directory'])
    target_path = str(suggestion['article_path'])
    meta = suggestion['article_frontmatter']
    sidebar = bool(suggestion['requires_sidebar_update'])
    confirm = suggestion_ambiguous
  else:
    target_directory = best_rule.directory
    target_path = f'{best_rule.directory}/{file_name}'
    meta = frontmatter(topic_text, best_rule, date)
    sidebar = best_rule.requires_sidebar_update
    confirm = rules_ambiguous

  candidate_rows = []
  for rule, score in ranked:
    candidate_rows.append({
      'directory': rule.directory,
      'score': score,
      'category': rule.category,
      'tags': list(rule.tags),
      'requires_sidebar_update': rule.requires_sidebar_update,
      'existing_files_sample': existing_files(rule.directory),
    })

  return {
    'topic': topic_text,
    'recommended_directory': target_directory,
    'filename': file_name,
    'path': target_path,
    'frontmatter': meta,
    'requires_sidebar_update': sidebar,
    'should_create_new_category': create_category,
    'new_category_suggestion': suggestion,
    'needs_confirmation': confirm,
    'candidates': candidate_rows,
  }


def parse_args() -> argparse.Namespace:
  """Parse the command line: a positional topic and an optional --date.

  ``--date`` defaults to today's date. The value is validated against the
  YYYY-MM-DD format and normalised to zero-padded ISO form, because it is
  copied verbatim into the article frontmatter; an invalid value makes
  argparse exit with a usage error.

  Returns:
    Namespace with ``topic`` (str) and ``date`` (ISO date string).
  """

  def iso_date(value: str) -> str:
    # Reject anything that is not a real calendar date in YYYY-MM-DD form.
    try:
      parsed = dt.datetime.strptime(value, '%Y-%m-%d')
    except ValueError as error:
      raise argparse.ArgumentTypeError(
        f'invalid date {value!r}; expected YYYY-MM-DD'
      ) from error
    return parsed.date().isoformat()

  parser = argparse.ArgumentParser(description='Plan a MyBlog article location.')
  parser.add_argument('topic', help='Article topic text')
  parser.add_argument(
    '--date',
    type=iso_date,
    default=dt.date.today().isoformat(),
    help='Frontmatter date in YYYY-MM-DD format',
  )
  return parser.parse_args()


def main() -> None:
  """Build the plan for the command-line topic and print it as indented JSON."""
  options = parse_args()
  plan = build_plan(options.topic, options.date)
  # ensure_ascii=False keeps non-ASCII text readable in the output.
  rendered = json.dumps(plan, ensure_ascii=False, indent=2)
  print(rendered)


# Run the planner only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()