Hugoサイトで使用中のタグを抽出する

code:list_tags.py

import os

import re

from collections import Counter

# Patterns are compiled once at module level because they are reused for
# every Markdown file found under the content directory.

# YAML front matter: the text between a leading "---" line and the next "---".
FRONTMATTER_RE = re.compile(r"^---\n(.*?)\n---", re.DOTALL)

# Inline (flow) notation: tags: [a, b, c]
# "[^\]]*" also accepts an empty list and a list wrapped over several lines.
INLINE_TAGS_RE = re.compile(r"^tags:\s*\[([^\]]*)\]", re.MULTILINE)

# List (block) notation:
#   tags:
#     - a
#     - b
# "\s*" before the dash also accepts items written at the same indent as
# the "tags:" key, which YAML allows.
LIST_TAGS_RE = re.compile(r"^tags:\n((?:\s*-\s+.+\n?)+)", re.MULTILINE)

# A single "- value" entry inside a block captured by LIST_TAGS_RE.
LIST_ITEM_RE = re.compile(r"-\s+(.+)")


def extract_tags(text):
    """Return the tags declared in the YAML front matter of *text*.

    Both the inline notation (``tags: [a, b]``) and the list notation
    (``tags:`` followed by ``- a`` lines) are recognised. Surrounding
    whitespace and quote characters are removed from each tag, and empty
    entries are dropped.

    Args:
        text: Full contents of a Markdown file.

    Returns:
        A list of tag strings in the order they appear. The list is empty
        when the text has no ``---`` delimited front matter or the front
        matter declares no tags.
    """
    match = FRONTMATTER_RE.match(text)
    if not match:
        return []
    frontmatter = match.group(1)

    raw_tags = []

    inline = INLINE_TAGS_RE.search(frontmatter)
    if inline:
        raw_tags.extend(inline.group(1).split(","))

    for block in LIST_TAGS_RE.findall(frontmatter):
        raw_tags.extend(LIST_ITEM_RE.findall(block))

    # Quoted tags ("foo" / 'foo') must count as the same tag as a bare foo.
    cleaned = (tag.strip().strip("\"'") for tag in raw_tags)
    return [tag for tag in cleaned if tag]


tags_counter = Counter()

# Collect tags from every Markdown file below the "content" directory.
for root, dirs, files in os.walk("content"):
    for file in files:
        if not file.endswith(".md"):
            continue
        filepath = os.path.join(root, file)
        with open(filepath, encoding="utf-8") as f:
            text = f.read()
        tags_counter.update(extract_tags(text))

# Output, most frequently used tags first.
for tag, count in tags_counter.most_common():
    print(f"{count:>4} {tag}")

https://gyazo.com/26c51d171f9f00f00f68f77aeb1bcde4

できた〜