Hugoサイトで使用中のタグを抽出する
code:list_tags.py
import os
import re
from collections import Counter
# Front matter: the leading block delimited by '---' lines at the top of a file.
FRONTMATTER_RE = re.compile(r"^---\n(.*?)\n---", re.DOTALL)
# Inline form:  tags: [a, b, c]
INLINE_TAGS_RE = re.compile(r"^tags:\s*\[(.+?)\]", re.MULTILINE)
# List form:
#   tags:
#     - a
#     - b
LIST_TAGS_RE = re.compile(r"^tags:\n((?:\s+-\s+.+\n?)+)", re.MULTILINE)
LIST_ITEM_RE = re.compile(r"-\s+(.+)")


def clean_tag(raw):
    """Return *raw* with surrounding whitespace and quote characters removed."""
    return raw.strip().strip('"').strip("'")


def extract_tags(text):
    """Return the tags declared in the front matter of *text*.

    Both the inline form (``tags: [a, b]``) and the list form
    (``tags:`` followed by ``- a`` lines) are recognised.  Tags are returned
    in the order they appear; an empty list is returned when *text* has no
    leading ``---`` front matter block or declares no tags.
    """
    match = FRONTMATTER_RE.match(text)
    if not match:
        return []
    frontmatter = match.group(1)

    raw_tags = []
    inline = INLINE_TAGS_RE.search(frontmatter)
    if inline:
        raw_tags.extend(inline.group(1).split(","))
    for block in LIST_TAGS_RE.findall(frontmatter):
        raw_tags.extend(LIST_ITEM_RE.findall(block))

    # Drop entries that are empty after cleaning (e.g. from a trailing comma
    # in "[a, b, ]") so they are not counted as a tag named "".
    return [tag for tag in map(clean_tag, raw_tags) if tag]


def count_tags(content_dir="content"):
    """Count tag usage across every ``.md`` file under *content_dir*.

    The directory tree is walked with ``os.walk``; each Markdown file is read
    as UTF-8 and its front matter tags are tallied.  Returns a ``Counter``
    mapping tag -> number of occurrences.
    """
    counter = Counter()
    for root, _dirs, files in os.walk(content_dir):
        for file in files:
            if not file.endswith(".md"):
                continue
            filepath = os.path.join(root, file)
            with open(filepath, encoding="utf-8") as f:
                counter.update(extract_tags(f.read()))
    return counter


tags_counter = count_tags("content")

# Output, most frequently used tags first.
for tag, count in tags_counter.most_common():
    print(f"{count:>4} {tag}")
https://gyazo.com/26c51d171f9f00f00f68f77aeb1bcde4
できた〜
#Hugo
#Python