concat.py中身1

code:py

# encoding: utf-8

import datetime

import glob

import os

import sys

import re

class Util:
    """Namespace of small, stateless helpers (process exit, file and path handling)."""

    # Line separator used by lines2str() / str2lines(). Kept on the class so
    # the helpers also work when this module is imported rather than executed
    # (the module-level LINEBREAK only exists under the ``__main__`` guard).
    _LINEBREAK = '\n'

    # Every character in this set is replaced by '_' when sanitizing a filename.
    # from https://github.com/stakiran/hidemaru_scb/blob/master/scb_new_or_open.mac#L157
    _INVALID_FILENAME_CHARS = '\\/:*?"><| '
    _INVALID_CHAR_TABLE = str.maketrans(
        _INVALID_FILENAME_CHARS, '_' * len(_INVALID_FILENAME_CHARS)
    )

    @staticmethod
    def success():
        """Terminate the process with exit status 0."""
        sys.exit(0)

    @staticmethod
    def abort(msg):
        """Print ``msg`` and terminate the process with exit status 1."""
        print(msg)
        sys.exit(1)

    @staticmethod
    def file2list(filepath):
        """Read a UTF-8 text file and return its lines without trailing newlines."""
        with open(filepath, encoding='utf8', mode='r') as f:
            return [line.rstrip('\n') for line in f]

    @staticmethod
    def list2file(filepath, ls):
        """Write each item of ``ls`` to ``filepath`` (UTF-8), one item per line."""
        with open(filepath, encoding='utf8', mode='w') as f:
            f.writelines(f'{line}\n' for line in ls)

    @staticmethod
    def lines2str(lines):
        """Join ``lines`` into a single newline-separated string."""
        return Util._LINEBREAK.join(lines)

    @staticmethod
    def str2lines(s):
        """Split ``s`` on newlines and return the resulting list."""
        return s.split(Util._LINEBREAK)

    @staticmethod
    def get_filename(path):
        """Return the final component of ``path`` (name plus extension)."""
        return os.path.basename(path)

    @staticmethod
    def get_basename(path):
        """Return the final component of ``path`` without its extension."""
        return os.path.splitext(Util.get_filename(path))[0]

    @staticmethod
    def get_extension(path):
        """Return the extension of ``path`` including the leading dot ('' if none)."""
        return os.path.splitext(Util.get_filename(path))[1]

    @staticmethod
    def get_lastmodified_by_sec(filename):
        """Return the ``st_mtime`` value reported by ``os.stat`` for ``filename``."""
        return os.stat(filename).st_mtime

    @staticmethod
    def remove_invalid_char_based_on_hidemaruscb(filename):
        """Return ``filename`` with each of ``\\ / : * ? " > < |`` and space replaced by '_'."""
        return filename.translate(Util._INVALID_CHAR_TABLE)

# Matches a bracket link ``[page name]`` to another page.
#   group 1: first character of the page name; must not be '-', '*' or '/'
#            (presumably to skip bracket decoration syntax -- confirm)
#   group 2: the rest of the page name (at least one character, non-greedy)
#   group 3: the character right after ']', which must not be '(' ;
#            empty when the link is at the end of the line
RE_LINK_ANOTHER_PAGE = re.compile(r'\[([^\-\*/])(.+?)\]([^\(]|$)')


def get_linked_filename(line):
    """Return the ``.scb`` filename for the first page link found in ``line``.

    Example: ``'[動揺日記]'`` is captured as ``('動', '揺日記', '')``; because
    of how the groups are laid out the name arrives split across group 1 and
    group 2, so the two are concatenated again here.

    Only the first link on the line is used. Several links per line are
    neither expected nor tested.

    Args:
        line: One line of text to search.

    Returns:
        The linked page name plus ``.scb``, with characters that are invalid
        in filenames replaced by ``_``; ``''`` when the line has no link.
    """
    NO_FILENAME = ''

    matched = RE_LINK_ANOTHER_PAGE.search(line)
    if matched is None:
        return NO_FILENAME

    filename = f'{matched.group(1)}{matched.group(2)}.scb'
    return Util.remove_invalid_char_based_on_hidemaruscb(filename)

def parse_arguments(argv=None):
    """Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, which is the original behavior.

    Returns:
        An ``argparse.Namespace`` with ``input_filename`` and
        ``output_filename`` attributes.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input-filename', default='おわた.scb')
    parser.add_argument('-o', '--output-filename', default='おわた_concated.scb')
    return parser.parse_args(argv)

if __name__ == '__main__':
    # Script entry point: read the input page, and for every line that links
    # to another page emit that line followed by the indented contents of the
    # linked file. All paths are resolved relative to this script's directory.
    LINEBREAK = '\n'

    MYFULLPATH = os.path.abspath(sys.argv[0])
    MYDIR = os.path.dirname(MYFULLPATH)

    args = parse_arguments()

    input_fullpath = os.path.join(MYDIR, args.input_filename)
    output_fullpath = os.path.join(MYDIR, args.output_filename)

    lines = Util.file2list(input_fullpath)

    # Prefix put in front of every line copied from a linked file.
    INDENT = ' '

    outlines = []
    linkcount = 0
    for line in lines:
        trimmed_line = line.strip(' ')

        # Lines without a page link are not copied to the output.
        filename = get_linked_filename(trimmed_line)
        if filename == '':
            continue

        fullpath = os.path.join(MYDIR, filename)
        if os.path.exists(fullpath):
            contents = Util.file2list(fullpath)
            contents_with_indented = [
                '{}{}'.format(INDENT, content_line) for content_line in contents
            ]
        else:
            # Must be a one-element list: a bare string would make len()
            # count characters and extend() append one character per line.
            contents_with_indented = ['{}<Nothing>'.format(INDENT)]

        # NOTE(review): the source lost its indentation, so it is unclear
        # whether links to missing files were meant to be counted; this
        # counts every link -- confirm.
        linkcount += 1

        # Desired output layout:
        #   xxx
        #    contents
        #    content
        #   yyy
        #    contents
        #   ...
        content_volume = len(contents_with_indented)
        caption = trimmed_line
        caption_display = '{} {}'.format(content_volume, caption)

        outlines.append(caption_display)
        outlines.extend(contents_with_indented)
        outlines.append('')

    # Header: total count, then a blank separator line.
    outlines[:0] = ['all {} files.'.format(linkcount), '']

    Util.list2file(output_fullpath, outlines)