import os
import json
import urllib.parse
import asyncio

import requests
from pyppeteer import launch
from flask import request, abort, jsonify
from flask import current_app as app

from utils import get_all_titles, get_all

root = 'https://scrapbox.io'
app.config['JSON_AS_ASCII'] = False  # allow Japanese text in JSON responses


def get_public_pages(src_project, public_tag, private_tag, sid):
    '''Fetch the pages to sync from the source project.'''
    tag_page = f'{root}/api/pages/{src_project}/{public_tag}'
    tagged_pages_metadata = requests.get(
        tag_page,
        headers={'Cookie': f'connect.sid={sid}'},
    ).json()['relatedPages']['links1hop']
    tagged_pages_urls = [
        f'{root}/api/pages/{src_project}/{urllib.parse.quote(page["title"], safe="")}'
        for page in tagged_pages_metadata
    ]
    tagged_pages = asyncio.run(get_all(tagged_pages_urls, cookies={'connect.sid': sid}))

    public_pages = []
    while tagged_pages:
        page = tagged_pages.pop()
        texts = [line['text'] for line in page['lines']]
        # skip the page entirely if it carries the private tag
        if f'#{private_tag}' in texts:
            continue
        # remove the public-tag line (NOTE: only the first exact match;
        # guarded so a tag that never occupies a line by itself cannot
        # raise ValueError and abort the whole sync)
        if f'#{public_tag}' in texts:
            tag_idx = texts.index(f'#{public_tag}')
            # also remove the line after the tag line if it is empty
            if tag_idx < len(texts) - 1 and not texts[tag_idx + 1]:
                del page['lines'][tag_idx + 1]
            del page['lines'][tag_idx]
        # keep only the attributes needed for import
        public_pages.append({key: page[key] for key in ['title', 'created', 'updated', 'id', 'lines']})
    print(f'Found {len(public_pages)} pages to import.')
    return public_pages


def import_pages(pages, dst_project, sid):
    '''Import the target pages into the destination project.'''
    import_json = json.dumps({'pages': pages})
    token = requests.get(
        f'{root}/api/users/me',
        headers={'Cookie': f'connect.sid={sid}'},
    ).json()['csrfToken']
    response = requests.post(
        f'{root}/api/page-data/import/{dst_project}.json',
        headers={'Cookie': f'connect.sid={sid}', 'X-CSRF-TOKEN': token},
        files={'import-file': ('import.json', import_json, 'application/json')},
    )
    print(f'from {root}: {response.json()["message"]}')
    return response.status_code


def get_deleted_titles(src_pages, dst_project, whitelist):
    '''Get the titles of pages that were deleted from the source project.'''
    src_titles = [page['title'] for page in src_pages]
    dst_titles = get_all_titles(dst_project)
    to_delete_titles = [
        title for title in dst_titles
        if title not in src_titles and title not in whitelist
    ]
    print(f'Found {len(to_delete_titles)} pages to delete.')
    return to_delete_titles


def delete_pages(titles, dst_project, sid):
    '''Delete the target pages from the destination project.'''
    enc_urls = [f'{root}/{dst_project}/{urllib.parse.quote(title, safe="")}' for title in titles]
    cookie = {
        'name': 'connect.sid',
        'value': sid,
        'domain': 'scrapbox.io',
    }

    async def delete_page(enc_url, browser):
        page = await browser.newPage()
        await page.setCookie(cookie)
        await page.goto(enc_url)
        await page.waitForSelector('a[role="menuitem"][title="Delete"]')
        # auto-accept the confirmation dialog, then click Delete
        await page.evaluate('() => window.confirm = () => true')
        await page.evaluate(
            '''() => document.querySelector('a[role="menuitem"][title="Delete"]').click()'''
        )
        # deletion redirects back to the project top page
        await page.waitForSelector('.quick-launch.layout-list')
        return True

    async def run():
        browser = await launch(
            handleSIGINT=False,
            handleSIGTERM=False,
            handleSIGHUP=False,
        )
        results = await asyncio.gather(
            *[delete_page(url, browser) for url in enc_urls],
            return_exceptions=True,
        )
        await browser.close()
        return results

    results = asyncio.run(run())
    succeeded, failed = [], []
    for title, res in zip(titles, results):
        if res is True:
            succeeded.append(f'{dst_project}/{title}')
        else:
            print(f'{dst_project}/{title} could not be deleted due to {res}')
            failed.append(f'{dst_project}/{title}')
    result = dict(succeeded=succeeded, failed=failed)
    print(f'delete_pages: {result}')
    return result


def main(request):
    '''HTTP entry point: validate the request, then import and delete pages.'''
    if request.method != 'POST':
        abort(404, 'not found')
    if request.headers.get('X-Internal-Key') != os.environ.get('X_INTERNAL_KEY'):
        abort(403, 'forbidden')
    if request.headers.get('content-type') != 'application/json':
        abort(400, 'bad request')
    request_json = request.get_json(silent=True)
    keys = ['src_project', 'dst_project', 'public_tag', 'private_tag', 'whitelist', 'sid']
    if not request_json or any(key not in request_json for key in keys):
        abort(400, 'bad request')

    src_project = request_json['src_project']
    dst_project = request_json['dst_project']
    public_tag = request_json['public_tag']
    private_tag = request_json['private_tag']
    whitelist = request_json['whitelist']
    sid = request_json['sid']

    to_import_pages = get_public_pages(src_project, public_tag, private_tag, sid)
    import_response_code = import_pages(to_import_pages, dst_project, sid)
    if import_response_code != 200:
        abort(500, 'import error')
    to_delete_titles = get_deleted_titles(to_import_pages, dst_project, whitelist)
    delete_result = delete_pages(to_delete_titles, dst_project, sid)
    return jsonify(number_of_imported_pages=len(to_import_pages), delete=delete_result), 200


# debug
if __name__ == '__main__':
    import yaml
    from flask import Flask

    with open('./envs.yaml') as f:
        os.environ.update(yaml.load(f, Loader=yaml.FullLoader))

    app = Flask(__name__)
    app.config['JSON_AS_ASCII'] = False  # mirror the module-level setting for local runs

    @app.route('/', methods=['GET', 'POST'])
    def index():
        return main(request)

    app.run('127.0.0.1', 8000, debug=True)
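# Example call against the local debug server above. This is a sketch only:
# the project names, tags, whitelist entries, and connect.sid value are
# placeholders, and the X-Internal-Key header must match the X_INTERNAL_KEY
# value loaded from envs.yaml.
#
#   import os
#   import requests
#
#   payload = {
#       'src_project': 'my-src-project',    # hypothetical source project
#       'dst_project': 'my-dst-project',    # hypothetical destination project
#       'public_tag': 'public',             # pages tagged #public are synced
#       'private_tag': 'private',           # pages also tagged #private are skipped
#       'whitelist': ['index'],             # hypothetical dst pages never deleted
#       'sid': '<connect.sid cookie value>',
#   }
#   res = requests.post(
#       'http://127.0.0.1:8000/',
#       json=payload,  # sends Content-Type: application/json, as main() requires
#       headers={'X-Internal-Key': os.environ['X_INTERNAL_KEY']},
#   )
#   print(res.status_code, res.json())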