Twitter APIを用いて特定アカウントの過去ツイートをまとめて取得する方法

#public

Twitter API の準備

bearer_tokenをメモ

https://qiita.com/neru-dev/items/857cc27fd69411496388

Twitter APIを使ってツイートを取得

code:Twitter.py

import tweepy

from datetime import datetime

import pandas as pd

# --- Setup: credentials, client, user prompts and shared state -------------

# Bearer Token issued for your Twitter API project (replace the placeholder).
bearer_token = 'XXXXXXX'

# Tweepy API v2 client using bearer-token authentication.
client = tweepy.Client(bearer_token=bearer_token)

# Ask for the screen name and resolve it to a numeric user ID.
# The prompt already prints "@", so a pasted leading "@" and stray
# whitespace are dropped before the lookup.
account = input('Account:@').strip().lstrip('@')
user = client.get_user(username=account)
if user.data is None:
    # Without this check a failed lookup would surface as an opaque
    # AttributeError on `user.data.id`.
    # NOTE(review): assumes tweepy reports an unknown user as data=None — confirm.
    raise SystemExit(f"ユーザーが見つかりません: @{account}")
user_id = user.data.id

# Total number of tweets the user wants to fetch.
total_tweets = int(input("取得したいツイートの数を入力してください: "))

# Timestamp taken once at start-up; used as the prefix of every output file.
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")

# One dict per fetched tweet is appended here by the fetch loop.
info_list = []

# Tweet fields requested from the API.
tweet_fields = [
    'created_at', 'text', 'id', 'author_id', 'conversation_id',
    'in_reply_to_user_id', 'referenced_tweets', 'attachments',
    'entities', 'public_metrics', 'possibly_sensitive',
    'lang', 'source', 'context_annotations',
]

# Column names for the CSV output: the requested fields plus the locally
# built 'url' column.
csv_fields = [
    'created_at', 'text', 'url', 'id', 'author_id', 'conversation_id',
    'in_reply_to_user_id', 'referenced_tweets', 'attachments',
    'entities', 'public_metrics', 'possibly_sensitive',
    'lang', 'source', 'context_annotations',
]

# Pagination token and running count of collected tweets.
next_token = None
collected_tweets = 0

# --- Fetch tweets page by page until `total_tweets` have been collected ----
# Errors are reported and the loop stops; whatever was collected so far stays
# in `info_list` so the later export steps can still run on partial data.
try:
    while collected_tweets < total_tweets:
        # How many tweets are still needed.
        remaining_tweets = total_tweets - collected_tweets

        # Request at most 100 per call and never fewer than 5 (the original
        # code bumps any value of 4 or less up to 5); surplus tweets are
        # discarded by the slice below.
        batch_size = max(5, min(100, remaining_tweets))

        tweets = client.get_users_tweets(
            id=user_id,
            max_results=batch_size,
            tweet_fields=tweet_fields,
            pagination_token=next_token,
        )

        # `tweets.data` is falsy when the page holds no tweets.
        for tweet in (tweets.data or [])[:remaining_tweets]:
            info_list.append({
                'created_at': tweet.created_at.strftime("%Y-%m-%d %H:%M:%S") if tweet.created_at else 'N/A',
                'text': tweet.text,
                'url': f"https://twitter.com/{account}/status/{tweet.id}",
                'id': tweet.id,
                'author_id': tweet.author_id,
                'conversation_id': tweet.conversation_id,
                'in_reply_to_user_id': tweet.in_reply_to_user_id,
                'referenced_tweets': tweet.referenced_tweets,
                'attachments': tweet.attachments,
                'entities': tweet.entities,
                'public_metrics': tweet.public_metrics,
                'possibly_sensitive': tweet.possibly_sensitive,
                'lang': tweet.lang,
                'source': tweet.source,
                'context_annotations': tweet.context_annotations,
            })
            collected_tweets += 1

        # Advance to the next page; stop when there is none.
        next_token = tweets.meta.get('next_token')
        if not next_token:
            break
except tweepy.errors.Forbidden as e:
    # Listed before HTTPException so the more specific message is printed.
    print(f"アクセス権限エラー: {e}")
except tweepy.errors.HTTPException as e:
    print(f"HTTPエラー: {e}")
except Exception as e:
    # Top-level boundary of the script: report and continue with partial data.
    print(f"予期せぬエラー: {e}")

# --- Save every collected tweet to a CSV file ------------------------------

# With no tweets the DataFrame has no columns and the later
# groupby('conversation_id') would fail with a KeyError, so stop here with a
# clear message instead.
if not info_list:
    raise SystemExit("ツイートを取得できませんでした。")

# Convert the collected dicts to a DataFrame.
df = pd.DataFrame(info_list)

# Reverse the row order (the rows are in the order the API returned them).
df = df[::-1]

# utf_8_sig writes a BOM at the start of the file.
df.to_csv(f"{current_time}_twitter_{account}.csv", index=False, encoding="utf_8_sig")

# --- Merge tweets that belong to the same conversation ---------------------

# Aggregation per column: keep the first value of every column, except
# 'text', which is concatenated with a separator line. The grouping key
# itself is left out of the mapping because `as_index=False` already emits it
# as a column.
aggregation_methods = {
    col: 'first'
    for col in df.columns
    if col not in ('text', 'conversation_id')
}
aggregation_methods['text'] = '\n-------------------\n'.join

grouped_df = df.groupby('conversation_id', as_index=False).agg(aggregation_methods)

# Restore the original column order.
grouped_df = grouped_df[list(df.columns)]

grouped_df.to_csv(f"{current_time}_twitter_grouped_{account}.csv", index=False, encoding='utf-8-sig')

# --- Export the grouped tweets as text files of at most `max_chars` --------
# Each conversation becomes one section: timestamp, prefixed text lines, URL.
# Sections are appended to the current chunk until adding the next one would
# exceed `max_chars`; the chunk is then written out and a new one is started.

max_chars = 50000  # maximum number of characters per output file
file_index = 0
current_text = ""


def _write_chunk(index, text):
    """Write one chunk to '<current_time>_<index>_twitter_<account>.txt'."""
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # some characters in tweet text.
    with open(f"{current_time}_{index}_twitter_{account}.txt", "w", encoding="utf-8") as f:
        f.write(text)


for time_text, t, url in zip(grouped_df["created_at"], grouped_df["text"], grouped_df["url"]):
    # NOTE(review): the prefix literal was lost from the published listing
    # (only `+ s for s in t.split("\n")` survived). ">" is a reconstruction —
    # confirm against the original script.
    texts = [">" + s for s in t.split("\n")]
    text = "\n".join(texts)
    section = f"{time_text}\n{text}\n{url}\n\n"

    # `current_text and ...` avoids writing an empty file when the very first
    # section is already longer than the limit.
    if current_text and len(current_text) + len(section) > max_chars:
        _write_chunk(file_index, current_text)
        file_index += 1
        current_text = section  # start a new chunk with this section
    else:
        current_text += section

# Write whatever is left. The start-up timestamp is reused so that every
# chunk of one run shares the same filename prefix.
if current_text:
    _write_chunk(file_index, current_text)

code:ファイルを実行

python Twitter.py

まず、ツイートを取得したいTwitterアカウントのユーザー名（@以降）を入力

https://gyazo.com/76c3dc1436cfa191ff014b25cab81dfe

取得したいツイート数を入力

https://gyazo.com/3900002bd67b9bf4a575ad2ab8adaafe

出力された.txtファイルの中身をScrapboxにコピペ