|
@@ -0,0 +1,71 @@
|
|
|
+import numpy as np
|
|
|
+import pandas as pd
|
|
|
+import dataset
|
|
|
+import datetime
|
|
|
+import time
|
|
|
+# import the TrendReq class from the pytrends.request module
|
|
|
+from pytrends.request import TrendReq
|
|
|
+
|
|
|
+
|
|
|
def transform_time(array):
    """Convert date-like values to ``datetime.datetime`` objects at midnight.

    Each element is stringified and only its first ten characters are parsed
    as ``YYYY-MM-DD``; any time-of-day part after the date is discarded.

    Args:
        array: Iterable of values whose ``str()`` form starts with an ISO
            date (for example ``numpy.datetime64`` values or date strings).

    Returns:
        A ``numpy.ndarray`` with one ``datetime.datetime`` per input element.

    Raises:
        ValueError: If the first ten characters of an element do not form a
            valid ``%Y-%m-%d`` date.
    """
    # Iterate the values directly instead of indexing by position so that
    # any iterable (ndarray, list, generator, pandas Index/Series) works.
    parsed = [
        datetime.datetime.strptime(str(value)[:10], "%Y-%m-%d")
        for value in array
    ]
    return np.array(parsed)
|
|
|
+
|
|
|
def df_to_db(df):
    """Reshape a single-keyword trend frame into the database row layout.

    The first column of ``df`` is taken as the keyword column: its name
    becomes ``iot_kword`` and its values become ``iot_value``. The frame's
    index supplies ``iot_date`` (via ``transform_time``), and ``iot_dtime``
    is the current UTC time shifted by +8 hours, stored as a naive datetime.

    Args:
        df: DataFrame indexed by date whose first column holds the values
            for one keyword.

    Returns:
        A DataFrame with columns ``iot_kword``, ``iot_date``, ``iot_value``
        and ``iot_dtime``, one row per row of ``df``. An empty ``df`` yields
        an empty frame with those columns.
    """
    columns = ['iot_kword', 'iot_date', 'iot_value', 'iot_dtime']

    # An empty frame may have no columns at all, in which case
    # ``df.columns[0]`` would raise IndexError; return an empty result.
    if df.empty:
        return pd.DataFrame(columns=columns)

    iot_kword = df.columns[0]

    # ``datetime.utcnow()`` is deprecated; build the same naive UTC value
    # from an aware "now" and then apply the fixed +8 hour offset.
    utc_now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    iot_dtime = utc_now + datetime.timedelta(hours=8)

    return pd.DataFrame(
        {
            'iot_kword': [iot_kword] * len(df),
            'iot_date': transform_time(df.index.values),
            'iot_value': df[iot_kword].values,
            'iot_dtime': iot_dtime,  # scalar, broadcast to every row
        },
        columns=columns,
    )
|
|
|
+
|
|
|
def get_table(table_name, db_name):
    """Open a MySQL connection through ``dataset`` and return one table.

    Connection settings can be overridden with the environment variables
    ``IOT_DB_USER``, ``IOT_DB_PASSWORD``, ``IOT_DB_HOST`` and ``IOT_DB_PORT``.
    When none are set, the connection URL is identical to the previously
    hard-coded one.

    Args:
        table_name: Name of the table to look up on the connection.
        db_name: Name of the database (schema) to connect to.

    Returns:
        The object returned by indexing the ``dataset`` connection with
        ``table_name``.
    """
    import os
    from urllib.parse import quote_plus

    # NOTE(review): the fallback credentials below are committed to source
    # control. Prefer supplying them via the environment, then remove the
    # fallbacks and rotate the password.
    user = quote_plus(os.environ.get('IOT_DB_USER', 'choozmo'))
    password = quote_plus(os.environ.get('IOT_DB_PASSWORD', 'pAssw0rd'))
    host = os.environ.get('IOT_DB_HOST', 'db.ptt.cx')
    port = os.environ.get('IOT_DB_PORT', '3306')

    url = f'mysql://{user}:{password}@{host}:{port}/{db_name}?charset=utf8mb4'
    db = dataset.connect(url)
    return db[table_name]
|
|
|
+
|
|
|
def data_to_db(table, data):
    """Insert every row of a DataFrame into a table in one bulk call.

    Args:
        table: Object exposing ``insert_many(rows)``, where ``rows`` is a
            list of ``{column_name: value}`` dicts.
        data: DataFrame whose column names are used as the row keys.

    Returns:
        None. Progress messages are printed before and after the insert.
    """
    # One dict per row, keyed by column name; replaces the per-cell
    # ``iloc`` double loop with a single pandas call.
    rows = data.to_dict('records')

    print('db updating...')
    table.insert_many(rows)
    print('db updated.')
|
|
|
+
|
|
|
def crawler_iot_topic_tree(keywords, timeframe='today 3-m'):
    """Fetch interest-over-time data for each keyword and store it in the DB.

    For every keyword a pytrends payload is built (category 0, geo ``'TW'``,
    empty ``gprop``), the interest-over-time frame is requested, reshaped
    with ``df_to_db`` and written to the ``topic_tree_g_trend_iot`` table of
    the ``cmm_test`` database via ``data_to_db``.

    Args:
        keywords: Iterable of keyword strings; each is queried on its own.
        timeframe: pytrends timeframe string passed to ``build_payload``.

    Returns:
        None.
    """
    pytrend = TrendReq()

    # Resolve the destination table once so that every keyword reuses the
    # same connection instead of opening a new one per iteration.
    table = get_table('topic_tree_g_trend_iot', 'cmm_test')

    for keyword in keywords:
        print(f'{keyword} 資料抓取中...')
        pytrend.build_payload(
            kw_list=[keyword],
            cat=0,
            timeframe=timeframe,
            geo='TW',
            gprop='',
        )
        interest_over_time = pytrend.interest_over_time()
        data = df_to_db(interest_over_time)
        data_to_db(table, data)
        # Pause between keywords (presumably to stay under the remote
        # service's request rate limits -- confirm).
        time.sleep(5)
|