import datetime
import time

import dataset
import numpy as np
import pandas as pd
# import the TrendReq method from the pytrends request module
from pytrends.request import TrendReq


def transform_time(array):
    """Convert date-like values into ``datetime.datetime`` objects.

    Each element is stringified and its first ten characters are parsed as
    ``YYYY-MM-DD``; anything after the date part is discarded.

    Args:
        array: Indexable sequence whose elements stringify to something
            starting with an ISO date (the caller passes a DataFrame's
            ``index.values``).

    Returns:
        ``np.ndarray`` holding one ``datetime.datetime`` per input element.
    """
    return np.array([
        datetime.datetime.strptime(str(array[i])[:10], "%Y-%m-%d")
        for i in range(len(array))
    ])


def df_to_db(df):
    """Reshape a single-keyword trend DataFrame into the DB row layout.

    The first column of ``df`` is taken as the keyword: its name fills
    ``iot_kword`` and its values fill ``iot_value``. The index supplies
    ``iot_date``.

    Args:
        df: DataFrame with a date-like index whose first column holds the
            values for one keyword.

    Returns:
        DataFrame with columns ``iot_kword``, ``iot_date``, ``iot_value`` and
        ``iot_dtime`` (the moment of conversion, as naive UTC+8 wall time).

    Raises:
        IndexError: If ``df`` has no columns.
    """
    columns = ['iot_kword', 'iot_date', 'iot_value', 'iot_dtime']
    iot_kword = df.columns[0]

    # Same naive "UTC + 8 hours" value the old utcnow() + timedelta produced,
    # without relying on the deprecated datetime.utcnow().
    utc_plus_8 = datetime.timezone(datetime.timedelta(hours=8))
    iot_dtime = datetime.datetime.now(utc_plus_8).replace(tzinfo=None)

    to_db_df = pd.DataFrame(columns=columns)
    to_db_df['iot_kword'] = len(df) * [iot_kword]
    to_db_df['iot_date'] = transform_time(df.index.values)
    to_db_df['iot_value'] = df[iot_kword].values
    to_db_df['iot_dtime'] = iot_dtime  # scalar, broadcast to every row
    return to_db_df


def get_table(table_name, db_name):
    """Connect to the MySQL database ``db_name`` and return ``table_name``.

    Args:
        table_name: Name of the table to look up on the connection.
        db_name: Database name interpolated into the connection URL.

    Returns:
        The ``dataset`` table object for ``table_name``.
    """
    # NOTE(review): credentials are hard-coded in the URL below; consider
    # moving them to configuration or the environment.
    db = dataset.connect(f'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/{db_name}?charset=utf8mb4')
    return db[table_name]


def data_to_db(table, data):
    """Insert every row of ``data`` into ``table`` in one bulk call.

    Args:
        table: Object exposing ``insert_many(rows)``.
        data: DataFrame; each row becomes a dict keyed by column name.
    """
    column_names = list(data.columns)
    # Cells are read with iloc[i, j] so each value keeps its per-column type.
    rows = [
        {column: data.iloc[i, j] for j, column in enumerate(column_names)}
        for i in range(len(data))
    ]
    print('db updating...')
    table.insert_many(rows)
    print('db updated.')


def crawler_iot_topic_tree(keywords, timeframe='today 3-m'):
    """Fetch Google Trends interest-over-time per keyword and store it.

    For each keyword, queries pytrends (geo ``TW``, category 0), converts the
    result with ``df_to_db`` and inserts it into table
    ``topic_tree_g_trend_iot`` of database ``cmm_test``. Sleeps five seconds
    after every keyword.

    Args:
        keywords: Iterable of keyword strings, queried one at a time.
        timeframe: pytrends timeframe string.
    """
    pytrend = TrendReq()
    table = None  # connected lazily, then reused for every keyword
    for keyword in keywords:
        print(f'{keyword} 資料抓取中...')
        pytrend.build_payload(
            kw_list=[keyword],
            cat=0,
            timeframe=timeframe,
            geo='TW',
            gprop=''
        )
        to_topics_interest_over_time = pytrend.interest_over_time()
        if to_topics_interest_over_time.empty:
            # An empty frame has no keyword column, so df_to_db would raise
            # IndexError and abort the remaining keywords.
            print(f'{keyword}: no trend data returned, skipping.')
        else:
            if table is None:
                table = get_table('topic_tree_g_trend_iot', 'cmm_test')
            data_to_db(table, df_to_db(to_topics_interest_over_time))
        time.sleep(5)