1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- import numpy as np
- import pandas as pd
- import dataset
- import datetime
- import time
- # import the TrendReq class from the pytrends.request module
- from pytrends.request import TrendReq
def transform_time(array):
    """Convert date-like values to ``datetime.datetime`` objects at midnight.

    Each element is rendered with ``str()`` and only its first ten
    characters are parsed as ``YYYY-MM-DD``; any time-of-day part of the
    string is discarded.

    Args:
        array: Iterable of values whose string form starts with an ISO date
            (for example the ``datetime64`` values of a pandas index).

    Returns:
        A 1-D object-dtype ``numpy.ndarray`` of ``datetime.datetime`` values
        in input order.

    Raises:
        ValueError: If the first ten characters of an element are not a
            valid ``%Y-%m-%d`` date.
    """
    parsed = [
        datetime.datetime.strptime(str(value)[:10], "%Y-%m-%d")
        for value in array
    ]
    # Pin the dtype so an empty input yields an empty object array instead of
    # NumPy's default float64; non-empty results are object arrays either way.
    return np.array(parsed, dtype=object)
def df_to_db(df):
    """Reshape a single-keyword trend frame into the database row layout.

    The name of the first column of ``df`` is used as the keyword and that
    column's values as the measurements; the index of ``df`` supplies the
    dates.

    Args:
        df: DataFrame whose first column holds the values for one keyword and
            whose index values start with an ISO date when rendered with
            ``str()``.
            # NOTE(review): presumably the frame returned by pytrends'
            # ``interest_over_time()`` -- confirm against the caller.

    Returns:
        A DataFrame with the columns ``iot_kword``, ``iot_date``,
        ``iot_value`` and ``iot_dtime``, one row per row of ``df``.
        ``iot_dtime`` is the same creation timestamp on every row. An empty
        ``df`` produces an empty frame with those four columns.
    """
    columns = ['iot_kword', 'iot_date', 'iot_value', 'iot_dtime']

    # An empty frame may have no columns at all, in which case
    # ``df.columns[0]`` would raise IndexError. Return an empty result in the
    # target layout instead.
    if df.empty:
        return pd.DataFrame(columns=columns)

    iot_kword = df.columns[0]

    # Naive wall-clock time at UTC+8. This is the same value the deprecated
    # ``utcnow() + timedelta(hours=8)`` expression produced.
    iot_dtime = datetime.datetime.now(
        datetime.timezone(datetime.timedelta(hours=8))
    ).replace(tzinfo=None)

    return pd.DataFrame(
        {
            'iot_kword': [iot_kword] * len(df),
            'iot_date': transform_time(df.index.values),
            'iot_value': df[iot_kword].values,
            'iot_dtime': iot_dtime,
        },
        columns=columns,
    )
def get_table(table_name, db_name):
    """Connect to the MySQL database ``db_name`` and return one of its tables.

    Args:
        table_name: Name of the table to look up on the connection.
        db_name: Database name interpolated into the connection URL.

    Returns:
        The object obtained by indexing the ``dataset`` connection with
        ``table_name``.
    """
    # NOTE(review): the user name and password are embedded in this URL.
    # Consider loading them from configuration or the environment.
    connection = dataset.connect(
        f'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/{db_name}?charset=utf8mb4'
    )
    return connection[table_name]
def data_to_db(table, data):
    """Insert every row of ``data`` into ``table`` in a single bulk call.

    Args:
        table: Object exposing ``insert_many(rows)``, where ``rows`` is a
            list of dicts.
        data: DataFrame to store. Each row becomes one dict keyed by the
            frame's column labels.

    Returns:
        None. Progress messages are printed around the insert. When ``data``
        has no rows, nothing is printed and ``insert_many`` is not called.
    """
    # One dict per row, keyed by column label. This replaces the per-cell
    # ``iloc`` lookups with a single pandas call.
    rows = data.to_dict(orient='records')

    # Nothing to store: skip the database round trip entirely.
    if not rows:
        return

    print('db updating...')
    table.insert_many(rows)
    print('db updated.')
def crawler_iot_topic_tree(keywords, timeframe='today 3-m'):
    """Fetch interest-over-time data for each keyword and store it.

    For every keyword a separate request is built with ``geo='TW'``,
    ``cat=0`` and an empty ``gprop``. The result is reshaped with
    ``df_to_db`` and inserted into the ``topic_tree_g_trend_iot`` table of
    the ``cmm_test`` database. The function pauses five seconds after each
    keyword.

    Args:
        keywords: Iterable of keyword strings; each is queried on its own.
        timeframe: Timeframe string passed to ``build_payload``.

    Returns:
        None.
    """
    pytrend = TrendReq()
    # Created on first use and then reused, so a run opens one database
    # connection instead of one per keyword, and none if nothing is stored.
    table = None

    for keyword in keywords:
        print(f'{keyword} 資料抓取中...')
        pytrend.build_payload(
            kw_list=[keyword],
            cat=0,
            timeframe=timeframe,
            geo='TW',
            gprop=''
        )
        interest_over_time = pytrend.interest_over_time()

        if interest_over_time.empty:
            # An empty result has no keyword column to reshape, so skip it.
            print(f'{keyword}: no data returned, skipping.')
        else:
            if table is None:
                table = get_table('topic_tree_g_trend_iot', 'cmm_test')
            data_to_db(table, df_to_db(interest_over_time))

        # Pause between keywords, including skipped ones, before the next
        # request.
        time.sleep(5)
|