"""Relate keyword frequency for hhh.com.tw to the keyword's ranking change.

The script counts how many rows each keyword has for the site inside a fixed
date window, looks up the earliest and latest recorded ranking of every such
keyword, and fits a straight line of (first ranking - last ranking) against
the row count.  The sign of the slope is reported as the relationship.
"""
import os
from collections import Counter

import numpy as np
import pandas as pd

# import matplotlib.pyplot as plt  # only needed for the optional plot in main()

# SECURITY: credentials are embedded in this default URL.  It is kept so the
# script still runs unchanged, but prefer exporting SEO_DB_URL instead.
DEFAULT_DB_URL = 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'

# NOTE(review): the trailing '%' inside the two date literals looks like a
# leftover from a LIKE pattern; BETWEEN does not treat it as a wildcard.
# The statement is kept byte-identical -- confirm the intended window.
SITE_ROWS_SQL = "select * from seo.google_bank where url like '%hhh.com.tw%' and dt between '2024-08-01%' and '2024-09-05%'"


def count_keywords(rows):
    """Count rows per keyword, most frequent keyword first.

    Args:
        rows: iterable of mappings that each have a ``'kw'`` key.

    Returns:
        dict mapping keyword -> number of rows, ordered by descending count
        (ties keep first-seen order).
    """
    return dict(Counter(row['kw'] for row in rows).most_common())


def fetch_boundary_rankings(db, keywords, newest):
    """Return the earliest or latest recorded ranking of each keyword.

    Args:
        db: object exposing ``query(sql, **params)`` that yields row mappings
            with a ``'ranking'`` key (a ``dataset`` database in this script).
        keywords: iterable of keyword strings.
        newest: True for the row with the greatest ``dt``, False for the
            smallest.

    Returns:
        dict mapping keyword -> ranking; a keyword without any row keeps 0.
    """
    # NOTE(review): as in the original, this lookup is not restricted to the
    # site or to the date window used for counting -- confirm that is wanted.
    direction = "order by dt desc" if newest else "order by dt"
    # The keyword is passed as a bind parameter so that quotes inside it can
    # neither break the statement nor inject SQL.
    sql = f"select * from seo.google_bank where kw=:kw {direction} limit 1"
    rankings = {kw: 0 for kw in keywords}
    for kw in rankings:
        for row in db.query(sql, kw=kw):
            # Key by the keyword we asked for so both ranking dicts always
            # share exactly the same keys.
            rankings[kw] = row['ranking']
    return rankings


def ranking_change(rank_first, rank_last):
    """Return ``first - last`` ranking per keyword of ``rank_first``.

    Raises:
        KeyError: if a keyword of ``rank_first`` is missing in ``rank_last``.
    """
    return {kw: rank_first[kw] - rank_last[kw] for kw in rank_first}


def fit_line(x, y):
    """Fit ``y = slope * x + intercept`` by ordinary least squares.

    Args:
        x: sequence of numbers (explanatory variable).
        y: sequence of numbers of the same length (response).

    Returns:
        ``(slope, intercept)`` as Python floats.  When every ``x`` is the same
        value the slope is undetermined and 0.0 is returned.

    Raises:
        ValueError: if the inputs are empty or differ in length.
    """
    xs = np.asarray(x, dtype=float).ravel()
    ys = np.asarray(y, dtype=float).ravel()
    if xs.size != ys.size:
        raise ValueError("x and y must have the same length")
    if xs.size == 0:
        raise ValueError("at least one observation is required")

    x_mean = xs.mean()
    y_mean = ys.mean()
    x_centered = xs - x_mean
    spread = float(np.dot(x_centered, x_centered))
    if spread == 0.0:
        slope = 0.0
    else:
        slope = float(np.dot(x_centered, ys - y_mean) / spread)
    return slope, float(y_mean - slope * x_mean)


def describe_slope(slope):
    """Return the message printed for a positive, negative or zero slope."""
    if slope > 0:
        return "正向關係"
    if slope < 0:
        return "負向關係"
    return "沒有線性關係"


def main():
    """Query the database, print the intermediate tables and the slope."""
    # Imported here so the helpers above stay importable (and testable)
    # on machines without the database driver installed.
    import dataset
    import pymysql

    pymysql.install_as_MySQLdb()
    db = dataset.connect(os.environ.get('SEO_DB_URL', DEFAULT_DB_URL))
    try:
        # One pass over the site rows is enough to count every keyword.
        kw_count = count_keywords(db.query(SITE_ROWS_SQL))
        print(kw_count)
        rank_first = fetch_boundary_rankings(db, kw_count, newest=False)
        rank_last = fetch_boundary_rankings(db, kw_count, newest=True)
    finally:
        # Release the connection even when a query fails.
        db.close()

    print('result,',rank_first)
    print('result,',rank_last)

    diff = ranking_change(rank_first, rank_last)
    print(diff)

    # X is the per-keyword row count (the original comment calls it the click
    # count) and Y is the ranking change; both follow the same keyword order.
    keywords = list(kw_count)
    X = np.array([kw_count[kw] for kw in keywords], dtype=float)
    Y = np.array([diff[kw] for kw in keywords], dtype=float)

    # Simple linear regression; the slope is the regression coefficient.
    slope, intercept = fit_line(X, Y)
    print(f'迴歸係數: {slope}')
    print(describe_slope(slope))

    # Optional chart (requires matplotlib):
    # plt.scatter(X, Y, color='blue')                    # data points
    # plt.plot(X, slope * X + intercept, color='red')    # regression line
    # plt.xlabel('X')
    # plt.ylabel('Y')
    # plt.title('線性回歸')
    # plt.show()


if __name__ == "__main__":
    main()