import numpy as np
import pandas as pd
# from sklearn.linear_model import LinearRegression
# import matplotlib.pyplot as plt
import dataset
import pymysql
pymysql.install_as_MySQLdb()

# Connect to the SEO database and count how many rows each keyword has
# for the target site inside the date window.
# NOTE(review): the credentials are embedded in this URL; consider loading
# them from the environment or a config file kept out of version control.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')

# NOTE(review): '%' is only a wildcard inside LIKE, so the trailing '%' in
# the two date literals is compared literally by BETWEEN -- confirm the
# intended date bounds.
cursor = db.query("select * from seo.google_bank where url like '%hhh.com.tw%' and dt between '2024-08-01%' and '2024-09-05%'")

# A single pass over one result set both collects the keywords and counts
# them. Running the same query twice doubled the database work and allowed
# the two result sets to disagree if the table changed in between.
kw_lst = []
unique_kw = {}
for c in cursor:
    kw = c['kw']
    kw_lst.append(kw)
    unique_kw[kw] = unique_kw.get(kw, 0) + 1

# Keywords ordered from most to fewest rows (dicts keep insertion order).
kw_count = dict(sorted(unique_kw.items(), key=lambda item: item[1], reverse=True))
print(kw_count)

# Look up the earliest and the latest recorded ranking for every keyword.
# Both dicts default to 0 for a keyword whose query returns no row.
# NOTE(review): unlike the keyword-count query earlier in this file, these
# lookups filter on neither url nor the date window, so the rows may belong
# to another site or period -- confirm this is intended.
rank_first = {key: 0 for key in kw_count}
rank_last = {key: 0 for key in kw_count}

# The keyword is passed as a bound parameter (:kw) instead of being
# formatted into the SQL text, so keywords containing quotes cannot break
# the statement or inject SQL.
for kw in kw_count:
    newest = db.query(
        "select * from seo.google_bank where kw = :kw order by dt desc limit 1",
        kw=kw,
    )
    for c in newest:
        # Store under the keyword we looked up, not c['kw']: the database
        # collation may match a differently cased/padded value, which would
        # otherwise add a new key or leave this keyword at 0.
        rank_last[kw] = c['ranking']

    oldest = db.query(
        "select * from seo.google_bank where kw = :kw order by dt limit 1",
        kw=kw,
    )
    for c in oldest:
        rank_first[kw] = c['ranking']


print('result,',rank_first)
print('result,',rank_last)
db.close()
# Ranking change per keyword: earliest recorded ranking minus latest one.
diff = {key: rank_first[key] - rank_last[key] for key in rank_first}
rank_diff = list(diff.values())
print(diff)

# X is the per-keyword count held in unique_kw (the original comment calls
# it the click count); Y is the ranking change computed above.
# Both arrays are built in the key order of `diff` so that row i of X and
# element i of Y describe the same keyword. Passing the dicts themselves to
# np.array() would produce 0-d object arrays rather than numeric vectors.
X = np.array([unique_kw[key] for key in diff], dtype=float).reshape(-1, 1)
Y = np.array(rank_diff, dtype=float)

# Ordinary least-squares slope of Y on X, computed directly with NumPy:
# slope = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x) ** 2).
# LinearRegression is not imported in this file (its import is commented
# out), so referencing it here raised NameError.
if X.size == 0:
    # No keywords at all: nothing to fit.
    slope = 0.0
else:
    x_dev = X[:, 0] - X[:, 0].mean()
    y_dev = Y - Y.mean()
    x_var = float(np.dot(x_dev, x_dev))
    # All X values identical (or a single point): the slope is undefined,
    # report it as 0 so the "no linear relationship" branch is taken.
    slope = float(np.dot(x_dev, y_dev) / x_var) if x_var > 0 else 0.0

print(f'迴歸係數: {slope}')

if slope > 0:
    print("正向關係")
elif slope < 0:
    print("負向關係")
else:
    print("沒有線性關係")

# Chart
# plt.scatter(X, Y, color='blue')  # plot the data points
# plt.plot(X, model.predict(X), color='red')  # plot the regression line
# plt.xlabel('X')
# plt.ylabel('Y')
# plt.title('線性回歸')
# plt.show()