correlation_analysis.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. import numpy as np
  2. import pandas as pd
  3. # from sklearn.linear_model import LinearRegression
  4. # import matplotlib.pyplot as plt
  5. import dataset
  6. import pymysql
  7. pymysql.install_as_MySQLdb()
  8. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  9. cursor = db.query("select * from seo.google_bank where url like '%hhh.com.tw%' and dt between '2024-01-01%' and '2024-09-05%'")
  10. kw_lst = []
  11. for c in cursor:
  12. kw_lst.append(c['kw'])
  13. unique_kw = {key:0 for key in set(kw_lst)}
  14. cursor = db.query("select * from seo.google_bank where url like '%hhh.com.tw%' and dt between '2024-01-01%' and '2024-09-05%'")
  15. for c in cursor:
  16. if c['kw'] in unique_kw:
  17. unique_kw[c['kw']] += 1
  18. kw_count = dict(sorted(unique_kw.items(), key=lambda item: item[1], reverse=True))
  19. print(kw_count)
  20. rank_first = {key:0 for key in kw_count}
  21. rank_last = {key:0 for key in kw_count}
  22. for i in rank_last.keys():
  23. cursor = db.query(f"select * from seo.google_bank where kw='{i}' order by dt desc limit 1")
  24. for c in cursor:
  25. rank_last[c['kw']] = c['ranking']
  26. for i in rank_first.keys():
  27. cursor = db.query(f"select * from seo.google_bank where kw='{i}' order by dt limit 1")
  28. for c in cursor:
  29. rank_first[c['kw']] = c['ranking']
  30. print('result,',rank_first)
  31. print('result,',rank_last)
  32. db.close()
  33. diff = {key:rank_first[key]-rank_last[key] for key in rank_first}
  34. rank_diff = list(diff.values())
  35. print(diff)
  36. # X is the click count; Y is the ranking
  37. # X = np.array(unique_kw).reshape(-1, 1)
  38. # Y = np.array(diff)
  39. # Create the linear regression model
  40. # model = LinearRegression()
  41. #
  42. # # Fit the model
  43. # model.fit(X, Y)
  44. #
  45. # # Get the regression coefficient
  46. # slope = model.coef_[0]
  47. #
  48. # print(f'迴歸係數: {slope}')
  49. #
  50. # if slope > 0:
  51. # print("正向關係")
  52. # elif slope < 0:
  53. # print("負向關係")
  54. # else:
  55. # print("沒有線性關係")
  56. # Chart
  57. # plt.scatter(X, Y, color='blue') # plot the data points
  58. # plt.plot(X, model.predict(X), color='red') # plot the regression line
  59. # plt.xlabel('X')
  60. # plt.ylabel('Y')
  61. # plt.title('線性回歸')
  62. # plt.show()