Browse Source

關聯分析

zooey 1 year ago
parent
commit
aef9647e09
1 changed files with 73 additions and 0 deletions
  1. 73 0
      SEO/correlation_analysis.py

+ 73 - 0
SEO/correlation_analysis.py

@@ -0,0 +1,73 @@
+import numpy as np
+import pandas as pd
+# from sklearn.linear_model import LinearRegression
+# import matplotlib.pyplot as plt
+import dataset
+import pymysql
+pymysql.install_as_MySQLdb()
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+cursor = db.query("select * from seo.google_bank where url like '%hhh.com.tw%' and dt between '2024-08-01%' and '2024-09-05%'")
+
+kw_lst = []
+for c in cursor:
+    kw_lst.append(c['kw'])
+
+unique_kw = {key:0 for key in set(kw_lst)}
+cursor = db.query("select * from seo.google_bank where url like '%hhh.com.tw%' and dt between '2024-08-01%' and '2024-09-05%'")
+
+for c in cursor:
+    if c['kw'] in unique_kw:
+        unique_kw[c['kw']] += 1
+
+kw_count = dict(sorted(unique_kw.items(), key=lambda item: item[1], reverse=True))
+print(kw_count)
+
+rank_first = {key:0 for key in kw_count}
+rank_last = {key:0 for key in kw_count}
+for i in rank_last.keys():
+    cursor = db.query(f"select * from seo.google_bank where kw='{i}' order by dt desc limit 1")
+    for c in cursor:
+        rank_last[c['kw']] = c['ranking']
+
+for i in rank_first.keys():
+    cursor = db.query(f"select * from seo.google_bank where kw='{i}' order by dt limit 1")
+    for c in cursor:
+        rank_first[c['kw']] = c['ranking']
+
+
+print('result,',rank_first)
+print('result,',rank_last)
+db.close()
+diff = {key:rank_first[key]-rank_last[key] for key in rank_first}
+rank_diff = list(diff.values())
+print(diff)
+# X表示點擊數量 Y代表排名
+X = np.array(unique_kw).reshape(-1, 1)
+Y = np.array(diff)
+
+# 创建线性回归模型
+model = LinearRegression()
+
+# 拟合模型
+model.fit(X, Y)
+
+# 获取回归系数
+slope = model.coef_[0]
+
+print(f'迴歸係數: {slope}')
+
+if slope > 0:
+    print("正向關係")
+elif slope < 0:
+    print("負向關係")
+else:
+    print("沒有線性關係")
+
+# 圖表
+# plt.scatter(X, Y, color='blue')  # 繪製數據點
+# plt.plot(X, model.predict(X), color='red')  # 繪製回關線
+# plt.xlabel('X')
+# plt.ylabel('Y')
+# plt.title('線性回歸')
+# plt.show()