{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:48.192614Z",
"start_time": "2021-07-09T05:02:46.463960Z"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import dataset\n",
"import matplotlib.pyplot as plt\n",
"import datetime\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:48.224610Z",
"start_time": "2021-07-09T05:02:48.204616Z"
}
},
"outputs": [],
"source": [
"def get_data(tabel_name='hhh_weekly_keywords'):\n",
" \"\"\"\n",
" 從DB取得資料轉換成df。\n",
" \"\"\"\n",
" db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')\n",
" table = db[tabel_name]\n",
" return pd.DataFrame(table)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:50.909048Z",
"start_time": "2021-07-09T05:02:48.232613Z"
}
},
"outputs": [],
"source": [
"df = get_data()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:50.971707Z",
"start_time": "2021-07-09T05:02:50.911736Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" sessionid | \n",
" query | \n",
" clicks | \n",
" impressions | \n",
" ctr | \n",
" position | \n",
" dt | \n",
" weeknum | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 124571 | \n",
" 1625804194 | \n",
" 幸福空間 | \n",
" 906 | \n",
" 1563 | \n",
" 0.579655 | \n",
" 1.01344 | \n",
" 2021-07-09 12:16:34 | \n",
" 27 | \n",
"
\n",
" \n",
" 1 | \n",
" 124572 | \n",
" 1625804194 | \n",
" 房間風水 | \n",
" 271 | \n",
" 811 | \n",
" 0.334155 | \n",
" 1.15043 | \n",
" 2021-07-09 12:16:34 | \n",
" 27 | \n",
"
\n",
" \n",
" 2 | \n",
" 124573 | \n",
" 1625804194 | \n",
" 室內設計 | \n",
" 201 | \n",
" 5526 | \n",
" 0.036374 | \n",
" 7.16214 | \n",
" 2021-07-09 12:16:34 | \n",
" 27 | \n",
"
\n",
" \n",
" 3 | \n",
" 124574 | \n",
" 1625804194 | \n",
" 門對門 | \n",
" 113 | \n",
" 195 | \n",
" 0.579487 | \n",
" 1.02051 | \n",
" 2021-07-09 12:16:34 | \n",
" 27 | \n",
"
\n",
" \n",
" 4 | \n",
" 124575 | \n",
" 1625804194 | \n",
" 小坪數裝潢 | \n",
" 96 | \n",
" 436 | \n",
" 0.220183 | \n",
" 1.16055 | \n",
" 2021-07-09 12:16:34 | \n",
" 27 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 21094 | \n",
" 145665 | \n",
" 1625804196 | \n",
" 齊舍mobile01 | \n",
" 0 | \n",
" 3 | \n",
" 0.000000 | \n",
" 8.33333 | \n",
" 2021-07-09 12:16:36 | \n",
" 28 | \n",
"
\n",
" \n",
" 21095 | \n",
" 145666 | \n",
" 1625804196 | \n",
" 龍虎 | \n",
" 0 | \n",
" 239 | \n",
" 0.000000 | \n",
" 117.07500 | \n",
" 2021-07-09 12:16:36 | \n",
" 28 | \n",
"
\n",
" \n",
" 21096 | \n",
" 145667 | \n",
" 1625804196 | \n",
" 龍邊是哪一邊 | \n",
" 0 | \n",
" 2 | \n",
" 0.000000 | \n",
" 3.00000 | \n",
" 2021-07-09 12:16:36 | \n",
" 28 | \n",
"
\n",
" \n",
" 21097 | \n",
" 145668 | \n",
" 1625804196 | \n",
" 龍馬名床評價 | \n",
" 0 | \n",
" 4 | \n",
" 0.000000 | \n",
" 8.00000 | \n",
" 2021-07-09 12:16:36 | \n",
" 28 | \n",
"
\n",
" \n",
" 21098 | \n",
" 145669 | \n",
" 1625804196 | \n",
" 龍馬床墊評價 | \n",
" 0 | \n",
" 23 | \n",
" 0.000000 | \n",
" 7.60870 | \n",
" 2021-07-09 12:16:36 | \n",
" 28 | \n",
"
\n",
" \n",
"
\n",
"
21099 rows × 9 columns
\n",
"
"
],
"text/plain": [
" id sessionid query clicks impressions ctr \\\n",
"0 124571 1625804194 幸福空間 906 1563 0.579655 \n",
"1 124572 1625804194 房間風水 271 811 0.334155 \n",
"2 124573 1625804194 室內設計 201 5526 0.036374 \n",
"3 124574 1625804194 門對門 113 195 0.579487 \n",
"4 124575 1625804194 小坪數裝潢 96 436 0.220183 \n",
"... ... ... ... ... ... ... \n",
"21094 145665 1625804196 齊舍mobile01 0 3 0.000000 \n",
"21095 145666 1625804196 龍虎 0 239 0.000000 \n",
"21096 145667 1625804196 龍邊是哪一邊 0 2 0.000000 \n",
"21097 145668 1625804196 龍馬名床評價 0 4 0.000000 \n",
"21098 145669 1625804196 龍馬床墊評價 0 23 0.000000 \n",
"\n",
" position dt weeknum \n",
"0 1.01344 2021-07-09 12:16:34 27 \n",
"1 1.15043 2021-07-09 12:16:34 27 \n",
"2 7.16214 2021-07-09 12:16:34 27 \n",
"3 1.02051 2021-07-09 12:16:34 27 \n",
"4 1.16055 2021-07-09 12:16:34 27 \n",
"... ... ... ... \n",
"21094 8.33333 2021-07-09 12:16:36 28 \n",
"21095 117.07500 2021-07-09 12:16:36 28 \n",
"21096 3.00000 2021-07-09 12:16:36 28 \n",
"21097 8.00000 2021-07-09 12:16:36 28 \n",
"21098 7.60870 2021-07-09 12:16:36 28 \n",
"\n",
"[21099 rows x 9 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:50.986705Z",
"start_time": "2021-07-09T05:02:50.978704Z"
}
},
"outputs": [],
"source": [
"#"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:51.018703Z",
"start_time": "2021-07-09T05:02:50.990704Z"
}
},
"outputs": [],
"source": [
"# 畫關鍵字成長率長條圖 top 10\n",
"# 1.取得本週與上週的相同出現的query\n",
"# 2.計算兩者之間的成長率\n",
"# 3.取得前10個\n",
"\n",
"\n",
"# 1.\n",
"df_curr_wk = df[df['weeknum'] == '26']\n",
"df_last_wk = df[df['weeknum'] == '25']\n",
"curr_query = set(df_curr_wk['query'].unique())\n",
"last_query = set(df_last_wk['query'].unique())\n",
"query_intersect = curr_query.intersection(last_query)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:51.034703Z",
"start_time": "2021-07-09T05:02:51.020704Z"
}
},
"outputs": [],
"source": [
"# 用一個資料儲存key: 相差\n",
"data = {}\n",
"\n",
"for key in query_intersect:\n",
" last_click = df_last_wk[df_last_wk['query'] == key]['clicks'].values[0]\n",
" curr_click = df_curr_wk[df_curr_wk['query'] == key]['clicks'].values[0]\n",
"# print(key, curr_click, last_click)\n",
" if last_click == 0:\n",
" continue\n",
" data[key] = [round((curr_click - last_click) / last_click * 100, 2)]\n",
" \n",
" # 還需要的data\n",
" clicks = df_curr_wk[df_curr_wk['query'] == key]['clicks'].values[0]\n",
" ctr = df_curr_wk[df_curr_wk['query'] == key]['ctr'].values[0]\n",
" impressions = df_curr_wk[df_curr_wk['query'] == key]['impressions'].values[0]\n",
"\n",
" data[key] += [clicks, ctr, impressions]\n",
" \n",
" if key == '農曆七月搬家':\n",
" print(key, data[key])\n",
"\n",
" \n",
"data = sorted(data.items(), key=lambda x: x[1], reverse=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:51.065705Z",
"start_time": "2021-07-09T05:02:51.040706Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:51.081704Z",
"start_time": "2021-07-09T05:02:51.068704Z"
}
},
"outputs": [],
"source": [
"count = 0\n",
"labels = []\n",
"growth = []\n",
"\n",
"for a in data:\n",
" if a[1][0] != np.inf and (~np.isnan(a[1][0])):\n",
" key, g = a[0], a[1][0]\n",
" labels.append(key)\n",
" growth.append(g)\n",
" count += 1\n",
" if count == 10:\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:51.097703Z",
"start_time": "2021-07-09T05:02:51.084705Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"labels"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:51.113707Z",
"start_time": "2021-07-09T05:02:51.099705Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"growth"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:51.301703Z",
"start_time": "2021-07-09T05:02:51.117707Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(array([-0.2, 0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2]),\n",
" )"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXIAAAEBCAYAAABlki5mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAQl0lEQVR4nO3dfZBdd13H8fenbRIabQqlS0MSSlCpMghYyVTpQO2IbRA6QCHCQHAKjESgMpGBgZGqMIIKI384qKMJEMRhRoQZonEoDym0haa2dFOt1PLsQG1LcAEhhZa0Sb7+cc6W27APN9m72f1t3q+ZndmTc7P7/c3dfd9zz713b6oKSVK7TlroASRJc2PIJalxhlySGmfIJalxhlySGmfIJalxpxzvb3jmmWfW+vXrj/e3laSm7d2799tVNTbVvuMe8vXr1zM+Pn68v60kNS3JN6bb56kVSWqcIZekxhlySWqcIZekxg0d8iSnJTl7PoeRJB29WUOe5GFJdgJfBV4wxf7NSfYmuTHJpfMxpCRpesM8/fAg8BbgXODMwR1JVgFbgfOBFcD1Sa6sqgMjnlOSNI1Zj8ir6u6qumWa3RuBXVV1oKr2A3uA80Y5oCRpZnN9sHMdcPvA9p3A6iMvlGRLkvEk4xMTE3P8lpKkQXMN+XLg0MD24f7jQapqe1VtqKoNY2NTvsJUknSM5hryfcCage21wB1z/JqSpKMw15DvBjYlWZbkdLoHRG+a+1iSpGHN+qyVJGcAH6E7970sySXA1cDuqro+yQ7gOrobhSuq6idOrUiS5s+sIa+q7wIXzrB/G7BthDNJko6CL9GXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklqnCGXpMYZcklq3FAhT7I5yd4kNya59Ih9L06yJ8nnkrx+fsaUJE3nlNkukGQVsBU4H1gBXJ/kyqo6kGQZcAVwLnAQuDnJu6vq+/M5tCTpx4Y5It8I7KqqA1W1H9gDnNfvOwwUsAxYDhwC7p2PQSVJU5v1iBxYB9w+sH0nsBqgqg4leQ1wFV3QX19V9x35BZJsAbYAnH322XOdWZI0YJgj8skj7UmH+w+SnAy8FHgT8Hbglf3plgepqu1VtaGqNoyNjc15aEnSjw0T8n3AmoHttcAd/ecXA3dW1dVVtQv4Ot2pGEnScTJMyHcDm5IsS3I63QObN/X77gMeO3DZxwD7RzuiJGkms54jr6q7kuwArqML/xXARUlWVtXOJJckGQfuAa6pqs/M78iSpEHDPNhJVW0Dtk2z77UjnUiSdFR8ZackNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNW6okCfZnGRvkhuTXHrEvlOTvD/JeJIbkpw6P6NKkqZyymwXSLIK2AqcD6wArk9yZVUd6C/yJ8Ceqrps/saUJE1nmCPyjcCuqjpQVfuBPcB5AEmWA79WVdvncUZJ0gyGCfk64PaB7TuB1f3njwb29adWPpvkj6f6Akm29KdexicmJuY2sSTpQYYJ+XLg0MD24f4D4EzgicAfABcCT0jyjCO/QFVtr6oNVbVhbGxsbhNLkh5kmJDvA9YMbK8F7ug/nwBurqq7quoQsAt4wmhHlCTNZJiQ7wY2JVmW5HTgXOCmft/XgLOSnNFvXwD8++jHlCRNZ9ZnrVTVXUl2ANfRhf8K4KIkK6tqZ5I3ALuSHAauraqr5ndkSdKgWUMOUFXbgG3T7NsDPHWUQ0mShucrOyWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckhpnyCWpcYZckho3VMiTbE6yN8mNSS6dYn+SfCrJX49+REnSTE6Z7QJJVgFbgfOBFcD1Sa6sqgMDF3sF8D/zM6IkaSbDHJFvBHZV1YGq2g/sAc6b3JnkkcCzgPfPz4iSpJkME/J1wO0D23cCqwe23wm8EajpvkCSLUnGk4xPTEwc06CSpKkNE/LlwKGB7cP9B0meC3ypqr440xeoqu1VtaGqNoyNjR3zsJKknzTrOXJgH7BmYHstsLv//CXAQ5N8HDgDOCvJ56tq22jHlCRNZ5iQ7wZ2JvlLYCVwLnA5QFVtmrxQkguBTUZcko6vWUNeVXcl2QFcR3cq5grgoiQrq2rnfA8oSZrZMEfk9EfZMx5pV9U1wDVzH0mSdDR8ZackNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNc6QS1LjDLkkNW6okCfZnGRvkhuTXHrEvjcl+Uy/753zM6YkaTqzhjzJKmArcD5wEfDWJCsGLvL5qrqgqn4FOCfJefMzqiRpKsMckW8EdlXVgaraD+wBHoh1Vf3rwGW/AZw+2hElSTMZJuTrgNsHtu8EVh95oSQrgQ3AdVPs25JkPMn4xMTEsc4qSZrCMCFfDhwa2D7cfzwgycnA+4G3VNW9R36BqtpeVRuqasPY2Nhc5pUkHWGYkO8D1gxsrwXumNxIEuDdwEer6hOjHU+SNJthQr4b2JRkWZLTgXOBmwb2vwu4sar+fh7mkyTN4pTZLlBVdyXZQXfu+yTgCuCi/pz4PcBlwM1JXtT/l9dV1d75GliS9GCzhhygqrYB26bZvWp040iSjpav7JSkxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWqcIZekxhlySWrcUCFPsjnJ3iQ3Jrn0iH1PTzKe5IYkr56fMSVJ0zlltgskWQVsBc4HVgDXJ7myqg4kOQl4O7AR2N/v21lV35zPoSVJPzbMEflGYFdVHaiq/cAe4Lx+35OBW6rqO1V1P/AR4DfmZ1RJ0lSGCfk64PaB7TuB1UPskyQdB8OEfDlwaGD7cP8x274HJNnSn0cfn5iYONZZJUlTGCbk+4A1A9trgTuG2PeAqtpeVRuqasPY2NixzipJmsIwId8NbEqyLMnpwLnATf2+G4CnJVmVZBnwbOAT8zOqJGkqsz5rparuSrIDuI4u/FcAFyVZWVU7k/whXexPAt5VVd+b14klSQ8ya8gBqmobsG2afbuAXaMcSpI0PF/ZKUmNM+SS1DhDLkmNM+SS1LhU1fH9hskE8I1j/O9nAt8e4TgtcM0nBtd8YpjLmh9dVVO+EOe4h3wukoxX1YaFnuN4cs0nBtd8YpivNXtqRZIaZ8glqXGthXz7Qg+wAFzzicE1nxjmZc1NnSOXJP2k1o7IJUlHMOSS1DhDLkmNM+RatJJkoWeQWtBMyJM8Ksnjjvi3Jf2LnuTxSZ6aZNVCz3K8JPmlJBcAVFUt9esYIMkzk7xsoec4npL8TJInLfQcx1OSsSRr5+NrD/X3yBdakmcDrwUOJtlL9yYXH6+qg0lSS/CpN0meAfwR3dvpfSXJjqr68gKPNW/6YJ8O/B2wL8mpVfWJPuYnVdVPvBfsUpDkqcCfAr+/0LMcL0kuAd4I3JPk1qp63ULPNN+SPAfYChxOcgPwj8Bto2rXoj8iT7IC2AxcXlUXAbfRvd3ci5OcvEQj/rPA5cDLqur5QAG/tbBTza/qfA/YCVwLXNzfgLNUI977VeB9VXVtkocnOSfJaUv1nkj/dpGXAS+tqo3AzyU5a4HHmlf9ml8OvBJ4FnAQeC5wwaiu50UfciDACuCMfvtDdO8Zuha4AJbkKZZ7gF0DR+DvANYkaeIe1LEYuA4PAquAq+h+0Lckubi/TAs/r0fr/4Dv9p9/APgz4K3AS5boegFOA1b3nx8CXp3kVUl+eQn+Lk86FVhRVQeAvwC+AzwFWD+KL77of1Cq6kfA+4AXJjmn374GmAAu7C+zpI7Kq+qbdHe96N/UegVwFvCQ/t9+auGmmzeTP4tXAndU1ceAu4A3Az8PS/bI/Bbg8v59cbdX1Sa698D9BbrgLSlV9X3gb4G3Jnkv8L90N9oPBV7M0l3zP9C91/Gaqvoh3Y32aXRrnrNFGfIkj0zymIF/uhb4AvC8JI+tqh9V1XuAJyZ55MJMOVpHrrmqftB/ehj4AbC/qn6Q5IXA7y2Fo/PBNVfVof6fC3hCko3AxcDfAI/rt5s3xfU8TncE/kTg/v7fPgqsY0RHawttijX/C/Ac4Ga6x7o+W1V/Tncve/3CTDk6SZYnObX/fPIexq10B2IXJlnd/36/DfjFJHO+8Vp0MejPi74BuDvJ14D30B217AZ+E3hNkg/T3ZqdRncaomlTrPm9wK1VdX8fuB8m+XqSV9Ddgr+6qg4u4MhzNsWad9A9+PPFJNXve3NVXdc/8HvLAo47EtOs+fPAJ+nudbwsyX66U0tnAd9aqFlHZZo131pVdye5je4odS/d+s9gCawZuARYm+RDVfUtgKr6jyRjdKdTVie5iu6G6+F0p5fmZFH9rZUkP00XsTf3v9BvpDu39Fng03R/lP0C4EV0AX9nVf3nQs07CtOs+SHAvwFXV9X9/amUm4B7gRdU1dcWbuK5m2nNVfXJJJcBX6+qa/vLN/+slRl+tvfQXc8H+8cCnt//l7+qqlsXaNyRmOF6vgG4GngE3YHJU+huvLYugTU/GfgY8EHgq8CHqmrfEfufBDyPrmFvG0XDFlvIVwIfBv6iqq5JcjLw28DZwAcHn36XZFlV3b9Ao47MDGteB3y4qr7UP4/8pcBVVXXbwk07GrNczx+oqv9eKtfvpBnW/Ci6X/YvDVy2+RsumHXNH6yqryRZDfwIWFZVEws47kike63L44EvA8+kO/j6p8GY95dbSdffH47i+y6qc+RVdQ/dn3k8L8n6/rTCB+jOE7+8f3rWpiTLl8ov+Qxrhu6u9hnA04H3LoWIw6zX8+/2a352kuULOecozbDmorueB3+2m484zLrm30nyMOBpdI//NB9xgKr6AvDP/VH2NXT3QF40+Vhekg1Jfh24d1QRh0UW8t71wMl0Dwo8uqoOVtXb6M4nraU7Kr1vQSccvZnW/Cjg06O80heJ6da8hu7eyKdOsOv5RPvZnryedy+VG65Jk49fVdUNdKeFlwMbk7wKeA3wX6N+pt2iOrUyKcnZdC8auA/4HLCS7gGTZy7BoAGuGdfsmpeodC/wewdwDt1jXF8c+fdYjCEHSPIIugc2NwN3swQe2JyNa3bNS9WJuOZJ6f4kwduB5w8+FjLS77FYQz5p8vmYVXXvQs9yvLjmE4NrPjGk++Ng91TVV+bteyz2kEuSZrYYH+yUJB0FQy5JjTPkktQ4Qy5JjTPkktQ4Qy5JjTPkktQ4Qy5Jjft/06ZSve4nNJ4AAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei']\n",
"\n",
"\n",
"labels = labels\n",
"x = growth\n",
"\n",
"ax = plt.axes()\n",
"ax.set_axisbelow(True)\n",
"\n",
"ax.bar(x=labels, height=x)\n",
"plt.xticks(rotation=45)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:51.317703Z",
"start_time": "2021-07-09T05:02:51.303707Z"
}
},
"outputs": [],
"source": [
"# 排名清單\n",
"\n",
"new_data = []\n",
"for item in data:\n",
" row = []\n",
" row.append(item[0])\n",
" row += item[1]\n",
" new_data.append(row)\n",
" \n",
"lists = pd.DataFrame(new_data)\n",
"lists.index = [i for i in range(1, len(lists)+1)]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:02:51.444703Z",
"start_time": "2021-07-09T05:02:51.319704Z"
}
},
"outputs": [
{
"ename": "ValueError",
"evalue": "Length mismatch: Expected axis has 0 elements, new values have 5 elements",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mlists\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'query'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'growth'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'clicks'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'ctr'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'impressions'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mlists\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m12\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m__setattr__\u001b[1;34m(self, name, value)\u001b[0m\n\u001b[0;32m 5473\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5474\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 5475\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__setattr__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5476\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5477\u001b[0m \u001b[1;32mpass\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mpandas\\_libs\\properties.pyx\u001b[0m in \u001b[0;36mpandas._libs.properties.AxisProperty.__set__\u001b[1;34m()\u001b[0m\n",
"\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m_set_axis\u001b[1;34m(self, axis, labels)\u001b[0m\n\u001b[0;32m 667\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_set_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mIndex\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 668\u001b[0m \u001b[0mlabels\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mensure_index\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 669\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_mgr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 670\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_clear_item_cache\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 671\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\internals\\managers.py\u001b[0m in \u001b[0;36mset_axis\u001b[1;34m(self, axis, new_labels)\u001b[0m\n\u001b[0;32m 219\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mnew_len\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[0mold_len\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 220\u001b[0m raise ValueError(\n\u001b[1;32m--> 221\u001b[1;33m \u001b[1;34mf\"Length mismatch: Expected axis has {old_len} elements, new \"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 222\u001b[0m \u001b[1;34mf\"values have {new_len} elements\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 223\u001b[0m )\n",
"\u001b[1;31mValueError\u001b[0m: Length mismatch: Expected axis has 0 elements, new values have 5 elements"
]
}
],
"source": [
"lists.columns = ['query', 'growth', 'clicks', 'ctr', 'impressions']\n",
"lists.head(12)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 社群使用者成長率\n",
"- \n",
"- 計算: user、newuser、sessions、pageviews、bounceRate(%)、pv_session\n",
"- 要根據社群區分\n",
" - organic / social"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:03:12.048761Z",
"start_time": "2021-07-09T05:03:11.178952Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" ga:date | \n",
" ga:users | \n",
" ga:newusers | \n",
" ga:sessions | \n",
" ga:pageviews | \n",
" ga:bounceRate | \n",
" ga:pageviewsPerSession | \n",
" ga:isoWeek | \n",
" category | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 685 | \n",
" 20210531 | \n",
" 8285 | \n",
" 6030 | \n",
" 8759 | \n",
" 12271 | \n",
" 85.69471400844846 | \n",
" 1.4009590135860257 | \n",
" 22 | \n",
" social | \n",
"
\n",
" \n",
" 1 | \n",
" 686 | \n",
" 20210601 | \n",
" 4554 | \n",
" 2820 | \n",
" 4964 | \n",
" 8531 | \n",
" 76.89363416599517 | \n",
" 1.718573730862208 | \n",
" 22 | \n",
" social | \n",
"
\n",
" \n",
" 2 | \n",
" 687 | \n",
" 20210602 | \n",
" 4387 | \n",
" 2610 | \n",
" 4758 | \n",
" 8381 | \n",
" 73.43421605716688 | \n",
" 1.7614543926019335 | \n",
" 22 | \n",
" social | \n",
"
\n",
" \n",
" 3 | \n",
" 688 | \n",
" 20210603 | \n",
" 6682 | \n",
" 3705 | \n",
" 7420 | \n",
" 11906 | \n",
" 78.71967654986523 | \n",
" 1.6045822102425875 | \n",
" 22 | \n",
" social | \n",
"
\n",
" \n",
" 4 | \n",
" 689 | \n",
" 20210604 | \n",
" 10787 | \n",
" 5981 | \n",
" 11787 | \n",
" 23667 | \n",
" 65.9964367523543 | \n",
" 2.007890048358361 | \n",
" 22 | \n",
" social | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 73 | \n",
" 758 | \n",
" 20210704 | \n",
" 12004 | \n",
" 9511 | \n",
" 14005 | \n",
" 44185 | \n",
" 66.68332738307747 | \n",
" 3.1549446626204927 | \n",
" 26 | \n",
" organic | \n",
"
\n",
" \n",
" 74 | \n",
" 759 | \n",
" 20210705 | \n",
" 11172 | \n",
" 8454 | \n",
" 13028 | \n",
" 40090 | \n",
" 66.31869818851705 | \n",
" 3.0772182990482038 | \n",
" 27 | \n",
" organic | \n",
"
\n",
" \n",
" 75 | \n",
" 760 | \n",
" 20210706 | \n",
" 10870 | \n",
" 8102 | \n",
" 12532 | \n",
" 37871 | \n",
" 66.74912224704755 | \n",
" 3.0219438238110437 | \n",
" 27 | \n",
" organic | \n",
"
\n",
" \n",
" 76 | \n",
" 761 | \n",
" 20210707 | \n",
" 10337 | \n",
" 7813 | \n",
" 12008 | \n",
" 35427 | \n",
" 66.48067954696869 | \n",
" 2.9502831445702866 | \n",
" 27 | \n",
" organic | \n",
"
\n",
" \n",
" 77 | \n",
" 762 | \n",
" 20210708 | \n",
" 10754 | \n",
" 8361 | \n",
" 12715 | \n",
" 33819 | \n",
" 60.802202123476214 | \n",
" 2.659771922925678 | \n",
" 27 | \n",
" organic | \n",
"
\n",
" \n",
"
\n",
"
78 rows × 10 columns
\n",
"
"
],
"text/plain": [
" id ga:date ga:users ga:newusers ga:sessions ga:pageviews \\\n",
"0 685 20210531 8285 6030 8759 12271 \n",
"1 686 20210601 4554 2820 4964 8531 \n",
"2 687 20210602 4387 2610 4758 8381 \n",
"3 688 20210603 6682 3705 7420 11906 \n",
"4 689 20210604 10787 5981 11787 23667 \n",
".. ... ... ... ... ... ... \n",
"73 758 20210704 12004 9511 14005 44185 \n",
"74 759 20210705 11172 8454 13028 40090 \n",
"75 760 20210706 10870 8102 12532 37871 \n",
"76 761 20210707 10337 7813 12008 35427 \n",
"77 762 20210708 10754 8361 12715 33819 \n",
"\n",
" ga:bounceRate ga:pageviewsPerSession ga:isoWeek category \n",
"0 85.69471400844846 1.4009590135860257 22 social \n",
"1 76.89363416599517 1.718573730862208 22 social \n",
"2 73.43421605716688 1.7614543926019335 22 social \n",
"3 78.71967654986523 1.6045822102425875 22 social \n",
"4 65.9964367523543 2.007890048358361 22 social \n",
".. ... ... ... ... \n",
"73 66.68332738307747 3.1549446626204927 26 organic \n",
"74 66.31869818851705 3.0772182990482038 27 organic \n",
"75 66.74912224704755 3.0219438238110437 27 organic \n",
"76 66.48067954696869 2.9502831445702866 27 organic \n",
"77 60.802202123476214 2.659771922925678 27 organic \n",
"\n",
"[78 rows x 10 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_weekly = get_data('hhh_weekly_report')\n",
"df_weekly"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:03:15.683751Z",
"start_time": "2021-07-09T05:03:15.675746Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"array(['social', 'organic'], dtype=object)"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_weekly.category.unique()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:03:16.669798Z",
"start_time": "2021-07-09T05:03:16.645773Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 78 entries, 0 to 77\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 78 non-null int64 \n",
" 1 ga:date 78 non-null object\n",
" 2 ga:users 78 non-null object\n",
" 3 ga:newusers 78 non-null object\n",
" 4 ga:sessions 78 non-null object\n",
" 5 ga:pageviews 78 non-null object\n",
" 6 ga:bounceRate 78 non-null object\n",
" 7 ga:pageviewsPerSession 78 non-null object\n",
" 8 ga:isoWeek 78 non-null object\n",
" 9 category 78 non-null object\n",
"dtypes: int64(1), object(9)\n",
"memory usage: 6.2+ KB\n"
]
}
],
"source": [
"df_weekly.info()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:03:17.128847Z",
"start_time": "2021-07-09T05:03:17.104850Z"
}
},
"outputs": [],
"source": [
"# 先將資料轉型\n",
"\n",
"int_columns = ['id', 'ga:users', 'ga:newusers', 'ga:sessions', 'ga:pageviews', 'ga:isoWeek']\n",
"float_columns = ['ga:bounceRate', 'ga:pageviewsPerSession']\n",
"str_columns = ['category']\n",
"\n",
"for column in int_columns:\n",
" df_weekly[column] = df_weekly[column].apply(func=lambda x: int(x))\n",
"for column in float_columns:\n",
" df_weekly[column] = df_weekly[column].apply(func=lambda x: float(x))\n",
"for column in str_columns:\n",
" df_weekly[column] = df_weekly[column].apply(func=lambda x: str(x))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:03:17.742227Z",
"start_time": "2021-07-09T05:03:17.703735Z"
},
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" ga:date | \n",
" ga:users | \n",
" ga:newusers | \n",
" ga:sessions | \n",
" ga:pageviews | \n",
" ga:bounceRate | \n",
" ga:pageviewsPerSession | \n",
" ga:isoWeek | \n",
" category | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 685 | \n",
" 20210531 | \n",
" 8285 | \n",
" 6030 | \n",
" 8759 | \n",
" 12271 | \n",
" 85.694714 | \n",
" 1.400959 | \n",
" 22 | \n",
" social | \n",
"
\n",
" \n",
" 1 | \n",
" 686 | \n",
" 20210601 | \n",
" 4554 | \n",
" 2820 | \n",
" 4964 | \n",
" 8531 | \n",
" 76.893634 | \n",
" 1.718574 | \n",
" 22 | \n",
" social | \n",
"
\n",
" \n",
" 2 | \n",
" 687 | \n",
" 20210602 | \n",
" 4387 | \n",
" 2610 | \n",
" 4758 | \n",
" 8381 | \n",
" 73.434216 | \n",
" 1.761454 | \n",
" 22 | \n",
" social | \n",
"
\n",
" \n",
" 3 | \n",
" 688 | \n",
" 20210603 | \n",
" 6682 | \n",
" 3705 | \n",
" 7420 | \n",
" 11906 | \n",
" 78.719677 | \n",
" 1.604582 | \n",
" 22 | \n",
" social | \n",
"
\n",
" \n",
" 4 | \n",
" 689 | \n",
" 20210604 | \n",
" 10787 | \n",
" 5981 | \n",
" 11787 | \n",
" 23667 | \n",
" 65.996437 | \n",
" 2.007890 | \n",
" 22 | \n",
" social | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 73 | \n",
" 758 | \n",
" 20210704 | \n",
" 12004 | \n",
" 9511 | \n",
" 14005 | \n",
" 44185 | \n",
" 66.683327 | \n",
" 3.154945 | \n",
" 26 | \n",
" organic | \n",
"
\n",
" \n",
" 74 | \n",
" 759 | \n",
" 20210705 | \n",
" 11172 | \n",
" 8454 | \n",
" 13028 | \n",
" 40090 | \n",
" 66.318698 | \n",
" 3.077218 | \n",
" 27 | \n",
" organic | \n",
"
\n",
" \n",
" 75 | \n",
" 760 | \n",
" 20210706 | \n",
" 10870 | \n",
" 8102 | \n",
" 12532 | \n",
" 37871 | \n",
" 66.749122 | \n",
" 3.021944 | \n",
" 27 | \n",
" organic | \n",
"
\n",
" \n",
" 76 | \n",
" 761 | \n",
" 20210707 | \n",
" 10337 | \n",
" 7813 | \n",
" 12008 | \n",
" 35427 | \n",
" 66.480680 | \n",
" 2.950283 | \n",
" 27 | \n",
" organic | \n",
"
\n",
" \n",
" 77 | \n",
" 762 | \n",
" 20210708 | \n",
" 10754 | \n",
" 8361 | \n",
" 12715 | \n",
" 33819 | \n",
" 60.802202 | \n",
" 2.659772 | \n",
" 27 | \n",
" organic | \n",
"
\n",
" \n",
"
\n",
"
78 rows × 10 columns
\n",
"
"
],
"text/plain": [
" id ga:date ga:users ga:newusers ga:sessions ga:pageviews \\\n",
"0 685 20210531 8285 6030 8759 12271 \n",
"1 686 20210601 4554 2820 4964 8531 \n",
"2 687 20210602 4387 2610 4758 8381 \n",
"3 688 20210603 6682 3705 7420 11906 \n",
"4 689 20210604 10787 5981 11787 23667 \n",
".. ... ... ... ... ... ... \n",
"73 758 20210704 12004 9511 14005 44185 \n",
"74 759 20210705 11172 8454 13028 40090 \n",
"75 760 20210706 10870 8102 12532 37871 \n",
"76 761 20210707 10337 7813 12008 35427 \n",
"77 762 20210708 10754 8361 12715 33819 \n",
"\n",
" ga:bounceRate ga:pageviewsPerSession ga:isoWeek category \n",
"0 85.694714 1.400959 22 social \n",
"1 76.893634 1.718574 22 social \n",
"2 73.434216 1.761454 22 social \n",
"3 78.719677 1.604582 22 social \n",
"4 65.996437 2.007890 22 social \n",
".. ... ... ... ... \n",
"73 66.683327 3.154945 26 organic \n",
"74 66.318698 3.077218 27 organic \n",
"75 66.749122 3.021944 27 organic \n",
"76 66.480680 2.950283 27 organic \n",
"77 60.802202 2.659772 27 organic \n",
"\n",
"[78 rows x 10 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_weekly"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:03:18.687294Z",
"start_time": "2021-07-09T05:03:18.676764Z"
}
},
"outputs": [],
"source": [
"# 抓6/18-24, 6/25-7/1\n",
"\n",
"df_weekly['ga:date'] = df_weekly['ga:date'].apply(func=lambda x: datetime.datetime.strptime(x, '%Y%m%d'))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:03:20.313989Z",
"start_time": "2021-07-09T05:03:20.304943Z"
}
},
"outputs": [],
"source": [
"time_618 = datetime.datetime.strptime('20210618', '%Y%m%d')\n",
"time_625 = datetime.datetime.strptime('20210625', '%Y%m%d')\n",
"time_624 = datetime.datetime.strptime('20210624', '%Y%m%d')\n",
"time_702 = datetime.datetime.strptime('20210701', '%Y%m%d')\n",
"\n",
"\n",
"df_last_week = df_weekly[(time_625 > df_weekly['ga:date']) & (df_weekly['ga:date'] > time_618)]\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:03:20.875293Z",
"start_time": "2021-07-09T05:03:20.837689Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" ga:date | \n",
" ga:users | \n",
" ga:newusers | \n",
" ga:sessions | \n",
" ga:pageviews | \n",
" ga:bounceRate | \n",
" ga:pageviewsPerSession | \n",
" ga:isoWeek | \n",
" category | \n",
"
\n",
" \n",
" \n",
" \n",
" 28 | \n",
" 713 | \n",
" 2021-06-28 | \n",
" 20790 | \n",
" 17341 | \n",
" 21301 | \n",
" 28963 | \n",
" 86.380921 | \n",
" 1.359701 | \n",
" 26 | \n",
" social | \n",
"
\n",
" \n",
" 29 | \n",
" 714 | \n",
" 2021-06-29 | \n",
" 11507 | \n",
" 7088 | \n",
" 12285 | \n",
" 16687 | \n",
" 87.089947 | \n",
" 1.358323 | \n",
" 26 | \n",
" social | \n",
"
\n",
" \n",
" 30 | \n",
" 715 | \n",
" 2021-06-30 | \n",
" 5330 | \n",
" 3744 | \n",
" 5635 | \n",
" 8237 | \n",
" 83.904170 | \n",
" 1.461757 | \n",
" 26 | \n",
" social | \n",
"
\n",
" \n",
" 31 | \n",
" 716 | \n",
" 2021-07-01 | \n",
" 4531 | \n",
" 2833 | \n",
" 4947 | \n",
" 8419 | \n",
" 76.470588 | \n",
" 1.701839 | \n",
" 26 | \n",
" social | \n",
"
\n",
" \n",
" 32 | \n",
" 717 | \n",
" 2021-07-02 | \n",
" 4240 | \n",
" 2442 | \n",
" 4602 | \n",
" 7438 | \n",
" 78.270317 | \n",
" 1.616254 | \n",
" 26 | \n",
" social | \n",
"
\n",
" \n",
" 33 | \n",
" 718 | \n",
" 2021-07-03 | \n",
" 6544 | \n",
" 3891 | \n",
" 7169 | \n",
" 11842 | \n",
" 76.914493 | \n",
" 1.651834 | \n",
" 26 | \n",
" social | \n",
"
\n",
" \n",
" 34 | \n",
" 719 | \n",
" 2021-07-04 | \n",
" 7065 | \n",
" 4270 | \n",
" 7892 | \n",
" 13884 | \n",
" 76.102382 | \n",
" 1.759250 | \n",
" 26 | \n",
" social | \n",
"
\n",
" \n",
" 67 | \n",
" 752 | \n",
" 2021-06-28 | \n",
" 10488 | \n",
" 8137 | \n",
" 12449 | \n",
" 36690 | \n",
" 66.527432 | \n",
" 2.947225 | \n",
" 26 | \n",
" organic | \n",
"
\n",
" \n",
" 68 | \n",
" 753 | \n",
" 2021-06-29 | \n",
" 10494 | \n",
" 7866 | \n",
" 12224 | \n",
" 36671 | \n",
" 67.588351 | \n",
" 2.999918 | \n",
" 26 | \n",
" organic | \n",
"
\n",
" \n",
" 69 | \n",
" 754 | \n",
" 2021-06-30 | \n",
" 9712 | \n",
" 7406 | \n",
" 11370 | \n",
" 34569 | \n",
" 66.525945 | \n",
" 3.040369 | \n",
" 26 | \n",
" organic | \n",
"
\n",
" \n",
" 70 | \n",
" 755 | \n",
" 2021-07-01 | \n",
" 10535 | \n",
" 8276 | \n",
" 12173 | \n",
" 34250 | \n",
" 61.784277 | \n",
" 2.813604 | \n",
" 26 | \n",
" organic | \n",
"
\n",
" \n",
" 71 | \n",
" 756 | \n",
" 2021-07-02 | \n",
" 10050 | \n",
" 7798 | \n",
" 11623 | \n",
" 32740 | \n",
" 62.875333 | \n",
" 2.816829 | \n",
" 26 | \n",
" organic | \n",
"
\n",
" \n",
" 72 | \n",
" 757 | \n",
" 2021-07-03 | \n",
" 11540 | \n",
" 9094 | \n",
" 13081 | \n",
" 37597 | \n",
" 67.326657 | \n",
" 2.874169 | \n",
" 26 | \n",
" organic | \n",
"
\n",
" \n",
" 73 | \n",
" 758 | \n",
" 2021-07-04 | \n",
" 12004 | \n",
" 9511 | \n",
" 14005 | \n",
" 44185 | \n",
" 66.683327 | \n",
" 3.154945 | \n",
" 26 | \n",
" organic | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id ga:date ga:users ga:newusers ga:sessions ga:pageviews \\\n",
"28 713 2021-06-28 20790 17341 21301 28963 \n",
"29 714 2021-06-29 11507 7088 12285 16687 \n",
"30 715 2021-06-30 5330 3744 5635 8237 \n",
"31 716 2021-07-01 4531 2833 4947 8419 \n",
"32 717 2021-07-02 4240 2442 4602 7438 \n",
"33 718 2021-07-03 6544 3891 7169 11842 \n",
"34 719 2021-07-04 7065 4270 7892 13884 \n",
"67 752 2021-06-28 10488 8137 12449 36690 \n",
"68 753 2021-06-29 10494 7866 12224 36671 \n",
"69 754 2021-06-30 9712 7406 11370 34569 \n",
"70 755 2021-07-01 10535 8276 12173 34250 \n",
"71 756 2021-07-02 10050 7798 11623 32740 \n",
"72 757 2021-07-03 11540 9094 13081 37597 \n",
"73 758 2021-07-04 12004 9511 14005 44185 \n",
"\n",
" ga:bounceRate ga:pageviewsPerSession ga:isoWeek category \n",
"28 86.380921 1.359701 26 social \n",
"29 87.089947 1.358323 26 social \n",
"30 83.904170 1.461757 26 social \n",
"31 76.470588 1.701839 26 social \n",
"32 78.270317 1.616254 26 social \n",
"33 76.914493 1.651834 26 social \n",
"34 76.102382 1.759250 26 social \n",
"67 66.527432 2.947225 26 organic \n",
"68 67.588351 2.999918 26 organic \n",
"69 66.525945 3.040369 26 organic \n",
"70 61.784277 2.813604 26 organic \n",
"71 62.875333 2.816829 26 organic \n",
"72 67.326657 2.874169 26 organic \n",
"73 66.683327 3.154945 26 organic "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 這是錯的用不到。\n",
"df_weekly[df_weekly['ga:isoWeek'] == 26]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:03:21.358700Z",
"start_time": "2021-07-09T05:03:21.352733Z"
}
},
"outputs": [],
"source": [
"# 會有問題,只是統計到目前的...7/2還沒過完,但現在取得的資料已經是上周五過完的資料"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"相對於上周的成長率(social): ...\n",
"\n",
"---\n",
"\n",
"相對於上周的成長率(organtic): ...."
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:03:22.422709Z",
"start_time": "2021-07-09T05:03:22.414709Z"
},
"scrolled": true
},
"outputs": [],
"source": [
"# 計算user成長率(一個族群):\n",
"# 1.上週透過ga:isoWeek取得上週資料、本週資料\n",
"# 2. 兩者去計算成長率\n",
"\n",
"\n",
"# 1.\n",
"# df_last_week = df_weekly[(df_weekly['ga:isoWeek'] == 25) & (df_weekly['category'] == 'organic')]\n",
"# df_curr_week = df_weekly[(df_weekly['ga:isoWeek'] == 26) & (df_weekly['category'] == 'organic')]\n",
"\n",
"\n",
"# last_nums = df_last_week['ga:users'].sum()\n",
"# curr_nums = df_curr_week['ga:users'].sum()\n",
"# growth = (df_curr_week['ga:users'].sum() - df_last_week['ga:users'].sum()) / df_last_week['ga:users'].sum() * 100\n",
"# growth, last_nums, curr_nums"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:03:23.016547Z",
"start_time": "2021-07-09T05:03:22.990551Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(-1.26, 63420, 62619)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def get_growth( \n",
" column='ga:users', \n",
" cluster='organic', \n",
" time_last_wk_low=datetime.datetime.strptime('20210617', '%Y%m%d'),\n",
" time_last_wk_high=datetime.datetime.strptime('20210624', '%Y%m%d'),\n",
" time_curr_wk_low=datetime.datetime.strptime('20210625', '%Y%m%d'),\n",
" time_curr_wk_high=datetime.datetime.strptime('20210702', '%Y%m%d')\n",
"):\n",
" # 時間用絕對時間判斷\n",
" df_last_week = df_weekly[(df_weekly['ga:date'] > time_last_wk_low) & (df_weekly['ga:date'] < time_last_wk_high)]\n",
" df_curr_week = df_weekly[(df_weekly['ga:date'] > time_curr_wk_low) & (df_weekly['ga:date'] < time_curr_wk_high)]\n",
"\n",
"# df_last_week = df_weekly[(df_weekly['ga:isoWeek'] == (curr_week-1)) & (df_weekly['category'] == cluster)]\n",
"# df_curr_week = df_weekly[(df_weekly['ga:isoWeek'] == curr_week) & (df_weekly['category'] == cluster)]\n",
" \n",
" df_last_week = df_last_week[df_last_week['category'] == cluster]\n",
" df_curr_week = df_curr_week[df_curr_week['category'] == cluster]\n",
" last_nums = df_last_week[column].sum()\n",
" curr_nums = df_curr_week[column].sum()\n",
" growth = (df_curr_week[column].sum() - df_last_week[column].sum()) / df_last_week[column].sum() * 100\n",
" return round(growth, 2), last_nums, curr_nums\n",
"\n",
"get_growth()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:10:02.645569Z",
"start_time": "2021-07-09T05:10:02.539571Z"
}
},
"outputs": [],
"source": [
"columns = ['ga:users','ga:newusers','ga:sessions','ga:pageviews','ga:bounceRate','ga:pageviewsPerSession']\n",
"data = {}\n",
"\n",
"\n",
"for cluster in ['social']:\n",
" for column in columns:\n",
" last_growth, _, _ = get_growth(\n",
" column, \n",
" cluster, \n",
" datetime.datetime.strptime('20210610', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210618', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210617', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210625', '%Y%m%d'),\n",
" )\n",
" growth, last_num, curr_num = get_growth(\n",
" column, \n",
" cluster,\n",
" datetime.datetime.strptime('20210624', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210702', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210701', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210709', '%Y%m%d'),\n",
" )\n",
" data[column[3:]] = [last_num, curr_num] \n",
" data[column[3:] +'growth-rate'] = [last_growth, growth]\n",
"\n",
"data_social = pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:10:09.855676Z",
"start_time": "2021-07-09T05:10:09.823676Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" users | \n",
" growth rate | \n",
" new-users | \n",
" growth rate | \n",
" sessions | \n",
" growth rate | \n",
" pageviews | \n",
" growth rate | \n",
" avg-bounceRate | \n",
" growth rate | \n",
" pv-per-session | \n",
" growth rate | \n",
"
\n",
" \n",
" \n",
" \n",
" 2021/6/25-2021/7/1 | \n",
" 134748 | \n",
" 68.88 | \n",
" 101318 | \n",
" 59.25 | \n",
" 141713 | \n",
" 69.04 | \n",
" 204786 | \n",
" 67.94 | \n",
" 83.028839 | \n",
" -1.43 | \n",
" 10.295533 | \n",
" 1.19 | \n",
"
\n",
" \n",
" 2021/7/2-2021/7/8 | \n",
" 45771 | \n",
" -66.03 | \n",
" 27950 | \n",
" -72.41 | \n",
" 49722 | \n",
" -64.91 | \n",
" 79513 | \n",
" -61.17 | \n",
" 80.400954 | \n",
" -3.17 | \n",
" 11.193322 | \n",
" 8.72 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" users growth rate new-users growth rate sessions \\\n",
"2021/6/25-2021/7/1 134748 68.88 101318 59.25 141713 \n",
"2021/7/2-2021/7/8 45771 -66.03 27950 -72.41 49722 \n",
"\n",
" growth rate pageviews growth rate avg-bounceRate \\\n",
"2021/6/25-2021/7/1 69.04 204786 67.94 83.028839 \n",
"2021/7/2-2021/7/8 -64.91 79513 -61.17 80.400954 \n",
"\n",
" growth rate pv-per-session growth rate \n",
"2021/6/25-2021/7/1 -1.43 10.295533 1.19 \n",
"2021/7/2-2021/7/8 -3.17 11.193322 8.72 "
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_social.columns = ['users', 'growth rate', 'new-users', 'growth rate', 'sessions', 'growth rate', 'pageviews', 'growth rate', 'avg-bounceRate', 'growth rate', 'pv-per-session', 'growth rate']\n",
"data_social.iloc[:, 8] = data_social.iloc[:, 8] / 7\n",
"data_social.index = ['2021/6/25-2021/7/1', '2021/7/2-2021/7/8']\n",
"data_social"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"ExecuteTime": {
"end_time": "2021-07-09T05:10:27.340927Z",
"start_time": "2021-07-09T05:10:27.223935Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" users | \n",
" growth rate(%) | \n",
" new-users | \n",
" growth rate(%) | \n",
" sessions | \n",
" growth rate(%) | \n",
" pageviews | \n",
" growth rate(%) | \n",
" avg-bounceRate | \n",
" growth rate(%) | \n",
" pv-per-session | \n",
" growth rate(%) | \n",
"
\n",
" \n",
" \n",
" \n",
" 2021/6/25-2021/7/1 | \n",
" 72433 | \n",
" 2.54 | \n",
" 55814 | \n",
" 1.66 | \n",
" 84829 | \n",
" 2.78 | \n",
" 252745 | \n",
" 5.56 | \n",
" 65.562333 | \n",
" -0.08 | \n",
" 20.869821 | \n",
" 2.51 | \n",
"
\n",
" \n",
" 2021/7/2-2021/7/8 | \n",
" 76727 | \n",
" 5.93 | \n",
" 59133 | \n",
" 5.95 | \n",
" 88992 | \n",
" 4.91 | \n",
" 261729 | \n",
" 3.55 | \n",
" 65.319431 | \n",
" -0.37 | \n",
" 20.555159 | \n",
" -1.51 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" users growth rate(%) new-users growth rate(%) \\\n",
"2021/6/25-2021/7/1 72433 2.54 55814 1.66 \n",
"2021/7/2-2021/7/8 76727 5.93 59133 5.95 \n",
"\n",
" sessions growth rate(%) pageviews growth rate(%) \\\n",
"2021/6/25-2021/7/1 84829 2.78 252745 5.56 \n",
"2021/7/2-2021/7/8 88992 4.91 261729 3.55 \n",
"\n",
" avg-bounceRate growth rate(%) pv-per-session \\\n",
"2021/6/25-2021/7/1 65.562333 -0.08 20.869821 \n",
"2021/7/2-2021/7/8 65.319431 -0.37 20.555159 \n",
"\n",
" growth rate(%) \n",
"2021/6/25-2021/7/1 2.51 \n",
"2021/7/2-2021/7/8 -1.51 "
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"columns = ['ga:users','ga:newusers','ga:sessions','ga:pageviews','ga:bounceRate','ga:pageviewsPerSession']\n",
"data = {}\n",
"\n",
"\n",
"for cluster in ['organic']:\n",
" for column in columns:\n",
" last_growth, _, _ = get_growth(\n",
" column, \n",
" cluster, \n",
" datetime.datetime.strptime('20210610', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210618', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210617', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210625', '%Y%m%d'),\n",
" )\n",
" growth, last_num, curr_num = get_growth( \n",
" column, \n",
" cluster,\n",
" datetime.datetime.strptime('20210624', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210702', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210701', '%Y%m%d'),\n",
" datetime.datetime.strptime('20210709', '%Y%m%d'),\n",
" )\n",
" data[column[3:]] = [last_num, curr_num] \n",
" data[column[3:] +'growth-rate'] = [last_growth, growth]\n",
"\n",
"data_organic = pd.DataFrame(data)\n",
"data_organic.columns = ['users', 'growth rate(%)', 'new-users', 'growth rate(%)', 'sessions', 'growth rate(%)', 'pageviews', 'growth rate(%)', 'avg-bounceRate', 'growth rate(%)', 'pv-per-session', 'growth rate(%)']\n",
"data_organic.iloc[:, 8] = data_organic.iloc[:, 8] / 7\n",
"data_organic.index = ['2021/6/25-2021/7/1', '2021/7/2-2021/7/8']\n",
"data_organic"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"人口成長"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.3 64-bit",
"language": "python",
"name": "python37364bit6893c7013b164b1189a865dcaea9fb2f"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}