aaron-1015 3 лет назад
Родитель
Сommit
a9c37d5108
2 измененных файлов с 948 добавлено и 264 удалено
  1. 714 0
      .ipynb_checkpoints/topic-tree爬資料-checkpoint.ipynb
  2. 234 264
      topic-tree爬資料.ipynb

Разница между файлами не показана из-за своего большого размера
+ 714 - 0
.ipynb_checkpoints/topic-tree爬資料-checkpoint.ipynb


+ 234 - 264
topic-tree爬資料.ipynb

@@ -25,7 +25,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Using matplotlib backend: TkAgg\n"
+      "Using matplotlib backend: Qt5Agg\n"
      ]
     }
    ],
@@ -314,7 +314,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2021-07-11T18:21:40.253769Z",
@@ -354,28 +354,28 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>2021-07-04 19:00:00</th>\n",
-       "      <td>29</td>\n",
+       "      <th>2021-07-10 14:00:00</th>\n",
+       "      <td>27</td>\n",
        "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2021-07-04 20:00:00</th>\n",
-       "      <td>0</td>\n",
+       "      <th>2021-07-10 15:00:00</th>\n",
+       "      <td>17</td>\n",
        "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2021-07-04 21:00:00</th>\n",
-       "      <td>0</td>\n",
+       "      <th>2021-07-10 16:00:00</th>\n",
+       "      <td>38</td>\n",
        "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2021-07-04 22:00:00</th>\n",
-       "      <td>50</td>\n",
+       "      <th>2021-07-10 17:00:00</th>\n",
+       "      <td>15</td>\n",
        "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2021-07-04 23:00:00</th>\n",
-       "      <td>16</td>\n",
+       "      <th>2021-07-10 18:00:00</th>\n",
+       "      <td>11</td>\n",
        "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -384,28 +384,28 @@
        "      <td>...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2021-07-11 14:00:00</th>\n",
-       "      <td>30</td>\n",
+       "      <th>2021-07-17 09:00:00</th>\n",
+       "      <td>34</td>\n",
        "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2021-07-11 15:00:00</th>\n",
-       "      <td>46</td>\n",
+       "      <th>2021-07-17 10:00:00</th>\n",
+       "      <td>26</td>\n",
        "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2021-07-11 16:00:00</th>\n",
-       "      <td>29</td>\n",
+       "      <th>2021-07-17 11:00:00</th>\n",
+       "      <td>15</td>\n",
        "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2021-07-11 17:00:00</th>\n",
-       "      <td>33</td>\n",
+       "      <th>2021-07-17 12:00:00</th>\n",
+       "      <td>23</td>\n",
        "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2021-07-11 18:00:00</th>\n",
-       "      <td>43</td>\n",
+       "      <th>2021-07-17 13:00:00</th>\n",
+       "      <td>27</td>\n",
        "      <td>True</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -416,22 +416,22 @@
       "text/plain": [
        "                     Topic isPartial\n",
        "date                                \n",
-       "2021-07-04 19:00:00     29     False\n",
-       "2021-07-04 20:00:00      0     False\n",
-       "2021-07-04 21:00:00      0     False\n",
-       "2021-07-04 22:00:00     50     False\n",
-       "2021-07-04 23:00:00     16     False\n",
+       "2021-07-10 14:00:00     27     False\n",
+       "2021-07-10 15:00:00     17     False\n",
+       "2021-07-10 16:00:00     38     False\n",
+       "2021-07-10 17:00:00     15     False\n",
+       "2021-07-10 18:00:00     11     False\n",
        "...                    ...       ...\n",
-       "2021-07-11 14:00:00     30     False\n",
-       "2021-07-11 15:00:00     46     False\n",
-       "2021-07-11 16:00:00     29     False\n",
-       "2021-07-11 17:00:00     33     False\n",
-       "2021-07-11 18:00:00     43      True\n",
+       "2021-07-17 09:00:00     34     False\n",
+       "2021-07-17 10:00:00     26     False\n",
+       "2021-07-17 11:00:00     15     False\n",
+       "2021-07-17 12:00:00     23     False\n",
+       "2021-07-17 13:00:00     27      True\n",
        "\n",
        "[168 rows x 2 columns]"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -462,7 +462,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 6,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2021-07-11T18:24:40.443622Z",
@@ -473,10 +473,10 @@
     {
      "data": {
       "text/plain": [
-       "'2021-07-04 19:00:00'"
+       "'2021-07-10 14:00:00'"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -487,7 +487,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 7,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2021-07-11T18:25:17.152721Z",
@@ -498,10 +498,10 @@
     {
      "data": {
       "text/plain": [
-       "datetime.datetime(2021, 7, 4, 19, 0)"
+       "datetime.datetime(2021, 7, 10, 14, 0)"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -512,7 +512,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 8,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2021-07-11T18:26:34.467056Z",
@@ -524,146 +524,7 @@
     {
      "data": {
       "text/plain": [
-       "array([datetime.datetime(2021, 7, 4, 19, 0),\n",
-       "       datetime.datetime(2021, 7, 4, 20, 0),\n",
-       "       datetime.datetime(2021, 7, 4, 21, 0),\n",
-       "       datetime.datetime(2021, 7, 4, 22, 0),\n",
-       "       datetime.datetime(2021, 7, 4, 23, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 0, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 1, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 2, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 3, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 4, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 5, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 6, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 7, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 8, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 9, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 10, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 11, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 12, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 13, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 14, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 15, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 16, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 17, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 18, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 19, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 20, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 21, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 22, 0),\n",
-       "       datetime.datetime(2021, 7, 5, 23, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 0, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 1, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 2, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 3, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 4, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 5, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 6, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 7, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 8, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 9, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 10, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 11, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 12, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 13, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 14, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 15, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 16, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 17, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 18, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 19, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 20, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 21, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 22, 0),\n",
-       "       datetime.datetime(2021, 7, 6, 23, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 0, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 1, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 2, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 3, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 4, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 5, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 6, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 7, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 8, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 9, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 10, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 11, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 12, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 13, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 14, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 15, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 16, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 17, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 18, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 19, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 20, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 21, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 22, 0),\n",
-       "       datetime.datetime(2021, 7, 7, 23, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 0, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 1, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 2, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 3, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 4, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 5, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 6, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 7, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 8, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 9, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 10, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 11, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 12, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 13, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 14, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 15, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 16, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 17, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 18, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 19, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 20, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 21, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 22, 0),\n",
-       "       datetime.datetime(2021, 7, 8, 23, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 0, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 1, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 2, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 3, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 4, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 5, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 6, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 7, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 8, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 9, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 10, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 11, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 12, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 13, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 14, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 15, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 16, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 17, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 18, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 19, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 20, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 21, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 22, 0),\n",
-       "       datetime.datetime(2021, 7, 9, 23, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 0, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 1, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 2, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 3, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 4, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 5, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 6, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 7, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 8, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 9, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 10, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 11, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 12, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 13, 0),\n",
-       "       datetime.datetime(2021, 7, 10, 14, 0),\n",
+       "array([datetime.datetime(2021, 7, 10, 14, 0),\n",
        "       datetime.datetime(2021, 7, 10, 15, 0),\n",
        "       datetime.datetime(2021, 7, 10, 16, 0),\n",
        "       datetime.datetime(2021, 7, 10, 17, 0),\n",
@@ -691,10 +552,149 @@
        "       datetime.datetime(2021, 7, 11, 15, 0),\n",
        "       datetime.datetime(2021, 7, 11, 16, 0),\n",
        "       datetime.datetime(2021, 7, 11, 17, 0),\n",
-       "       datetime.datetime(2021, 7, 11, 18, 0)], dtype=object)"
+       "       datetime.datetime(2021, 7, 11, 18, 0),\n",
+       "       datetime.datetime(2021, 7, 11, 19, 0),\n",
+       "       datetime.datetime(2021, 7, 11, 20, 0),\n",
+       "       datetime.datetime(2021, 7, 11, 21, 0),\n",
+       "       datetime.datetime(2021, 7, 11, 22, 0),\n",
+       "       datetime.datetime(2021, 7, 11, 23, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 0, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 1, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 2, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 3, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 4, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 5, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 6, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 7, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 8, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 9, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 10, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 11, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 12, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 13, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 14, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 15, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 16, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 17, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 18, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 19, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 20, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 21, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 22, 0),\n",
+       "       datetime.datetime(2021, 7, 12, 23, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 0, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 1, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 2, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 3, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 4, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 5, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 6, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 7, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 8, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 9, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 10, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 11, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 12, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 13, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 14, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 15, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 16, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 17, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 18, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 19, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 20, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 21, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 22, 0),\n",
+       "       datetime.datetime(2021, 7, 13, 23, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 0, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 1, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 2, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 3, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 4, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 5, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 6, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 7, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 8, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 9, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 10, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 11, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 12, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 13, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 14, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 15, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 16, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 17, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 18, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 19, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 20, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 21, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 22, 0),\n",
+       "       datetime.datetime(2021, 7, 14, 23, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 0, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 1, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 2, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 3, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 4, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 5, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 6, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 7, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 8, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 9, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 10, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 11, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 12, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 13, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 14, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 15, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 16, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 17, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 18, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 19, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 20, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 21, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 22, 0),\n",
+       "       datetime.datetime(2021, 7, 15, 23, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 0, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 1, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 2, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 3, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 4, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 5, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 6, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 7, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 8, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 9, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 10, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 11, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 12, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 13, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 14, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 15, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 16, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 17, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 18, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 19, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 20, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 21, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 22, 0),\n",
+       "       datetime.datetime(2021, 7, 16, 23, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 0, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 1, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 2, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 3, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 4, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 5, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 6, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 7, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 8, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 9, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 10, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 11, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 12, 0),\n",
+       "       datetime.datetime(2021, 7, 17, 13, 0)], dtype=object)"
       ]
      },
-     "execution_count": 22,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -723,7 +723,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 9,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2021-07-11T18:26:55.543347Z",
@@ -756,7 +756,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 10,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2021-07-11T18:26:56.762806Z",
@@ -795,37 +795,37 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>Topic</td>\n",
-       "      <td>2021-07-04 19:00:00</td>\n",
-       "      <td>29</td>\n",
-       "      <td>2021-07-12 02:26:55.525348</td>\n",
+       "      <td>2021-07-10 14:00:00</td>\n",
+       "      <td>27</td>\n",
+       "      <td>2021-07-17 21:24:20.921178</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>Topic</td>\n",
-       "      <td>2021-07-04 20:00:00</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2021-07-12 02:26:55.525348</td>\n",
+       "      <td>2021-07-10 15:00:00</td>\n",
+       "      <td>17</td>\n",
+       "      <td>2021-07-17 21:24:20.921178</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>Topic</td>\n",
-       "      <td>2021-07-04 21:00:00</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2021-07-12 02:26:55.525348</td>\n",
+       "      <td>2021-07-10 16:00:00</td>\n",
+       "      <td>38</td>\n",
+       "      <td>2021-07-17 21:24:20.921178</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>Topic</td>\n",
-       "      <td>2021-07-04 22:00:00</td>\n",
-       "      <td>50</td>\n",
-       "      <td>2021-07-12 02:26:55.525348</td>\n",
+       "      <td>2021-07-10 17:00:00</td>\n",
+       "      <td>15</td>\n",
+       "      <td>2021-07-17 21:24:20.921178</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>Topic</td>\n",
-       "      <td>2021-07-04 23:00:00</td>\n",
-       "      <td>16</td>\n",
-       "      <td>2021-07-12 02:26:55.525348</td>\n",
+       "      <td>2021-07-10 18:00:00</td>\n",
+       "      <td>11</td>\n",
+       "      <td>2021-07-17 21:24:20.921178</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -837,37 +837,37 @@
        "    <tr>\n",
        "      <th>163</th>\n",
        "      <td>Topic</td>\n",
-       "      <td>2021-07-11 14:00:00</td>\n",
-       "      <td>30</td>\n",
-       "      <td>2021-07-12 02:26:55.525348</td>\n",
+       "      <td>2021-07-17 09:00:00</td>\n",
+       "      <td>34</td>\n",
+       "      <td>2021-07-17 21:24:20.921178</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>164</th>\n",
        "      <td>Topic</td>\n",
-       "      <td>2021-07-11 15:00:00</td>\n",
-       "      <td>46</td>\n",
-       "      <td>2021-07-12 02:26:55.525348</td>\n",
+       "      <td>2021-07-17 10:00:00</td>\n",
+       "      <td>26</td>\n",
+       "      <td>2021-07-17 21:24:20.921178</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>165</th>\n",
        "      <td>Topic</td>\n",
-       "      <td>2021-07-11 16:00:00</td>\n",
-       "      <td>29</td>\n",
-       "      <td>2021-07-12 02:26:55.525348</td>\n",
+       "      <td>2021-07-17 11:00:00</td>\n",
+       "      <td>15</td>\n",
+       "      <td>2021-07-17 21:24:20.921178</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>166</th>\n",
        "      <td>Topic</td>\n",
-       "      <td>2021-07-11 17:00:00</td>\n",
-       "      <td>33</td>\n",
-       "      <td>2021-07-12 02:26:55.525348</td>\n",
+       "      <td>2021-07-17 12:00:00</td>\n",
+       "      <td>23</td>\n",
+       "      <td>2021-07-17 21:24:20.921178</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>167</th>\n",
        "      <td>Topic</td>\n",
-       "      <td>2021-07-11 18:00:00</td>\n",
-       "      <td>43</td>\n",
-       "      <td>2021-07-12 02:26:55.525348</td>\n",
+       "      <td>2021-07-17 13:00:00</td>\n",
+       "      <td>27</td>\n",
+       "      <td>2021-07-17 21:24:20.921178</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -876,22 +876,22 @@
       ],
       "text/plain": [
        "    iot_kword            iot_date  iot_value                  iot_dtime\n",
-       "0       Topic 2021-07-04 19:00:00         29 2021-07-12 02:26:55.525348\n",
-       "1       Topic 2021-07-04 20:00:00          0 2021-07-12 02:26:55.525348\n",
-       "2       Topic 2021-07-04 21:00:00          0 2021-07-12 02:26:55.525348\n",
-       "3       Topic 2021-07-04 22:00:00         50 2021-07-12 02:26:55.525348\n",
-       "4       Topic 2021-07-04 23:00:00         16 2021-07-12 02:26:55.525348\n",
+       "0       Topic 2021-07-10 14:00:00         27 2021-07-17 21:24:20.921178\n",
+       "1       Topic 2021-07-10 15:00:00         17 2021-07-17 21:24:20.921178\n",
+       "2       Topic 2021-07-10 16:00:00         38 2021-07-17 21:24:20.921178\n",
+       "3       Topic 2021-07-10 17:00:00         15 2021-07-17 21:24:20.921178\n",
+       "4       Topic 2021-07-10 18:00:00         11 2021-07-17 21:24:20.921178\n",
        "..        ...                 ...        ...                        ...\n",
-       "163     Topic 2021-07-11 14:00:00         30 2021-07-12 02:26:55.525348\n",
-       "164     Topic 2021-07-11 15:00:00         46 2021-07-12 02:26:55.525348\n",
-       "165     Topic 2021-07-11 16:00:00         29 2021-07-12 02:26:55.525348\n",
-       "166     Topic 2021-07-11 17:00:00         33 2021-07-12 02:26:55.525348\n",
-       "167     Topic 2021-07-11 18:00:00         43 2021-07-12 02:26:55.525348\n",
+       "163     Topic 2021-07-17 09:00:00         34 2021-07-17 21:24:20.921178\n",
+       "164     Topic 2021-07-17 10:00:00         26 2021-07-17 21:24:20.921178\n",
+       "165     Topic 2021-07-17 11:00:00         15 2021-07-17 21:24:20.921178\n",
+       "166     Topic 2021-07-17 12:00:00         23 2021-07-17 21:24:20.921178\n",
+       "167     Topic 2021-07-17 13:00:00         27 2021-07-17 21:24:20.921178\n",
        "\n",
        "[168 rows x 4 columns]"
       ]
      },
-     "execution_count": 25,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -902,7 +902,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 11,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2021-07-11T18:27:05.443765Z",
@@ -932,7 +932,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 12,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2021-07-11T18:27:13.270536Z",
@@ -964,7 +964,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 13,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2021-07-11T18:27:14.664058Z",
@@ -1003,7 +1003,7 @@
        "       'Pill', 'Table condiment', 'Chemical compound'], dtype=object)"
       ]
      },
-     "execution_count": 28,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1026,7 +1026,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": null,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2021-07-11T18:28:42.804715Z",
@@ -1058,37 +1058,7 @@
       "db updated.\n",
       "Nutrition 資料抓取中...\n",
       "db updating...\n",
-      "db updated.\n",
-      "Illness 資料抓取中...\n",
-      "db updating...\n",
-      "db updated.\n",
-      "Law 資料抓取中...\n",
-      "db updating...\n",
-      "db updated.\n",
-      "Art 資料抓取中...\n",
-      "db updating...\n",
-      "db updated.\n",
-      "Company 資料抓取中...\n",
-      "db updating...\n",
-      "db updated.\n",
-      "Website 資料抓取中...\n",
-      "db updating...\n",
-      "db updated.\n",
-      "Furniture retail company 資料抓取中...\n"
-     ]
-    },
-    {
-     "ename": "IndexError",
-     "evalue": "index 0 is out of bounds for axis 0 with size 0",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mIndexError\u001b[0m                                Traceback (most recent call last)",
-      "\u001b[1;32m<ipython-input-29-bc396af1b4a8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     20\u001b[0m         \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     21\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 22\u001b[1;33m \u001b[0mcrawler_iot_topic_tree\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mto_titles\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[1;32m<ipython-input-29-bc396af1b4a8>\u001b[0m in \u001b[0;36mcrawler_iot_topic_tree\u001b[1;34m(keywords, timeframe)\u001b[0m\n\u001b[0;32m     15\u001b[0m         )\n\u001b[0;32m     16\u001b[0m         \u001b[0mto_topics_interest_over_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpytrend\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minterest_over_time\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 17\u001b[1;33m         \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf_to_db\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mto_topics_interest_over_time\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     18\u001b[0m         \u001b[0mtable\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_table\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'topic_tree_g_trend_iot'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'cmm_test'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     19\u001b[0m         \u001b[0mdata_to_db\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtable\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;32m<ipython-input-24-d931671c8920>\u001b[0m in \u001b[0;36mdf_to_db\u001b[1;34m(df)\u001b[0m\n\u001b[0;32m      2\u001b[0m     \u001b[0mcolumns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'iot_kword'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'iot_date'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'iot_value'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'iot_dtime'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0mlength\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[0miot_kword\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m     \u001b[0mto_db_df\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m   4295\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   4296\u001b[0m             \u001b[0mkey\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcast_scalar_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mwarn_float\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4297\u001b[1;33m             \u001b[1;32mreturn\u001b[0m \u001b[0mgetitem\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   4298\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   4299\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mslice\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;31mIndexError\u001b[0m: index 0 is out of bounds for axis 0 with size 0"
+      "db updated.\n"
      ]
     }
    ],
@@ -1127,9 +1097,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.7.3 64-bit",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
-   "name": "python37364bit6893c7013b164b1189a865dcaea9fb2f"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -1141,7 +1111,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.9.5"
   }
  },
  "nbformat": 4,

Некоторые файлы не были показаны из-за большого количества измененных файлов