jared 3 年之前
父節點
當前提交
59d1aed468
共有 1 個文件被更改,包括 49 次插入和 16 次删除
  1. 49 16
      shop_item_list.py

+ 49 - 16
shop_item_list.py

@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
-from selenium import webdriver
+#from selenium import webdriver
+from seleniumwire import webdriver
 from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
@@ -21,6 +22,8 @@ import json
 import re
 import sys, os
 import socket
+import brotli
+
 chrome_window=False
 
 def brower_start(port):
@@ -132,24 +135,28 @@ def keyin_keyword(driver, keyword):
     
 
 def get_crawler_list(db):
-    result = db.query('select * from shop_item_list order by keyword')
-    result = pd.DataFrame([i for i in result])
-    result = result[~result.keyword.str.contains('項')]
-
-    progress = db.query('select distinct(kw) from progress_list2 where num < 367')
-    progress = pd.DataFrame([i for i in progress])
-
-    if len(progress) != 0:
-        keyword = result[~result['keyword'].isin(progress.kw.to_list())].iloc[0]['keyword']
-    else:
-        keyword = result.iloc[0]['keyword']
-        
-    return keyword
-
+#    result = db.query('select * from shop_item_list order by keyword')
+#    result = pd.DataFrame([i for i in result])
+#    result = result[~result.keyword.str.contains('項')]
+
+#    progress = db.query('select distinct(kw) from progress_list2 where num < 367')
+#    progress = pd.DataFrame([i for i in progress])
+
+#    if len(progress) != 0:
+#        keyword = result[~result['keyword'].isin(progress.kw.to_list())].iloc[0]['keyword']
+#    else:
+#        keyword = result.iloc[0]['keyword']
+#        
+#    return keyword
+    return '滷味'
+    cursor = db.query('select distinct(kw) from progress_list2 where num < 367 order by num asc limit 1')
+    for c in cursor:
+        return c['kw']
+    return None
 
 def get_lon_lat_list(db, keyword):
     num=0
-    cursor=db.query('select num from progress_list where kw = "'+keyword+'"')
+    cursor=db.query('select num from progress_list2 where kw = "'+keyword+'"')
     for c in cursor:
         num=c['num']
         break
@@ -188,6 +195,7 @@ def main():
             keyword  = get_crawler_list(db)
             print(keyword)
             lst = get_lon_lat_list(db, keyword)
+#            print(lst)
             print(keyword, len(lst))
 
             for r in lst:
@@ -198,8 +206,33 @@ def main():
 
                 url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
                 driver.get(url)
+
+
                 keyin_keyword(driver, keyword)
                 failcnt = 0
+
+
+
+
+                time.sleep(10)
+                for request in driver.requests:
+                    if request.response:
+                        if 'https://www.google.com.tw/search?tbm=map' in request.url :
+                            print(
+                                request.url,
+                                request.response.status_code,
+                                request.response.headers['Content-Type']
+                            )
+                            print('parsing js:')
+                            resp = brotli.decompress(request.response.body)
+                            jstext=resp.decode('utf-8')
+                            print(jstext)
+                            jsobj=json.loads(jstext)
+                            print(jsobj)
+#                sys.exit()
+
+
+
                 for page in range(10):
                     print(keyword, latitude, longitude, page)
                     url_list = get_url_list(driver)