|
@@ -1,5 +1,6 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
|
-from selenium import webdriver
|
|
|
+#from selenium import webdriver
|
|
|
+from seleniumwire import webdriver
|
|
|
from selenium.webdriver.common.action_chains import ActionChains
|
|
|
from selenium.webdriver.common.keys import Keys
|
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
@@ -21,6 +22,8 @@ import json
|
|
|
import re
|
|
|
import sys, os
|
|
|
import socket
|
|
|
+import brotli
|
|
|
+
|
|
|
chrome_window=False
|
|
|
|
|
|
def brower_start(port):
|
|
@@ -132,24 +135,28 @@ def keyin_keyword(driver, keyword):
|
|
|
|
|
|
|
|
|
def get_crawler_list(db, fixed_keyword='滷味'):
    """Return the keyword the crawler should process next.

    Args:
        db: Database handle exposing ``query(sql)``, which yields rows that
            can be indexed by column name (``row['kw']``).
        fixed_keyword: Keyword to return without touching the database.
            Defaults to '滷味', which is what this function previously
            returned unconditionally. Pass ``None`` to pick the keyword
            from ``progress_list2`` instead.

    Returns:
        ``fixed_keyword`` when it is not ``None``; otherwise the ``kw`` of
        the ``progress_list2`` row with the smallest ``num`` below 367, or
        ``None`` when no such row exists.
    """
    # The hard-coded keyword used to sit in front of the query as a bare
    # ``return``, which made the lookup below unreachable. Keeping it as a
    # default preserves behaviour for existing callers.
    if fixed_keyword is not None:
        return fixed_keyword

    # ``distinct`` was dropped: with ``limit 1`` it is redundant, and
    # ordering by a column that is not in a DISTINCT select list is rejected
    # by several SQL engines.
    # NOTE(review): 367 is presumably the number of grid points crawled per
    # keyword -- confirm against get_lon_lat_list.
    cursor = db.query(
        'select kw from progress_list2 '
        'where num < 367 order by num asc limit 1'
    )
    for row in cursor:
        return row['kw']
    return None
|
|
|
|
|
|
def get_lon_lat_list(db, keyword):
|
|
|
num=0
|
|
|
- cursor=db.query('select num from progress_list where kw = "'+keyword+'"')
|
|
|
+ cursor=db.query('select num from progress_list2 where kw = "'+keyword+'"')
|
|
|
for c in cursor:
|
|
|
num=c['num']
|
|
|
break
|
|
@@ -188,6 +195,7 @@ def main():
|
|
|
keyword = get_crawler_list(db)
|
|
|
print(keyword)
|
|
|
lst = get_lon_lat_list(db, keyword)
|
|
|
+# print(lst)
|
|
|
print(keyword, len(lst))
|
|
|
|
|
|
for r in lst:
|
|
@@ -198,8 +206,33 @@ def main():
|
|
|
|
|
|
url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
|
|
|
driver.get(url)
|
|
|
+
|
|
|
+
|
|
|
keyin_keyword(driver, keyword)
|
|
|
failcnt = 0
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ time.sleep(10)
|
|
|
+ for request in driver.requests:
|
|
|
+ if request.response:
|
|
|
+ if 'https://www.google.com.tw/search?tbm=map' in request.url :
|
|
|
+ print(
|
|
|
+ request.url,
|
|
|
+ request.response.status_code,
|
|
|
+ request.response.headers['Content-Type']
|
|
|
+ )
|
|
|
+ print('parsing js:')
|
|
|
+ resp = brotli.decompress(request.response.body)
|
|
|
+ jstext=resp.decode('utf-8')
|
|
|
+ print(jstext)
|
|
|
+ jsobj=json.loads(jstext)
|
|
|
+ print(jsobj)
|
|
|
+# sys.exit()
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
for page in range(10):
|
|
|
print(keyword, latitude, longitude, page)
|
|
|
url_list = get_url_list(driver)
|