|
@@ -20,6 +20,7 @@ import traceback
|
|
|
import json
|
|
|
import re
|
|
|
import os
|
|
|
+import selenium
|
|
|
|
|
|
def brower_start(port):
|
|
|
options = webdriver.ChromeOptions()
|
|
@@ -34,9 +35,12 @@ def brower_start(port):
|
|
|
|
|
|
|
|
|
def get_url_list(driver):
|
|
|
- wait = WebDriverWait(driver, 60)
|
|
|
- wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="ppdPk-Ej1Yeb-LgbsSe-tJiF1e"]')))
|
|
|
-
|
|
|
+ wait = WebDriverWait(driver, 30)
|
|
|
+ try:
|
|
|
+ wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="ppdPk-Ej1Yeb-LgbsSe-tJiF1e"]')))
|
|
|
+ except selenium.common.exceptions.TimeoutException:
|
|
|
+ traceback.print_exc()
|
|
|
+ return "EMPTY"
|
|
|
|
|
|
# elmts=driver.find_elements_by_xpath("//div[contains(@class,'siAUzd-neVct section-scrollbox') and not( contains(@role,'region') )]")
|
|
|
|
|
@@ -122,10 +126,15 @@ def main():
|
|
|
driver.get(url)
|
|
|
|
|
|
keyin_keyword(driver, keyword)
|
|
|
-
|
|
|
+ failcnt=0
|
|
|
for page in range(4):
|
|
|
print( r['loc'], latitude, longitude, page)
|
|
|
url_list = get_url_list(driver)
|
|
|
+ if url_list == 'EMPTY':
|
|
|
+ failcnt+=1
|
|
|
+ if failcnt >=2:
|
|
|
+ break
|
|
|
+ continue
|
|
|
print(url_list)
|
|
|
shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
|
|
|
for item in url_list:
|