jared 3 năm trước
mục cha
commit
e2dab507f9
2 tập tin đã thay đổi với 12 bổ sung và 7 xóa
  1. 9 5
      jared_run.py
  2. 3 2
      jared_shop_item_list.py

+ 9 - 5
jared_run.py

@@ -9,6 +9,7 @@ from selenium.webdriver.common.by import By
 import traceback
 from bs4 import BeautifulSoup
 #import datetime
+import pysnooper
 
 from utility import database_access as DA
 from utility.parseutils import *
@@ -71,7 +72,6 @@ def open_time(driver):
     else:
         return 0
 
-
 def get_shop_info(driver, output, shop_soup):
 #    print(datetime.now())
     current_url_split = driver.current_url.split('@')[1].split(',')
@@ -103,12 +103,11 @@ def get_shop_info(driver, output, shop_soup):
 
     return output
 
-
 def get_intro_info(driver, output):
     print(datetime.now())
 
     element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[6]')
-
+    print(element)
     driver.implicitly_wait(20)
     print(datetime.now())
 
@@ -118,6 +117,7 @@ def get_intro_info(driver, output):
     for i in range(5, 35, 3):
         try:
             element = driver.find_element(By.XPATH,'//*[@id="pane"]/div/div[1]/div/div/div[2]/div[{}]'.format(i))
+            print(element)
             actions = ActionChains(driver)
             actions.move_to_element(element).perform()
         except:
@@ -130,9 +130,13 @@ def get_intro_info(driver, output):
         elements = intro_soup.find('div',{'aria-label':key})
         if elements:
             element = elements.find_all('li',{'class':'LQjNnc-p83tee-JNdkSc-ibnC6b'})
+            print('element')
+            print(element)
             count = 0
             tmp = []
             for ele in element:
+#                print(ele.)
+                print(ele.prettify())
                 # if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/2x/check_black_18dp.png"}):
                 if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/1x/check_black_18dp.png"}):
                     tmp += [{
@@ -145,7 +149,7 @@ def get_intro_info(driver, output):
         else:
             output[intro_list[key][0]] = '[]'
     print(datetime.now())
-
+#    time.sleep(9999)
     driver.back()
     return output
 
@@ -212,7 +216,7 @@ def get_time_list(shop_soup, output):
 
     return output
 
-
+@pysnooper.snoop()
 def get_reviews(driver, output):
     wait = WebDriverWait(driver, 30)
     more_reviews_css = "button[jsaction='pane.rating.moreReviews']"

+ 3 - 2
jared_shop_item_list.py

@@ -21,8 +21,9 @@ import re
 
 def brower_start(port):
     options = webdriver.ChromeOptions()
-#    browser = webdriver.Chrome(options=options)
 
+#    browser = webdriver.Chrome(options=options)
+#    上面成功再來用docker
     browser = webdriver.Remote(
         command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
         desired_capabilities=options.to_capabilities()
@@ -64,7 +65,7 @@ def main():
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
 
 
-    keyword = '麻辣火鍋'
+    keyword = '青年旅館'
     if len(sys.argv) >1:
         keyword=sys.argv[1]
     port=4444