noodles пре 3 година
родитељ
комит
b656b5ed33
1 измењених фајлова са 17 додато и 11 уклоњено
  1. 17 11
      run.py

+ 17 - 11
run.py

@@ -45,7 +45,7 @@ def brower_start(port):
 #    browser = webdriver.Chrome(options=options)
 
     browser = webdriver.Remote(
-        command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
+        command_executor='http://192.53.174.202:'+str(port)+'/wd/hub',
         desired_capabilities=options.to_capabilities()
     )
     return browser
@@ -118,8 +118,13 @@ def get_intro_info(driver, output):
     element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}簡介']".format(output['name']))
     driver.implicitly_wait(10)
     ActionChains(driver).move_to_element(element).click(element).perform()
-    
-    page_down_(driver, "//div[@class='siAUzd-neVct section-scrollbox cYB2Ge-oHo7ed cYB2Ge-ti6hGc']", 3)
+
+    # pageSource = driver.page_source
+    # fileToWrite = open("page_source.html", "w")
+    # fileToWrite.write(pageSource)
+    # fileToWrite.close()
+
+    page_down_(driver, '//*[@id="pane"]/div/div[1]', 3)
 
     intro_soup = BeautifulSoup(driver.page_source, 'html.parser')
     for key in intro_list:
@@ -217,8 +222,9 @@ def get_reviews(driver, output):
     driver.implicitly_wait(10)
     ActionChains(driver).move_to_element(element).click(element).perform()
     time.sleep(0.5)
-    
-    page_down_(driver, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]', 5)
+
+    # page_down_(driver, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]', 5)
+    page_down_(driver, '//div[@class="PPCwl"]',5)
     all_photo = driver.find_elements_by_class_name('ODSEW-ShBeI-xJzy8c-bF1uUb')
     for ap in all_photo:
         ap.click()
@@ -445,7 +451,6 @@ def page_down_(driver, xpath_css, time_):
         elmt=elmts[1]
     else:
         elmt=elmts[0]
-        
     actions = ActionChains(driver)
     actions.move_to_element(elmt).click().perform()
     for i in range(time_):
@@ -468,11 +473,11 @@ def main():
         port=int(sys.argv[2])
 
     url_pd = get_not_cralwer_url(keyword)
-    print('drvier start...')
+    print('drvier start {}...'.format(keyword))
     driver = brower_start(port)
     # driver = serive_create('Profile 1')
-    # profilepath = 'Profile 1'
-    # driver = serive_create_linux(profilepath)
+    #profilepath = 'Profile 1'
+    #driver = serive_create_linux(profilepath)
     
     for key, row in url_pd.iterrows():
         try:    
@@ -520,12 +525,13 @@ def main():
             output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
             data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
 
-        except:
+        except Exception as e:
+            print(e)
             error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
             data_select_insert(db, 'error_list', error_table_col, row)
             driver.close()
             driver = brower_start(port)
-            # driver = serive_create_linux(profilepath)
+            driver = serive_create_linux(profilepath)