ming, 3 years ago
parent
current commit
38dad173aa
1 changed file with 46 additions and 40 deletions

run.py: +46 -40

@@ -28,10 +28,10 @@ def serive_create(profilepath):
 
     option.add_argument('--disable-web-security')
     option.add_argument('--allow-running-insecure-content') 
-    option.add_argument("--user-data-dir=C:\\Users\\noodles\\AppData\\Local\\Google\\Chrome\\User Data")
+    #option.add_argument("--user-data-dir=C:\\Users\\user\\AppData\\Local\\Google\\Chrome\\User Data")
     option.add_argument("profile-directory="+profilepath)
 
-    driver = webdriver.Chrome('./utility/chromedriver_20211103/chromedriver', options=option)
+    driver = webdriver.Chrome('./utility/chromedriver_win32/chromedriver', options=option)
     executor_url = driver.command_executor._url
     session_id = driver.session_id
     print (session_id)
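
For reference, passing the chromedriver path positionally (as above) still works on Selenium 3 but is deprecated in Selenium 4, which moved it into a Service object. A minimal sketch of the Service-based equivalent, assuming a Selenium 4 environment (this repo appears to target the older API):

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

def serive_create_v4(profilepath):
    # Same options as serive_create above, but with the Selenium 4 Service API.
    option = webdriver.ChromeOptions()
    option.add_argument('--disable-web-security')
    option.add_argument('--allow-running-insecure-content')
    option.add_argument('profile-directory=' + profilepath)
    service = Service('./utility/chromedriver_win32/chromedriver')
    return webdriver.Chrome(service=service, options=option)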
@@ -45,7 +45,7 @@ def brower_start(port):
 #    browser = webdriver.Chrome(options=options)
 
     browser = webdriver.Remote(
-        command_executor='http://192.53.174.202:'+str(port)+'/wd/hub',
+        command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
         desired_capabilities=options.to_capabilities()
     )
     return browser
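
brower_start now points the remote executor at 127.0.0.1 instead of a hard-coded external host, so a Selenium server must already be listening on the given local port. A hedged usage sketch; the standalone-server jar name, version, and port 4444 are assumptions, not taken from this repo:

# Start a local Selenium standalone server first, e.g.:
#   java -jar selenium-server-standalone-3.141.59.jar -port 4444
from selenium import webdriver

options = webdriver.ChromeOptions()
browser = webdriver.Remote(
    command_executor='http://127.0.0.1:4444/wd/hub',  # same URL shape as brower_start(4444)
    desired_capabilities=options.to_capabilities()    # Selenium 3 style, matching the code above
)
browser.get('https://www.google.com/maps')
browser.quit()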
@@ -115,38 +115,44 @@ def get_shop_info(driver, output, shop_soup):
 
 def get_intro_info(driver, output):
     # element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[6]')
-    element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}簡介']".format(output['name']))
-    driver.implicitly_wait(10)
-    ActionChains(driver).move_to_element(element).click(element).perform()
+    try:
+        element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}簡介']".format(output['name']))
+        driver.implicitly_wait(5)
+        ActionChains(driver).move_to_element(element).click(element).perform()
 
-    # pageSource = driver.page_source
-    # fileToWrite = open("page_source.html", "w")
-    # fileToWrite.write(pageSource)
-    # fileToWrite.close()
-
-    page_down_(driver, '//*[@id="pane"]/div/div[1]', 3)
-
-    intro_soup = BeautifulSoup(driver.page_source, 'html.parser')
-    for key in intro_list:
-        elements = intro_soup.find('div',{'aria-label':key})
-        if elements:
-            element = elements.find_all('li',{'class':'LQjNnc-p83tee-JNdkSc-ibnC6b'})
-            count = 0
-            tmp = []
-            for ele in element:
-                # if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/2x/check_black_18dp.png"}):
-                if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/1x/check_black_18dp.png"}):
-                    tmp += [{
-                        'id':count,
-                        intro_list[key][1]: blank_check(ele.text)
-                    }]
-                    count += 1
-            print(str(tmp))
-            output[intro_list[key][0]] = str(tmp)
-        else:
+        # pageSource = driver.page_source
+        # fileToWrite = open("page_source.html", "w")
+        # fileToWrite.write(pageSource)
+        # fileToWrite.close()
+
+        page_down_(driver, '//*[@id="pane"]/div/div[1]', 3)
+
+        intro_soup = BeautifulSoup(driver.page_source, 'html.parser')
+        for key in intro_list:
+            elements = intro_soup.find('div',{'aria-label':key})
+            if elements:
+                element = elements.find_all('li',{'class':'LQjNnc-p83tee-JNdkSc-ibnC6b'})
+                count = 0
+                tmp = []
+                for ele in element:
+                    # if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/2x/check_black_18dp.png"}):
+                    if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/1x/check_black_18dp.png"}):
+                        tmp += [{
+                            'id':count,
+                            intro_list[key][1]: blank_check(ele.text)
+                        }]
+                        count += 1
+                print(str(tmp))
+                output[intro_list[key][0]] = str(tmp)
+            else:
+                output[intro_list[key][0]] = '[]'
+        driver.back()
+        return output
+
+    except:
+        for key in intro_list:
             output[intro_list[key][0]] = '[]'
-    driver.back()
-    return output
+        return output
 
 
 def get_time_list(shop_soup, output):
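
The new try block above leans on implicitly_wait(5) and a bare except:, which also swallows unrelated errors. A minimal sketch of a narrower lookup using Selenium's explicit waits; WebDriverWait and expected_conditions are standard Selenium, but wiring them into get_intro_info is an assumption, not what this commit does:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

def find_intro_tab(driver, name, timeout=5):
    # Wait up to `timeout` seconds for the "<name>簡介" tab to become clickable,
    # and report only the failures we actually expect (missing element / timeout).
    selector = "div[aria-label='{}簡介']".format(name)
    try:
        return WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
        )
    except (TimeoutException, NoSuchElementException):
        return None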
@@ -475,12 +481,12 @@ def main():
     url_pd = get_not_cralwer_url(keyword)
     print('drvier start {}...'.format(keyword))
     driver = brower_start(port)
-    # driver = serive_create('Profile 1')
+    #driver = serive_create('Profile 6')
     #profilepath = 'Profile 1'
     #driver = serive_create_linux(profilepath)
     
     for key, row in url_pd.iterrows():
-        try:    
+        # try:    
             name = row['name']
             item_url = row['item_url']
             print(key, name, ': ' ,item_url)
@@ -525,11 +531,11 @@ def main():
             output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
             data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
 
-        except Exception as e:
-            print(e)
-            error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
-            data_select_insert(db, 'error_list', error_table_col, row)
-            time.sleep(2)
+        # except Exception as e:
+        #     print(e)
+        #     error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
+        #     data_select_insert(db, 'error_list', error_table_col, row)
+        #     time.sleep(2)
             # driver.close()
             # driver = brower_start(port)
             # driver = serive_create_linux(profilepath)
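
With the handler commented out, any per-row failure now aborts the whole crawl instead of being written to error_list. A hedged sketch of keeping both behaviors behind a switch; DEBUG and crawl_one are illustrative names standing in for the loop body above, not part of this commit:

DEBUG = True  # hypothetical flag: True = let errors surface, False = log to error_list and continue

for key, row in url_pd.iterrows():
    if DEBUG:
        crawl_one(driver, row)  # crawl_one() stands in for the scraping body above
    else:
        try:
            crawl_one(driver, row)
        except Exception as e:
            print(e)
            error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
            data_select_insert(db, 'error_list', error_table_col, row)
            time.sleep(2)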