Jared 2 years ago
parent
commit
e6fd3b1198
1 changed files with 14 additions and 1 deletions
  1. 14 1
      run4.py

+ 14 - 1
run4.py

@@ -489,7 +489,8 @@ def main():
     error_table = db['error_list2']
 
     iddict=build_cache(db)
-    
+    print("iddict...{}".format(datetime.now()))
+
     port=4444
     if len(sys.argv) == 3 :
         port=int(sys.argv[1])
@@ -506,6 +507,7 @@ def main():
 
     job = get_next_job(db)
     c = 0
+
     for row, group in job.iterrows():
         try:
             item_url = group['item_url']
@@ -526,6 +528,8 @@ def main():
             print('parsing shop info....')
             for i in range(5):
                 print('shop info try...{}'.format(i))
+                print("shop info try...{}".format(datetime.now()))
+
                 driver.get(item_url)
                 time.sleep(3)                
 
@@ -546,11 +550,15 @@ def main():
 
             # reviews
             print('parsing reviews....')
+            print("parsing reviews.....{}".format(datetime.now()))
+
             if not output['user_ratings_total']:
                 output['reviews'] = ''
             else:
                 for i in range(3):
                     print('reviews try...{}'.format(i))
+                    print("reviews try.....{}".format(datetime.now()))
+
                     try:
                         wait = WebDriverWait(driver, 30)
                         more_reviews_css = "button[jsaction='pane.rating.moreReviews']"
@@ -578,6 +586,8 @@ def main():
             if output['header_image'] != '':
                 for i in range(3):
                     print('photo try...{}'.format(i))
+                    print("photo try......{}".format(datetime.now()))
+
                     driver.get(item_url)
                     time.sleep(0.5)
                     print(driver.current_url)
@@ -608,6 +618,8 @@ def main():
 
             print(output)
             save_js_to_db(output, fid)
+            print("save_js_to_db......{}".format(datetime.now()))
+
             error_table.upsert({'item_url':item_url,'check_':1},['item_url'])
             print('*'*10)
 
@@ -623,6 +635,7 @@ def main():
             error_table3 = db['error_list3']
             error_table3.insert({'name':name,'keyword':keyword,'item_url':item_url,'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
             traceback.print_exc()
+        sys.exit()
             
 
 if __name__ == '__main__':