|
@@ -489,7 +489,8 @@ def main():
|
|
|
error_table = db['error_list2']
|
|
|
|
|
|
iddict=build_cache(db)
|
|
|
-
|
|
|
+ print("iddict...{}".format(datetime.now()))
|
|
|
+
|
|
|
port=4444
|
|
|
if len(sys.argv) == 3 :
|
|
|
port=int(sys.argv[1])
|
|
@@ -506,6 +507,7 @@ def main():
|
|
|
|
|
|
job = get_next_job(db)
|
|
|
c = 0
|
|
|
+
|
|
|
for row, group in job.iterrows():
|
|
|
try:
|
|
|
item_url = group['item_url']
|
|
@@ -526,6 +528,8 @@ def main():
|
|
|
print('parsing shop info....')
|
|
|
for i in range(5):
|
|
|
print('shop info try...{}'.format(i))
|
|
|
+ print("shop info try...{}".format(datetime.now()))
|
|
|
+
|
|
|
driver.get(item_url)
|
|
|
time.sleep(3)
|
|
|
|
|
@@ -546,11 +550,15 @@ def main():
|
|
|
|
|
|
# reviews
|
|
|
print('parsing reviews....')
|
|
|
+ print("parsing reviews.....{}".format(datetime.now()))
|
|
|
+
|
|
|
if not output['user_ratings_total']:
|
|
|
output['reviews'] = ''
|
|
|
else:
|
|
|
for i in range(3):
|
|
|
print('reviews try...{}'.format(i))
|
|
|
+ print("reviews try.....{}".format(datetime.now()))
|
|
|
+
|
|
|
try:
|
|
|
wait = WebDriverWait(driver, 30)
|
|
|
more_reviews_css = "button[jsaction='pane.rating.moreReviews']"
|
|
@@ -578,6 +586,8 @@ def main():
|
|
|
if output['header_image'] != '':
|
|
|
for i in range(3):
|
|
|
print('photo try...{}'.format(i))
|
|
|
+ print("photo try......{}".format(datetime.now()))
|
|
|
+
|
|
|
driver.get(item_url)
|
|
|
time.sleep(0.5)
|
|
|
print(driver.current_url)
|
|
@@ -608,6 +618,8 @@ def main():
|
|
|
|
|
|
print(output)
|
|
|
save_js_to_db(output, fid)
|
|
|
+ print("save_js_to_db......{}".format(datetime.now()))
|
|
|
+
|
|
|
error_table.upsert({'item_url':item_url,'check_':1},['item_url'])
|
|
|
print('*'*10)
|
|
|
|
|
@@ -623,6 +635,7 @@ def main():
|
|
|
error_table3 = db['error_list3']
|
|
|
error_table3.insert({'name':name,'keyword':keyword,'item_url':item_url,'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
|
|
|
traceback.print_exc()
|
|
|
+ sys.exit()
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|