|
@@ -1,14 +1,17 @@
|
|
|
from _clickjob0113 import *
|
|
|
|
|
|
hhhdb = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
|
|
|
-ds=hhhdb.query('select * from hhh.hhh_designer_for_crawl')
|
|
|
-#TAG ABOVE AND UNTAG BELOW FOR DEBUG
|
|
|
-#ds=hhhdb.query('select * from hhh.hhh_designer_for_crawl where hdesigner_id=XXX')
|
|
|
-data=[]
|
|
|
-for i in ds:
|
|
|
- for j in range(i['weight']):
|
|
|
- data.append([i['hdesigner_id'],i['title'].replace('\xa0',' ')])
|
|
|
-random.shuffle(data)
|
|
|
+
|
|
|
+def reset():
|
|
|
+ ds=hhhdb.query('select * from hhh.hhh_designer_for_crawl')
|
|
|
+ #COMMENT OUT THE QUERY ABOVE AND UNCOMMENT THE ONE BELOW FOR DEBUG
|
|
|
+ #ds=hhhdb.query('select * from hhh.hhh_designer_for_crawl where hdesigner_id=XXX')
|
|
|
+ data=[]
|
|
|
+ for i in ds:
|
|
|
+ for j in range(i['weight']):
|
|
|
+ data.append([i['hdesigner_id'],i['title'].replace('\xa0',' ')])
|
|
|
+ random.shuffle(data)
|
|
|
+ return data
|
|
|
|
|
|
domain = 'hhh.com.tw'
|
|
|
#target_domain = get_xml_data(sourceurls)
|
|
@@ -19,17 +22,28 @@ brands={domain:'hhh'}
|
|
|
|
|
|
runcount=0
|
|
|
while True:
|
|
|
- print("Run " + str(runcount+1))
|
|
|
- target = random.choice(data) # START HERE
|
|
|
- target_id = str(target[0])
|
|
|
- target_name = target[1]
|
|
|
- print(target_id)
|
|
|
- print(target_name)
|
|
|
- statuscode = execute(domain, target_id, brands, target_name)
|
|
|
- if statuscode == 444:
|
|
|
- print("Completed ", runcount, " times before being caught")
|
|
|
- time.sleep(300)
|
|
|
- else:
|
|
|
- runcount+=1
|
|
|
- time.sleep(40)
|
|
|
-time.sleep(60)
|
|
|
+ try:
|
|
|
+ print("Gathering data...")
|
|
|
+ data = reset()
|
|
|
+ print('Data collected')
|
|
|
+ dataok = 1 if data else 0  # empty data would make random.choice() raise IndexError below
|
|
|
+ except Exception:  # a bare except would also swallow KeyboardInterrupt/SystemExit
|
|
|
+ dataok = 0
|
|
|
+ print("Error while collecting data.")
|
|
|
+ if dataok == 1:
|
|
|
+ for i in range(10):
|
|
|
+ print("Run " + str(runcount+1))
|
|
|
+ target = random.choice(data) # START HERE
|
|
|
+ target_id = str(target[0])
|
|
|
+ target_name = target[1]
|
|
|
+ print(target_id)
|
|
|
+ print(target_name)
|
|
|
+ statuscode = execute(domain, target_id, brands, target_name)
|
|
|
+ if statuscode == 444:
|
|
|
+ print("Completed ", runcount, " times before being caught")
|
|
|
+ time.sleep(300)
|
|
|
+ else:
|
|
|
+ runcount+=1
|
|
|
+ time.sleep(40)
|
|
|
+ else:
|
|
|
+ time.sleep(60)
|