Jason 2 år sedan
förälder
incheckning
9231bd6a13
2 ändrade filer med 74 tillägg och 21 borttagningar
  1. 21 21
      website_clickjobs/type-1/_clickjob.py
  2. 53 0
      website_clickjobs/type-1/hhh_e.py

+ 21 - 21
website_clickjobs/type-1/_clickjob.py

@@ -142,24 +142,24 @@ def run_once(domain, target_domain, brands, query):
 #execution starts here
 
 def execute(domain, target_domain, brands, query_list):
-    while True:
-        print("Ctrl+C or Ctrl+Z to stop.")
-        st = timeit.default_timer()
-        try:
-            statuscode = run_once(domain, target_domain, brands, random.choice(query_list))
-        except:
-            traceback.print_exc()
-        timetaken = timeit.default_timer()-st
-        print("Time taken: " + str(timetaken))
-        
-        print("Process returned with " + str(statuscode))
-        if statuscode == 444:
-            print("You have been caught!!! Program terminating.")
-            break
-
-        extrasleep = 0
-        if(timetaken < 70):
-            extrasleep = 70 - timetaken
-        print("Ctrl+C or Ctrl+Z to stop now.")
-        print("You have " + str(10 + extrasleep) + " seconds.")
-        time.sleep(10 + extrasleep)
+    """Run one click job, pause, and return the job's status code.
+
+    A query is picked at random from ``query_list`` and passed to
+    ``run_once`` together with ``domain``, ``target_domain`` and ``brands``.
+    The elapsed time and the status code are printed.  A status of 444 is
+    reported as "caught" and returned immediately, without pausing.
+    Otherwise the function sleeps for 10 seconds plus whatever is left of a
+    70-second window measured from the start of the run.
+
+    Returns:
+        The value returned by ``run_once``, or ``None`` when ``run_once``
+        raised (the traceback is printed and the pause still happens).
+    """
+    print("Ctrl+C or Ctrl+Z to stop.")
+    # Bind the status up front: if run_once raises, the code below would
+    # otherwise fail with UnboundLocalError while printing it.
+    statuscode = None
+    st = timeit.default_timer()
+    try:
+        statuscode = run_once(domain, target_domain, brands, random.choice(query_list))
+    except Exception:
+        # Catch Exception rather than everything so that Ctrl+C
+        # (KeyboardInterrupt) still stops the program as the prompt promises.
+        traceback.print_exc()
+    timetaken = timeit.default_timer()-st
+    print("Time taken: " + str(timetaken))
+
+    print("Process returned with " + str(statuscode))
+    if statuscode == 444:
+        print("You have been caught!!! Program terminating.")
+        return statuscode
+
+    # Top the run up to 70 seconds; never a negative amount.
+    extrasleep = max(0, 70 - timetaken)
+    print("Ctrl+C or Ctrl+Z to stop now.")
+    print("You have " + str(10 + extrasleep) + " seconds.")
+    time.sleep(10 + extrasleep)
+    return statuscode

+ 53 - 0
website_clickjobs/type-1/hhh_e.py

@@ -0,0 +1,53 @@
+from _clickjob import *
+import xmltodict
+
+def get_xml_data(urls):
+    """Collect the list of target page URLs and print a preview of it.
+
+    When the module-level ``priority`` list is non-empty, ``urls`` is
+    ignored: the first element of every priority entry is turned into a
+    ``designers_static`` page URL, written back into the entry, and
+    collected.  Otherwise each sitemap in ``urls`` is downloaded and the
+    ``loc`` value of every ``url`` entry is collected.
+
+    Args:
+        urls: sitemap URLs to download when ``priority`` is empty.
+
+    Returns:
+        A list of page URLs; its first ten items (at most) are printed.
+    """
+    base = 'https://hhh.com.tw/HHH_NEW/designers_static/'
+    data = []
+    if priority:
+        for p in priority:
+            # The entry itself is rewritten in place, so the caller's
+            # priority list holds the full URL afterwards.
+            p[0] = base + str(p[0]) + '.html'
+            data.append(p[0])
+    else:
+        for u in urls:
+            page = requests.get(u)
+            designerdata = xmltodict.parse(page.content)['urlset']['url']
+            data.extend(d['loc'] for d in designerdata)
+
+    # Slicing already copes with lists shorter than ten items.
+    print(data[:10])
+    return data
+
+# Sitemaps that get_xml_data() downloads when no priority targets are set.
+sourceurls = ["https://hhh.com.tw/HHH_NEW/sitemap/sitemap.xml","https://m.hhh.com.tw/HHH_NEW/sitemap/sitemap.xml"]
+
+# PRIORITY IDs, clear list (make it become [] ) to disable priority
+priority = [['https://hhh.com.tw/brand-index.php?brand_id=216', '好萊得'], ['https://hhh.com.tw/brand-index.php?brand_id=211', '艾立思']]
+#priority = [[54, '虞國綸', '格綸設計'], [793, '許伯争', '樂作空間設計'], 32]      # RESERVED -- LEVEL 2
+#priority = [[54, '虞國綸', '格綸設計'], [793, '許伯争', '樂作空間設計'], 32, 331] # RESERVED -- LEVEL 3
+
+domain = 'hhh.com.tw'
+# The fallback branch of the loop reads target_domain, so it must exist once
+# priority is cleared; the sitemaps are only fetched in that case.
+target_domain = [] if priority else get_xml_data(sourceurls)
+brands={domain:'hhh'}
+# The trailing comma makes this a one-element tuple; without it the value is
+# a plain string and random.choice() would pick single characters.
+query_list = ('幸福空間',)
+
+statuscode = 0
+
+# Keep running jobs until one of them reports status 444.
+while True:
+    if priority:
+        # Each priority entry is [target, query, query, ...].
+        target = random.choice(priority)
+        ql = target[1:]
+        # NOTE(review): x % 2 is only ever 0 or 1, so this test is never true
+        # and the prefixing below never runs -- confirm the intended odds.
+        if random.randint(0, 3) % 2 == 3:
+            ql = ['幸福空間 ' + q for q in ql]
+        print(ql)
+        statuscode = execute(domain, target[0], brands, ql)
+    else:
+        # NOTE(review): the execute() call is still disabled here, so this
+        # branch only reports the list size and never pauses between rounds.
+        target = random.choice(target_domain)
+        #statuscode = execute(domain, target_domain, brands, query_list)
+        print(len(target_domain))
+    if statuscode == 444:
+        break
+time.sleep(60)