Explorar o código

genseo emergency

Jason hai 1 ano
pai
achega
a8680d1f32
Modificouse 1 ficheiro con 43 adicións e 21 borrados
  1. 43 21
      website_clickjobs/gen_seo.py

+ 43 - 21
website_clickjobs/gen_seo.py

@@ -38,7 +38,7 @@ def send_msg(kw):
     params = {"message":kw}  
     r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
 
-
+blacklist = ['https://www.chinatimes.com/realtimenews/20220613003142-260402']
 
 
 def re_get_webdriver():
@@ -54,7 +54,7 @@ def re_get_webdriver():
     try:
         options = webdriver.ChromeOptions()
         # options.add_argument("user-agent=%s" % user_agent)
-        # options.add_argument('--headless')
+        options.add_argument('--headless')
         options.add_argument("--incognito")
         driver = webdriver.Chrome(options=options)
         driver.delete_all_cookies()
@@ -115,6 +115,8 @@ def run_once(jsobj):
         found=False
         test_lst=[]
         txt_dict={}
+
+
         for elmt in elmts:
             href=elmt.get_attribute('href')
             txt=elmt.text
@@ -143,7 +145,6 @@ def run_once(jsobj):
                             current_height += scroll_step
 
                         time.sleep(10)
-                        db.close()
                         break
                 else:
                     ex=False
@@ -155,6 +156,7 @@ def run_once(jsobj):
                         txt_dict[elmt]=txt
                     
             idx+=1
+
         if exclude is not None:
             print('exclude')
             elmt=random.choice(test_lst[5:])
@@ -166,8 +168,18 @@ def run_once(jsobj):
 #            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
             time.sleep(5)
 
-        if not found:
-            table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄','client':jsobj['cust']})
+        if not found: #don't waste resources, pick a random link as long as it is ok
+            pick=''
+            negativeflag=True
+            while negativeflag==True:
+                negativeflag=False
+                pick = random.choice(elmts)
+                href = pick.get_attribute('href')
+                if href in blacklist:
+                    negativeflag=True
+            webdriver.ActionChains(driver).move_to_element(pick).perform()
+            webdriver.ActionChains(driver).move_to_element(pick).click().perform()
+            #table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄','client':jsobj['cust']})
 
 
     except:
@@ -178,19 +190,29 @@ def run_once(jsobj):
     # sys.exit()
 
 while True:
-    cursor=db.query('select json from seo_jobs where cust="啟翔" and plan="形象SEO" and json like "%陳百欽%" order by rand() limit 1')
-    for c in cursor:
-        js=json.loads(c['json'])
-        prefix=js['prefix']
-        postfix=js['postfix']
-        domain=js['domain'][0]
-        positive=js['positive']
-        rnd=js['rnd']
-
-    kw1=random.choice(positive)
-    kw2=random.choice(rnd)
-    kw=kw1+" "+prefix+" "+kw2
-    code='03'
-
-    run_once({'domain':domain,'kw':kw, 'cust':'啟翔'})
-    time.sleep(61)
+    try:
+        cursor=db.query('select json from seo.seo_jobs where cust="啟翔" and plan="形象SEO" and json like "%陳百欽%" order by rand() limit 1')
+        for c in cursor:
+            js=json.loads(c['json'])
+            prefix=js['prefix']
+            postfix=js['postfix']
+            domain=js['domain'][0]
+            positive=js['positive']
+            rnd=js['rnd']
+
+        kw=''
+        while '陳百欽' not in kw:
+            kw=''
+            kw1=random.choice(positive)
+            kw2=random.choice(rnd)
+            kw=kw1+" "+prefix+" "+kw2
+            code='03'
+
+        run_once({'domain':domain,'kw':kw, 'cust':'啟翔'})
+        time.sleep(61)
+        cursor=None
+        driver=None
+    except:
+        traceback.print_exc()
+        print("Execution Error")
+        time.sleep(20)