Bläddra i källkod

Update blacklist: add two URLs and skip blacklisted links in run_once

Jason 1 år sedan
förälder
incheckning
4ec95c528c
2 ändrade filer med 4 tillägg och 5 borttagningar
  1. +2 -3
      website_clickjobs/gen_seo.py
  2. +2 -2
      website_clickjobs/gen_seo2.py

+ 2 - 3
website_clickjobs/gen_seo.py

@@ -39,8 +39,7 @@ def send_msg(kw):
     params = {"message":kw}  
     r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
 
-blacklist = ['https://www.chinatimes.com/realtimenews/20220613003142-260402']
-
+blacklist = ['https://www.chinatimes.com/realtimenews/20220613003142-260402','https://ipo168.pixnet.net/blog/post/207626239-%E5%95%9F%E7%BF%94%E8%BC%95%E9%87%91%E5%B1%AC%E7%A7%91%E6%8A%80%E8%82%A1%E7%A5%A8%E6%98%AF%E9%80%99%E6%A8%A3%E7%9A%84%E5%85%AC%E5%8F%B8%21%21%E6%8A%95%E8%B3%87%E5%89%8D%E8%A6%81','https://latest.mediatagtw.com/article/%e5%95%9f%e7%bf%94%e8%bc%95%e9%87%91%e5%b1%ac%e7%a7%91%e6%8a%80%e8%82%a1%e4%bb%bd%e6%9c%89%e9%99%90%e5%85%ac%e5%8f%b8#gsc.tab=0']
 
 def re_get_webdriver():
     global port
@@ -131,7 +130,7 @@ def run_once(jsobj):
             txt=elmt.text
             if len(txt)>10:
                 if domain is not None:
-                    if domain in href:
+                    if domain in href and href not in blacklist:
                         print('found....')
                         print('clicked....')
                         print(href)

+ 2 - 2
website_clickjobs/gen_seo2.py

@@ -39,7 +39,7 @@ def send_msg(kw):
     params = {"message":kw}  
     r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
 
-blacklist = ['https://www.chinatimes.com/realtimenews/20220613003142-260402']
+blacklist = ['https://www.chinatimes.com/realtimenews/20220613003142-260402','https://ipo168.pixnet.net/blog/post/207626239-%E5%95%9F%E7%BF%94%E8%BC%95%E9%87%91%E5%B1%AC%E7%A7%91%E6%8A%80%E8%82%A1%E7%A5%A8%E6%98%AF%E9%80%99%E6%A8%A3%E7%9A%84%E5%85%AC%E5%8F%B8%21%21%E6%8A%95%E8%B3%87%E5%89%8D%E8%A6%81','https://latest.mediatagtw.com/article/%e5%95%9f%e7%bf%94%e8%bc%95%e9%87%91%e5%b1%ac%e7%a7%91%e6%8a%80%e8%82%a1%e4%bb%bd%e6%9c%89%e9%99%90%e5%85%ac%e5%8f%b8#gsc.tab=0']
 
 
 def re_get_webdriver():
@@ -131,7 +131,7 @@ def run_once(jsobj):
             txt=elmt.text
             if len(txt)>10:
                 if domain is not None:
-                    if domain in href:
+                    if domain in href and href not in blacklist:
                         print('found....')
                         print('clicked....')
                         print(href)