Jason hai 1 ano
pai
achega
bbe7f7638f
Modificáronse 1 ficheiros con 196 adicións e 0 borrados
  1. 196 0
      website_clickjobs/gen_seo.py

+ 196 - 0
website_clickjobs/gen_seo.py

@@ -0,0 +1,196 @@
+#import redis
+import time
+import traceback
+#import json
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.chrome.service import Service
+import json
+import random
+import time
+import datetime
+import sys
+import codecs
+import random
+import os
+import time
+import requests
+import pymysql
+import urllib.parse
+pymysql.install_as_MySQLdb()
+driver=None
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+headers = {
+        "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+def send_msg(kw):
+    params = {"message":kw}  
+    r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
+
+
+
+
+def re_get_webdriver():
+    global port
+    global driver
+    result=[]
+    if driver is not None:
+        print('closing....')
+        driver.quit()
+        os.system('killall chrome')
+        print('quit....')
+        driver=None
+    try:
+        options = webdriver.ChromeOptions()
+        # options.add_argument("user-agent=%s" % user_agent)
+        # options.add_argument('--headless')
+        options.add_argument("--incognito")
+        driver = webdriver.Chrome(options=options)
+        driver.delete_all_cookies()
+        driver.set_window_size(1400,1000)
+    except:
+        traceback.print_exc()
+        driver=None
+        return None
+
+
+
+def run_once(jsobj):
+
+    table=db['nda_log']
+    print(jsobj)
+    global driver
+
+
+#    i=random.randint(0,9)
+    i=100
+    if driver is None:
+        time.sleep(8)
+        re_get_webdriver()
+    if driver is None:
+        return
+    try:
+        kw=jsobj['kw']
+        if jsobj.get('domain') is None:
+            exclude=jsobj['exclude']
+            domain=None
+        else:
+            domain=jsobj['domain']
+            exclude=None
+
+        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(kw), 100, 'zh-TW')
+        driver.get(googleurl)
+
+        time.sleep(6)
+        print(driver.current_url)
+        if 'sorry' in driver.current_url:
+            print("URL Error: Caught")
+            return
+        # elmt = driver.find_element(By.XPATH, "//input[@name='q']")
+        # time.sleep(1)
+        # elmt.send_keys(kw)
+        # elmt.send_keys(Keys.ENTER)
+        # time.sleep(6)
+
+        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
+
+        numresults=len(elmts)
+        print('搜尋結果數量',numresults)
+        if numresults==0:
+            send_msg('stop working...')
+            sys.exit()
+
+        idx=1
+        found=False
+        test_lst=[]
+        txt_dict={}
+        for elmt in elmts:
+            href=elmt.get_attribute('href')
+            txt=elmt.text
+            if len(txt)>10:
+                if domain is not None:
+                    if domain in href:
+                        print('found....')
+                        print('clicked....')
+                        print(href)
+                        print("ranking", idx)
+                        found=True
+
+                        webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                        # elmt.click()
+                        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                        table.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt,'dt':datetime.datetime.now(),'client':jsobj['cust']})
+                        time.sleep(5)
+                        page_height = driver.execute_script("return document.body.scrollHeight")
+
+                        scroll_step = page_height // 4
+                        current_height = 0
+
+                        while current_height < page_height:
+                            driver.execute_script(f"window.scrollTo(0, {current_height + scroll_step});")
+                            time.sleep(3)
+                            current_height += scroll_step
+
+                        time.sleep(10)
+                        db.close()
+                        break
+                else:
+                    ex=False
+                    for ee in exclude:
+                        if ee in href:
+                            ex=True
+                    if not ex:
+                        test_lst.append(elmt)
+                        txt_dict[elmt]=txt
+                    
+            idx+=1
+        if exclude is not None:
+            print('exclude')
+            elmt=random.choice(test_lst[5:])
+            print(elmt)
+            print(txt_dict[elmt])
+
+            webdriver.ActionChains(driver).move_to_element(elmt).perform()
+            elmt.click()
+#            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            time.sleep(5)
+
+        if not found:
+            table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄','client':jsobj['cust']})
+
+
+    except:
+        print('exception')
+        traceback.print_exc()
+
+    driver.quit()
+    # sys.exit()
+
+while True:
+    cursor=db.query('select json from seo_jobs where cust="啟翔" and plan="形象SEO" and json like "%陳百欽%" order by rand() limit 1')
+    for c in cursor:
+        js=json.loads(c['json'])
+        prefix=js['prefix']
+        postfix=js['postfix']
+        domain=js['domain'][0]
+        positive=js['positive']
+        rnd=js['rnd']
+
+    kw1=random.choice(positive)
+    kw2=random.choice(rnd)
+    kw=kw1+" "+prefix+" "+kw2
+    code='03'
+
+    run_once({'domain':domain,'kw':kw, 'cust':'啟翔'})
+    time.sleep(61)