فهرست منبع

Merge branch 'master' of http://git.choozmo.com:3000/choozmo/kw_tools

Jason 11 ماه پیش
والد
کامیت
3d843ed5ee
1 فایل تغییر یافته به همراه 194 افزوده شده و 0 حذف شده
  1. 194 0
      SEO/click_negative.py

+ 194 - 0
SEO/click_negative.py

@@ -0,0 +1,194 @@
+# import redis
+import time
+import traceback
+# import json
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+# import urllib
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+from selenium.webdriver.common.keys import Keys
+import json
+import random
+import time
+# import redis
+import sys
+import codecs
+import random
+import datetime
+import os
+import time
+import requests
+import urllib.parse
+import ast
+
+driver = None
+
+db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')
+
+# db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+def re_get_webdriver():
+    global port
+    global driver
+    global portnum
+    global is_docker
+    result = []
+    if driver is not None:
+        print('closing....')
+        driver.quit()
+        print('quit....')
+        driver = None
+    try:
+        s = Service('/Users/mac/Downloads/119/chromedriver')
+        options = webdriver.ChromeOptions()
+        options.add_argument('--headless')
+
+        # options.add_argument("--user-agent=" + "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19")
+        options.add_argument("--incognito")
+        driver = webdriver.Chrome(
+            options=options, service=s)
+        driver.delete_all_cookies()
+        driver.set_window_size(1400, 1000)
+    except:
+        traceback.print_exc()
+        driver = None
+        return None
+    return driver
+
+
+def run_once(jsobj):
+    table = db['seo_jobs_ranking']
+    history = db['seo_search_history']
+    nda_log = db['nda_log']
+
+    print(jsobj)
+    neg_word = ast.literal_eval(jsobj['neg_word'])
+    print('這裏',neg_word)
+    i = 100
+    while True:
+        driver = re_get_webdriver()
+        print('re_get_webdriver')
+        if driver is not None:
+            break
+        time.sleep(3)
+    try:
+        kw = jsobj['kw']
+        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(kw), 100, 'zh-TW')
+        driver.get(googleurl)
+
+        time.sleep(6)
+        print(driver.current_url)
+        # elmt = driver.find_element(By.XPATH, "//input[@name='q']")
+        # time.sleep(1)
+        # elmt.send_keys(kw)
+        # elmt.send_keys(Keys.ENTER)
+        # time.sleep(6)
+
+        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
+
+        numresults = len(elmts)
+
+        print('搜尋結果數量', numresults)
+        if numresults == 0:
+            print(driver.current_url)
+            print(driver.title)
+            sys.exit()
+        #        time.sleep(9999)
+
+        idx = 1
+        found = False
+        test_lst = []
+        clickelmt = None
+        neg_count = 0
+        neg_total = 0
+        clickidx = 0
+        clickhref = ''
+        clicktitle = ''
+        for elmt in elmts:
+            href = elmt.get_attribute('href')
+            txt = elmt.text
+            history.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt,'dt':datetime.datetime.now()})
+            # if '坑殺' in txt or '侵占' in txt or '判決書' in txt or '強佔' in txt or '掏空' in txt or '送達公告' in txt or '違反勞動'in txt:
+            #     neg_count+=1
+            #     neg_total+=idx
+            # print('分數',neg_total, neg_count)
+            for i in neg_word:
+                print(i)
+                if i in txt:
+                    neg_count += 1
+                    neg_total += idx
+            print('分數',neg_total, neg_count)
+            if domain in href:
+                print('found....')
+
+                print(href)
+                print(txt)
+                print("ranking", idx)
+                found = True
+                clickelmt = elmt
+                clickidx = idx
+                clickhref = href
+                clicktitle = txt
+                nda_log.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt,'dt': datetime.datetime.now(), 'client': jsobj['client']})
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                print('clicked....')
+                time.sleep(5)
+                if neg_count == 0:
+                    negstr = '0'
+                else:
+                    negstr = str(neg_total / neg_count)
+                print(' negative: ' + negstr)
+                table.insert({'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': domain, 'title': clicktitle,
+                              'avg_neg': negstr, 'dt': datetime.datetime.now()})
+                db.close()
+                break
+            else:
+                nda_log.insert({'ranking': -1, 'kw': kw, 'results': numresults, 'url': href, 'title': '未收錄','dt': datetime.datetime.now(), 'client': jsobj['client']})
+            idx += 1
+
+        db.close()
+
+    except:
+        traceback.print_exc()
+
+        print('exception')
+        traceback.print_exc()
+        db.close()
+    driver.quit()
+
+cursor = db.query("select cust, json from public.seo_job where cust='信義房屋' order by random() limit 1")
+cursor_n = db.query("select * from public.neg_word where client='信義房屋'")
+
+for c in cursor:
+    js_string = c['json']
+    js = json.loads(js_string)
+    prefix=js['prefix']
+    postfix=js['postfix']
+    domain=js['domain'][0]
+    positive=js['positive']
+    rnd=js['rnd']
+
+kw1=random.choice(positive)
+kw2=random.choice(rnd)
+# kw=kw1+" "+prefix+" "+kw2
+kw = prefix + " " + kw1
+for c in cursor_n:
+    neg_word = c['neg_word']
+
+
+while True:
+    run_once({'domain':domain,'kw':'信義 房屋','client':'信義房屋','neg_word':neg_word})
+    print('等待下次執行')
+    time.sleep(80)
+
+
+
+
+