@@ -0,0 +1,236 @@
+#import redis
+import time
+import traceback
+#import json
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.chrome.service import Service
+import json
+import random
+import time
+import datetime
+import sys
+import codecs
+import random
+import os
+import time
+import requests
+import pymysql
+import urllib.parse
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+headers = {
+ "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
+ "Content-Type": "application/x-www-form-urlencoded"
+def send_msg(kw):
+ params = {"message":kw}
+ r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
+blacklist = ['https://www.chinatimes.com/realtimenews/20220613003142-260402']
+def re_get_webdriver():
+ global port
+ global driver
+ result=[]
+ if driver is not None:
+ print('closing....')
+ driver.quit()
+ os.system('killall chrome')
+ print('quit....')
+ driver=None
+ try:
+ options = webdriver.ChromeOptions()
+ # options.add_argument("user-agent=%s" % user_agent)
+ options.add_argument('--headless')
+ options.add_argument("--incognito")
+ driver = webdriver.Chrome(options=options)
+ driver.delete_all_cookies()
+ driver.set_window_size(1400,1000)
+ except:
+ traceback.print_exc()
+ driver=None
+ return None
+def getDriver():
+ options = webdriver.ChromeOptions()
+ #options.add_argument("user-agent=%s" % rua())
+ options.add_argument('--headless')
+ options.add_argument('--incognito')
+ options.add_argument('--no-sandbox')
+ driver=webdriver.Chrome(options=options)
+ driver.set_window_size(1400,1000)
+ return driver
+def run_once(jsobj):
+ """Search Google for jsobj['kw'] in a fresh headless Chrome and click one result.
+
+ Keys read from jsobj: 'kw'; 'domain' (optional); 'exclude' (read only when
+ 'domain' is missing or None); 'cust' (read only when a domain hit is logged).
+ A hit on 'domain' is inserted into the 'nda_log' table. The browser is
+ created with getDriver() and stored in the global ``driver``.
+ """
+ table=db['nda_log']
+ print(jsobj)
+ global driver
+# i=random.randint(0,9)
+ # NOTE(review): i is not read anywhere else in this function.
+ i=100
+ driver=getDriver()
+ try:
+ kw=jsobj['kw']
+ # Two modes: with 'domain' look for that domain in the results; without it
+ # use the 'exclude' list of href substrings instead.
+ if jsobj.get('domain') is None:
+ exclude=jsobj['exclude']
+ domain=None
+ else:
+ domain=jsobj['domain']
+ exclude=None
+ # Query string carries the URL-quoted keyword, num=100 and hl=zh-TW.
+ googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(kw), 100, 'zh-TW')
+ driver.get(googleurl)
+ time.sleep(6)
+ print(driver.current_url)
+ # presumably a redirect to Google's "sorry" (CAPTCHA / rate-limit) page — confirm
+ if 'sorry' in driver.current_url:
+ print("URL Error: Caught")
+ # NOTE(review): this return leaves the function without reaching the
+ # driver.quit() at the bottom, so the browser opened above is not closed here.
+ return
+ # elmt = driver.find_element(By.XPATH, "//input[@name='q']")
+ # time.sleep(1)
+ # elmt.send_keys(kw)
+ # elmt.send_keys(Keys.ENTER)
+ # time.sleep(6)
+ # Result anchors are located through the 'yuRUbf' wrapper class.
+ elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
+ numresults=len(elmts)
+ print('搜尋結果數量',numresults)
+ if numresults==0:
+ send_msg('stop working...')
+ # NOTE(review): sys.exit() raises SystemExit inside this try block and the
+ # bare `except:` below catches it, so the process does not exit here.
+ sys.exit()
+ # idx: ranking counter stored with a hit (starts at 1).
+ idx=1
+ found=False
+ # test_lst: candidate elements kept in exclude mode; txt_dict: element -> link text.
+ test_lst=[]
+ txt_dict={}
+ for elmt in elmts:
+ href=elmt.get_attribute('href')
+ txt=elmt.text
+ # Anchors whose visible text is 10 characters or shorter are not considered.
+ if len(txt)>10:
+ if domain is not None:
+ if domain in href:
+ print('found....')
+ print('clicked....')
+ print(href)
+ print("ranking", idx)
+ found=True
+ # Hover over the link first, then click it through an action chain.
+ webdriver.ActionChains(driver).move_to_element(elmt).perform()
+ # elmt.click()
+ webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+ # Record the hit: ranking, keyword, result count, URL, title, timestamp, client.
+ table.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt,'dt':datetime.datetime.now(),'client':jsobj['cust']})
+ time.sleep(5)
+ # Scroll down the opened page in quarter-height steps, pausing between steps.
+ page_height = driver.execute_script("return document.body.scrollHeight")
+ # NOTE(review): scroll_step is 0 when page_height is 1-3, in which case
+ # current_height never advances and the while loop below cannot finish.
+ scroll_step = page_height // 4
+ current_height = 0
+ while current_height < page_height:
+ driver.execute_script(f"window.scrollTo(0, {current_height + scroll_step});")
+ time.sleep(3)
+ current_height += scroll_step
+ time.sleep(10)
+ break
+ else:
+ # Exclude mode: keep the element unless its href contains one of the
+ # excluded substrings.
+ ex=False
+ for ee in exclude:
+ if ee in href:
+ ex=True
+ if not ex:
+ test_lst.append(elmt)
+ txt_dict[elmt]=txt
+ idx+=1
+ if exclude is not None:
+ print('exclude')
+ # Pick a random kept candidate, skipping the first five.
+ # NOTE(review): random.choice raises IndexError when test_lst holds five or
+ # fewer elements; that error ends up in the bare `except:` below.
+ elmt=random.choice(test_lst[5:])
+ print(elmt)
+ print(txt_dict[elmt])
+ webdriver.ActionChains(driver).move_to_element(elmt).perform()
+ elmt.click()
+# webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+ time.sleep(5)
+ if not found: #don't waste resources, pick a random link as long as it is ok
+ attempt=0
+ pick=''
+ negativeflag=True
+ # Draw random results until one is not in `blacklist`, giving up after 100 draws.
+ while negativeflag==True:
+ attempt+=1
+ negativeflag=False
+ pick = random.choice(elmts)
+ href = pick.get_attribute('href')
+ if href in blacklist:
+ negativeflag=True
+ # The triple-quoted string below is a no-op expression statement that holds
+ # disabled code; it has no runtime effect.
+ '''try:
+ content = pick.find_element(By.XPATH, "//em[@class='VwiC3b yXK7lf lyLwlc yDYNvb W8l4ac lEBKkf']/").text
+ print(content)
+ if "陳百欽" not in content:
+ Exception
+ except:
+ print("Not Found")
+ negativeFlag = True'''
+ if attempt==100:
+ print("Action Terminated")
+ break
+ # NOTE(review): if these two lines run after the loop, a pick that was still
+ # blacklisted when the 100-attempt limit hit is clicked anyway — confirm nesting.
+ webdriver.ActionChains(driver).move_to_element(pick).perform()
+ webdriver.ActionChains(driver).move_to_element(pick).click().perform()
+ #table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄','client':jsobj['cust']})
+ # NOTE(review): a bare except also catches SystemExit and KeyboardInterrupt.
+ except:
+ print('exception')
+ traceback.print_exc()
+ # Close the browser opened at the top of this function.
+ driver.quit()
+ # sys.exit()
+while True:
+ try:
+ cursor=db.query('select json from seo.seo_jobs where cust="啟翔" and plan="形象SEO" and json like "%陳百欽%" and (json like "%chinabiz.org.tw%" or json like "%vocus.cc%" or json like "%tw.news.yahoo.com%" or json like "%facebook.com%" or json like "%gvm.com.tw%" or json like "%fingermedia.tw%" or json like "%bg3.co%" or json like "%morningtaiwan.org%" or json like "%pchome.com.tw%" or json like "%twfile.com%" or json like "%twincn.com%" or json like "%theicons.net%" or json like "%nhu.edu.tw%") order by rand() limit 1')
+ for c in cursor:
+ js=json.loads(c['json'])
+ prefix=js['prefix']
+ postfix=js['postfix']
+ domain=js['domain'][0]
+ positive=js['positive']
+ rnd=js['rnd']
+ kw=''
+ while '陳百欽' not in kw:
+ kw=''
+ kw1=random.choice(positive)
+ kw2=random.choice(rnd)
+ kw=kw1+" "+prefix+" "+kw2
+ code='03'
+ run_once({'domain':domain,'kw':kw, 'cust':'啟翔'})
+ time.sleep(61)
+ cursor=None
+ driver=None
+ except:
+ traceback.print_exc()
+ print("Execution Error")
+ time.sleep(20)