zooey 2 years ago
parent
commit
d57cbc2699
4 changed files with 228 additions and 16 deletions
  1. 4 4
      SEO/clickbot_100.py
  2. 3 8
      SEO/clickbot_100_one.py
  3. 4 4
      SEO/csv_to_sql.py
  4. 217 0
      SEO/ranking_world_2.py

+ 4 - 4
SEO/clickbot_100.py

@@ -45,10 +45,10 @@ def process_one():
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
     lst=[]
     table=db['google_rank']
-    cursor = db.query('select term,domain from seo.selected_kw where client="極簡乳清"')
+    # cursor = db.query('select term,domain from seo.selected_kw where client="護全"')
     # cursor = db.query('select term,url from seo.sns_kw where client="loginheart"')
     # cursor = db.query('select term from seo.selected_kw where client="鑫富"')
-    # cursor = db.query('select term from seo.selected_kw where id between 1902 and 1923')
+    cursor = db.query('select term,domain from seo.selected_kw where id between 2146 and 2155')
     # cursor=db.query('select term from selected_kw and term not in (SELECT distinct(keyword) FROM ig_tags.save_result where url like "%beastparadise.net%" and datediff(now(),dt)=0)')
     for c in cursor:
         lst.append([c['term'],c['domain']])
@@ -62,7 +62,7 @@ def process_one():
         driver.get(googleurl)
         time.sleep(60)
         print(driver.current_url)
-        # driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/新飛\/'+i[0]+'.png')
+        driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/護全\/'+i[0]+'.png')
         df=pd.DataFrame()
         elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
         print('結果數量',len(elmts))
@@ -92,7 +92,7 @@ def process_one():
         df['結果網址']=datadict['結果網址']
         df['結果名次']=datadict['結果名次']
 
-        # df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/新飛\/'+i[0]+".xls")
+        df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/護全\/'+i[0]+".xls")
 
         driver.quit()
         print('等待')

+ 3 - 8
SEO/clickbot_100_one.py

@@ -41,8 +41,8 @@ def restart_browser():
     return driver
 
 def process_one():
-    lst=['信義房屋','信義 房屋','信義房仲','信義 房仲']
-    date='1221'
+    lst=['龔國權']
+    date='1223'
     for term in lst:
         driver=restart_browser()
         escaped_search_term=urllib.parse.quote(term)
@@ -87,10 +87,5 @@ def process_one():
 process_one()
 
 
-os.system('docker container stop tiny8')
-time.sleep(3)
-os.system('docker container rm tiny8')
-time.sleep(3)
-os.system('docker run -d -p 9924:9222 --rm --shm-size="900m" --name tiny1 chromedp/headless-shell')
-time.sleep(3)
+
 

+ 4 - 4
SEO/csv_to_sql.py

@@ -10,8 +10,8 @@ db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb
 table=db['seo_jobs']
 # table=db['selected_kw']
 # table=db['sns_kw']
-client='泡麵'
-domain='simpleprotein.tw'
+client='信義房屋'
+domain='www.hujiai-international.com'
 # for index,row in df.iterrows():
 # with open("C:\/Users\/s1301\/Documents\/新飛國際遊學SEO - 關鍵字12.08.csv") as f:
 #     data_all = f.readlines()
@@ -19,7 +19,7 @@ domain='simpleprotein.tw'
 f = open("C:\/Users\/s1301\/Documents\/456.csv",encoding='utf-8')
 # df = pd.read_csv(f,header=None, names=['kw', 'url'])
 # df = pd.read_csv(f,header=None, names=['kw'])
-df = pd.read_csv(f,header=None, names=['prefix', 'positive','domain','rnd'])
+df = pd.read_csv(f,header=None, names=['prefix', 'positive','rnd','domain'])
 df=df.fillna('')
 # print(df)
 # url='fflc.tw'
@@ -29,7 +29,7 @@ for index,row in df.iterrows():
     # print(row['kw'])
     # lst.append(row['kw'])
     prefix='"'+row['prefix']+'"'
-    positive=row['positive']
+    positive='"'+row['positive']+'"'
     domain='"'+row['domain']+'"'
     rnd='"'+str(row['rnd'])+'"'
     postfix='""'

+ 217 - 0
SEO/ranking_world_2.py

@@ -0,0 +1,217 @@
+# import redis
+import time
+import traceback
+# import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+from selenium.webdriver.common.keys import Keys
+import json
+import random
+import time
+import redis
+import sys
+import codecs
+import pandas as pd
+import random
+import os
+import time
+import datetime
+from selenium.webdriver.chrome.service import Service
+import dataset
+import pymysql
+
+pymysql.install_as_MySQLdb()
+
+from userAgentRandomizer import userAgents
+import requests
+
+# Shared Selenium driver; re_get_webdriver() creates or replaces it.
+driver = None
+
+# NOTE(review): the connection URL (including the password) and the LINE
+# Notify token were committed in clear text. The literals remain only as
+# fallbacks so existing setups keep working; prefer the environment
+# variables below and rotate the exposed secrets.
+db = dataset.connect(os.environ.get(
+    'SEO_DB_URL',
+    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'))
+
+# HTTP headers used by send_msg() when posting to the LINE Notify API.
+headers = {
+    "Authorization": "Bearer " + os.environ.get(
+        'LINE_NOTIFY_TOKEN', "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi"),
+    "Content-Type": "application/x-www-form-urlencoded"
+}
+
+
+def send_msg(kw):
+    """Send the text *kw* as a LINE Notify message.
+
+    The message is placed in the form-encoded request body, matching the
+    Content-Type declared in the module-level ``headers``. A timeout keeps a
+    stalled connection from blocking the script indefinitely.
+
+    Args:
+        kw: message text to deliver.
+
+    Raises:
+        requests.RequestException: on connection errors or timeout.
+    """
+    payload = {"message": kw}
+    requests.post("https://notify-api.line.me/api/notify", headers=headers,
+                  data=payload, timeout=30)
+
+
+def re_get_webdriver():
+    """(Re)create the module-level Chrome ``driver``.
+
+    An existing driver is quit first and ``killall chrome`` is run. On
+    success ``driver`` holds a new incognito Chrome session with cookies
+    cleared and a 950x20000 window. On any failure the traceback is printed
+    and ``driver`` is left as None.
+
+    Returns:
+        None in every case; callers inspect the global ``driver`` instead.
+    """
+    global driver
+    if driver is not None:
+        print('closing....')
+        driver.quit()
+        os.system('killall chrome')
+        print('quit....')
+        driver = None
+    try:
+        # NOTE(review): the random user agent is only printed, never applied
+        # to the Chrome options -- confirm whether that is intended.
+        user_agent = userAgents().random()
+
+        options = webdriver.ChromeOptions()
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-dev-shm-usage")
+        # options.add_argument("--headless")
+        print(user_agent)
+        options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})
+        options.add_argument("--incognito")
+        if os.name == 'nt':
+            driver = webdriver.Chrome(options=options)
+        else:
+            # NOTE(review): a Windows-style path is used on the non-Windows
+            # branch -- verify this is the intended chromedriver location.
+            driver = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Downloads\chromedriver_108\chromedriver', options=options)
+        driver.delete_all_cookies()
+        driver.set_window_size(950, 20000)
+    except Exception:
+        traceback.print_exc()
+        if driver is not None:
+            # The browser started but setup failed: do not leak the process.
+            try:
+                driver.quit()
+            except Exception:
+                traceback.print_exc()
+        driver = None
+    return None
+
+
+def scrolling(driver, pgnum):
+    """Send PAGE_DOWN to the page body *pgnum* times.
+
+    When more than one key press is requested, sleep 0.3 s after each press.
+    """
+    page_body = driver.find_element("css selector", 'body')
+    pause_between = pgnum > 1
+    for _ in range(pgnum):
+        page_body.send_keys(Keys.PAGE_DOWN)
+        if pause_between:
+            time.sleep(0.3)
+
+
+def run_once(jsobj):
+    """Load a search results page, click the first hit on the target domain and log it.
+
+    Args:
+        jsobj: dict with 'kw' (search phrase stored with the log row) and
+            'url' (results page to open). An optional 'domain' key overrides
+            the module-level ``domain`` used to recognise the target link.
+
+    Behaviour:
+        * Returns None without doing anything if no browser can be started.
+        * If the page yields no result links, a LINE notification is sent and
+          the process exits.
+        * Otherwise the first link whose href contains the target domain is
+          clicked and a row is inserted into the ``nda_log`` table.
+        * The browser is always quit and the process always ends with
+          ``sys.exit()``.
+    """
+    global driver
+    table = db['nda_log']
+
+    print(jsobj)
+
+    if driver is None:
+        time.sleep(8)
+        re_get_webdriver()
+    if driver is None:
+        return
+    try:
+        kw = jsobj['kw']
+        url = jsobj['url']
+        # Prefer an explicit per-job domain; otherwise use the module global.
+        target_domain = jsobj['domain'] if 'domain' in jsobj else domain
+
+        driver.get(url)
+        time.sleep(6)
+        scrolling(driver, 10)
+        time.sleep(20)
+        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+        numresults = len(elmts)
+        print('搜尋結果數量', numresults)
+        time.sleep(20)
+        if numresults == 0:
+            send_msg('stop working...')
+            # SystemExit is not an Exception subclass, so it passes the
+            # handler below; the finally clause still closes the browser.
+            sys.exit()
+
+        for idx, elmt in enumerate(elmts, start=1):
+            href = elmt.get_attribute('href')
+            txt = elmt.text
+            if not href or target_domain not in href:
+                continue
+
+            print(href)
+            print("ranking", idx)
+
+            webdriver.ActionChains(driver).move_to_element(elmt).perform()
+            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            print('clicked....')
+            table.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt,
+                          'dt': datetime.datetime.now(), 'client': 'i5'})
+            time.sleep(6)
+            db.close()
+            # The click navigates away and the database is now closed, so the
+            # remaining result elements can no longer be used.
+            break
+    except Exception:
+        print('exception')
+        traceback.print_exc()
+    finally:
+        driver.quit()
+
+    sys.exit()
+
+
+
+import urllib.parse
+
+# Values for the Google "uule" location parameter; the encoded names are
+# New York, Washington, Miami and Chicago. Each entry already ends with
+# "&gws_rd=cr".
+city_lst = ["w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr",
+            "w+CAIQICIYV2FzaGluZ3RvbixVbml0ZWQgU3RhdGVz&gws_rd=cr",
+            "w+CAIQICIbTWlhbWksRmxvcmlkYSxVbml0ZWQgU3RhdGVz&gws_rd=cr",
+            "w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr"]
+
+
+# Fetch one random job definition for this customer/plan.
+cursor=db.query('select json from seo_jobs where cust="美東" and plan="形象SEO" order by rand() limit 1')
+job_row = next(iter(cursor), None)
+if job_row is None:
+    # Without a job row the variables below would be undefined (NameError).
+    sys.exit('no matching row in seo_jobs')
+
+js=json.loads(job_row['json'])
+prefix=js['prefix']
+postfix=js['postfix']
+domain=js['domain'][0]  # read by run_once() as the domain to look for
+positive=js['positive']
+rnd=js['rnd']
+
+# Build the search phrase: fixed prefix plus one random positive and rnd term.
+kw1=random.choice(positive)
+kw2=random.choice(rnd)
+kw=prefix+" "+kw1+" "+kw2
+city = random.choice(city_lst)
+# Encode the phrase so spaces, '&', '#' and non-ASCII text cannot break the query string.
+url = f"https://www.google.com/search?q={urllib.parse.quote_plus(kw)}&hl=en&gl=us&num=100&uule={city}&gws_rd=cr"
+run_once({'kw':kw,'url':url})