Browse Source

每日排行

zooeytsai 2 years ago
parent
commit
67bcfd4cd8
1 changed file with 129 additions and 0 deletions
  1. 129 0
      SEO/ranking_day.py

+ 129 - 0
SEO/ranking_day.py

@@ -0,0 +1,129 @@
+from random import randint
+import sys
+import dataset
+from selenium import webdriver
+import traceback
+import datetime
+import codecs
+import time
+import urllib
+import argparse
+import schedule
+import logging
+import sys
+from logging.handlers import SysLogHandler
+import socket
+import pandas as pd
+import pymysql
+pymysql.install_as_MySQLdb()
+import random
+from selenium.webdriver.chrome.service import Service
+
+
# chromedriver locations, presumably for the developers' own machines.
# NOTE(review): neither path is referenced by the visible code (run_once uses
# its own chromedriver path) — confirm whether they are still needed.
# Raw string: '\p' and '\c' are not valid escape sequences, so the non-raw
# literal only worked by accident and triggers a warning on modern Python.
path = r'C:\portable\chromedriver'
path_z = '/Users/zooeytsai/Downloads/chromedriver 2'
# Shared WebDriver handle: assigned by run_once() and read by process_one().
driver = None
+
+
def rua():
    """Return a randomly chosen desktop browser User-Agent string.

    Returns:
        str: One entry of the built-in pool of Firefox / Chrome / Opera
        User-Agent strings, picked with ``random.choice``.
    """
    pool = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
        # The trailing comma on the next entry matters: without it Python
        # silently concatenates it with the following literal into a single
        # malformed User-Agent.
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
    ]
    return random.choice(pool)
+
def process_one(item):
    """Search Google for each keyword and record where the target domain ranks.

    For every ``(term, domain)`` pair the module-level ``driver`` loads the
    Google result page (``num=100``, ``hl=zh-TW``), the result links matched
    by the ``yuRUbf`` XPath are walked in page order, and one row
    (``title``, ``url``, ``keyword``, ``dt``, ``num``) is inserted into the
    module-level ``table`` for every link whose URL contains ``domain``.
    After each pair the function sleeps 90-120 seconds; once all pairs are
    done the driver is quit.

    Args:
        item: Either a single ``[term, domain]`` pair of strings, or an
            iterable of such pairs.

    Raises:
        SystemExit: If a search yields no usable result link. The driver is
            quit before exiting.
    """
    global driver
    # `import urllib` alone does not guarantee the `urllib.parse` submodule
    # is loaded, so bring `quote` into scope explicitly.
    from urllib.parse import quote

    # A bare [term, domain] pair must not be iterated element by element:
    # that would index into the keyword string and yield single characters.
    is_single_pair = (
        isinstance(item, (list, tuple))
        and len(item) == 2
        and all(isinstance(part, str) for part in item)
    )
    pairs = [item] if is_single_pair else item

    for pair in pairs:
        term = pair[0]
        domain = pair[1]
        print(term, domain)

        escaped_search_term = quote(term)
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100, 'zh-TW')
        print(googleurl)
        driver.get(googleurl)
        time.sleep(6)  # fixed wait before reading the result links

        # The locator strategy is passed as a plain string ("xpath" is the
        # value of By.XPATH); find_elements_by_xpath() no longer exists in
        # current Selenium 4 releases.
        elmts = driver.find_elements("xpath", "//div[@class='yuRUbf']/a")
        rank = 1   # position of the current link on the result page
        found = 0  # number of links whose href/text could be read

        for elmt in elmts:
            try:
                href = elmt.get_attribute('href')
                title = elmt.text
                print(href)
                print(title)
                found += 1
                if domain in href:
                    table.insert(
                        {'title': title, 'url': href, 'keyword': term, 'dt': datetime.datetime.now(), 'num': rank})
                rank += 1
            except Exception:
                # Best effort: a broken element must not abort the whole page.
                print('href2 exception')
                traceback.print_exc()

        if found == 0:
            # No result could be read at all: stop the whole run.
            print('None')
            driver.quit()
            sys.exit()

        print('中場休息')
        time.sleep(randint(90, 120))

    # Quit only after every pair has been handled; quitting inside the loop
    # would leave later iterations without a usable browser session.
    driver.quit()
+
+
def run_once(pport, item, driver_path='/root/driver/chromedriver'):
    """Create a Chrome WebDriver, run ``process_one(item)`` and quit the driver.

    The driver is stored in the module-level ``driver`` variable, which
    ``process_one`` reads.

    Args:
        pport: Port (string or int) used to build the ``debuggerAddress``
            option as ``127.0.0.1:<pport>``.
        item: Passed through unchanged to ``process_one``.
        driver_path: Path of the chromedriver executable handed to
            ``Service``. Defaults to the previously hard-coded location.
    """
    global driver
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--remote-debugging-port=9222')
    # The f-prefix belongs on the value so that pport is interpolated; the
    # previous literal was sent verbatim as "127.0.0.1:{pport}}".
    # NOTE(review): debuggerAddress presumably makes chromedriver attach to an
    # already running Chrome, in which case the launch flags above (headless,
    # port 9222) may have no effect — confirm the intended setup.
    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{pport}")
    options.add_argument("--user-agent=" + rua())
    options.add_argument("--incognito")

    driver = webdriver.Chrome(options=options, service=Service(driver_path))
    try:
        driver.delete_all_cookies()
        driver.set_window_size(1400, 1000)

        process_one(item)
        time.sleep(3)
    finally:
        # Always release the browser, including when process_one raises or
        # calls sys.exit().
        driver.quit()
+    
# SECURITY NOTE(review): the database user and password are embedded in this
# connection URL and therefore live in version control — consider loading
# them from the environment or a config file kept out of the repository.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
# Destination table for ranking rows; process_one() reads this module global.
table = db['google_rank']
cursor = db.query('select term,domain from seo.selected_kw')

# Materialise the keyword/domain pairs before the long-running crawl starts.
lst = [[c['term'], c['domain']] for c in cursor]

for pair in lst:
    # process_one() iterates over its argument and indexes every element as
    # (term, domain), so the single pair is wrapped in a one-element list;
    # passing the bare pair makes it index into the keyword string instead.
    run_once('9928', [pair])
    run_once('9929', [pair])
    # Long randomised pause between keywords.
    time.sleep(randint(250, 300))
+
+