zooey 2 years ago
parent
commit
04c225e60c

+ 2 - 1
INNNews/general_clickbot.py

@@ -61,7 +61,8 @@ def process_query(qs):
     send_kw_elmt.send_keys(Keys.ENTER)
     time.sleep(6)
 
-    elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
+
+
 
     idx=1
     ranking=-1

+ 20 - 20
SEO/clickbot_100.py

@@ -43,24 +43,26 @@ def restart_browser():
 
 def process_one():
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
-    lst=['123']
+    lst=[]
     table=db['google_rank']
+    cursor = db.query('select term,domain from seo.select_kw where client="loginheart"')
+    # cursor = db.query('select term,url from seo.sns_kw where client="loginheart"')
     # cursor = db.query('select term from seo.selected_kw where client="鑫富"')
     # cursor = db.query('select term from seo.selected_kw where id between 1902 and 1923')
     # cursor=db.query('select term from selected_kw and term not in (SELECT distinct(keyword) FROM ig_tags.save_result where url like "%beastparadise.net%" and datediff(now(),dt)=0)')
-    # for c in cursor:
-    #     lst.append(c['term'])
-    db.close()
-    domain = 'vickybrain.com'
-    for term in lst:
-        print(term)
+    for c in cursor:
+        lst.append([c['term'],c['domain']])
+
+    # domain = 'vickybrain.com'
+    for i in lst[13::]:
+        print(i)
         driver=restart_browser()
-        escaped_search_term=urllib.parse.quote(term)
+        escaped_search_term=urllib.parse.quote(i[0])
         googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW')
         driver.get(googleurl)
         time.sleep(60)
         print(driver.current_url)
-        driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/鑫富\/'+term+'.png')
+        # driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/鑫富\/'+term+'.png')
         df=pd.DataFrame()
         elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
         print('結果數量',len(elmts))
@@ -70,34 +72,32 @@ def process_one():
         for elmt in elmts:
             try:
                 href=elmt.get_attribute('href')
-                if domain in href:
-                    # table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'ranking':cnt})
+                if i[1] == href:
+                    table.insert({'title':elmt.text,'url':href,'keyword':i[0],'dt':datetime.datetime.now(),'ranking':cnt})
                     print(href)
                     print(elmt.text)
-                datadict['搜尋詞'].append(term)
-                datadict['結果標題'].append(elmt.text)
-                datadict['結果網址'].append(href)
-                datadict['結果名次'].append(str(cnt))
+                # datadict['搜尋詞'].append(term)
+                # datadict['結果標題'].append(elmt.text)
+                # datadict['結果網址'].append(href)
+                # datadict['結果名次'].append(str(cnt))
 
                 cnt+=1
             except:
                 print('href2 exception')
                 traceback.print_exc()
 
-        if len(datadict['結果標題'])<=0:
-            print('None')
-            driver.quit()
-            sys.exit()
+
         df['搜尋詞']=datadict['搜尋詞']
         df['結果標題']=datadict['結果標題']
         df['結果網址']=datadict['結果網址']
         df['結果名次']=datadict['結果名次']
 
-        df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/鑫富\/'+term+".xls")
+        # df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/鑫富\/'+term+".xls")
 
         driver.quit()
         print('等待')
         time.sleep(random.randint(100,120))
+    db.close()
 process_one()
 
 

+ 2 - 14
SEO/clickbot_100_one.py

@@ -34,18 +34,6 @@ def restart_browser():
     options.add_argument("--incognito")
     # options.add_argument('--proxy-server=socks5://172.104.93.163:41800')
     driver = webdriver.Chrome(options=options, service=s)
-    =====================================
-
-options = webdriver.ChromeOptions()
-options.add_argument("--no-sandbox")
-options.add_argument('--headless')
-options.add_argument("--disable-dev-shm-usage")
-options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})
-options.add_argument('--remote-debugging-port=9222')
-# options.add_experimental_option("debuggerAddress", f"127.0.0.1:{q[2]}")
-options.add_argument("--user-agent=" + user_agent)
-options.add_argument("--incognito")
-driver = webdriver.Chrome(executable_path='/root/chromedriver',options=options)
 
     str1 = driver.capabilities['browserVersion']
     driver.delete_all_cookies()
@@ -53,8 +41,8 @@ driver = webdriver.Chrome(executable_path='/root/chromedriver',options=options)
     return driver
 
 def process_one():
-    lst=['123']
-    date='1205_1'
+    lst=['護佳國際']
+    date='1209'
     for term in lst:
         driver=restart_browser()
         escaped_search_term=urllib.parse.quote(term)

+ 18 - 15
SEO/csv_to_sql.py

@@ -7,27 +7,30 @@ pymysql.install_as_MySQLdb()
 
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
-table=db['seo_jobs']
-client='護佳'
+# table=db['seo_jobs']
+table=db['select_kw']
+client='loginheart'
 domain=''
 # for index,row in df.iterrows():
-# with open("C:\/Users\/s1301\/Documents\/關鍵字建議.csv") as f:
+# with open("C:\/Users\/s1301\/Documents\/新飛國際遊學SEO - 關鍵字12.08.csv") as f:
 #     data_all = f.readlines()
-f = open("C:\/Users\/s1301\/Documents\/關鍵字建議.csv")
-df = pd.read_csv(f)
+# print(data_all)
+f = open("C:\/Users\/s1301\/Documents\/關鍵字建議.csv",encoding='utf-8')
+df = pd.read_csv(f,header=None, names=['kw', 'url'])
 df=df.fillna('')
 # print(df)
 for index,row in df.iterrows():
-    prefix='"'+row['prefix']+'"'
-    positive='"'+row['positive']+'"'
-    domain='"'+row['URL']+'"'
-    rnd='"'+str(row['rnd'])+'"'
-    postfix='""'
-    data = f'"prefix":{prefix},"postfix":{postfix},"positive":[{positive}],"domain":[{domain}],"rnd":[{rnd}]'
-    json='{'+data+'}'
-    print(json)
-    table.insert({'cust':client,'plan':'形象SEO','json':json})
-    # table.insert({'term':row['kw'],'client':client,'domain':domain})
+    print(row['kw'],row['url'])
+    # prefix='"'+row['prefix']+'"'
+    # positive='"'+row['positive']+'"'
+    # domain='"'+row['URL']+'"'
+    # rnd='"'+str(row['rnd'])+'"'
+    # postfix='""'
+    # data = f'"prefix":{prefix},"postfix":{postfix},"positive":[{positive}],"domain":[{domain}],"rnd":[{rnd}]'
+    # json='{'+data+'}'
+    # print(json)
+    # table.insert({'cust':client,'plan':'形象SEO','json':json})
+    table.insert({'term':row['kw'],'client':client,'domain':row['url']})
 
 {"prefix": "護佳", "postfix": "", "domain": ["www.hujiai-international.com"], "positive": ["細胞食物","紅蔘"], "rnd": [""]}
 

+ 100 - 0
SEO/google_rank.py

@@ -0,0 +1,100 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+
+
+driver = None
+
+
+def empty_query(q):
+    global driver
+    googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
+def process_query(qs):
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    table = db['google_rank']
+    q = qs
+
+    global driver
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100, 'zh-TW')
+    driver.get(googleurl)
+    print(driver.current_url)
+    time.sleep(6)
+
+    elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+
+    idx = 1
+    ranking = -1
+    print(len(elmts))
+    driver.save_screenshot('c:/tmp/test.png')
+    
+    for elmt in elmts:
+        href = elmt.get_attribute('href')
+        txt = elmt.text
+        table.insert({'title': elmt.text, 'url': href, 'keyword': q, 'dt': datetime.datetime.now(), 'ranking': idx})
+
+        idx += 1
+    db.close()
+
+def run_once(q):
+    global driver
+    result = []
+    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
+    s = Service(r'C:\Users\Administrator\Downloads\chromedriver_108\chromedriver')
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    # options.add_argument('--remote-debugging-port=9222')
+    # options.add_experimental_option("debuggerAddress", "192.168.192.45:9922")
+    options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+
+    driver = webdriver.Chrome(
+        options=options, service=s)
+    str1 = driver.capabilities['browserVersion']
+    print('版本', str1)
+    driver.delete_all_cookies()
+    driver.set_window_size(1400, 1000)
+
+    print(q)
+    process_query(q)
+    time.sleep(3)
+    driver.quit()
+
+
+# for c in lst:
+# while True:
+#    try:
+#        c=random.choice(lst)
+#    except:
+#        traceback.print_exc()
+#    sleepint=random.randint(320,520)
+#    time.sleep(sleepint)
+
+lst = ['波囍','信義房屋','護佳國際','信義房仲','信義 房屋','信義 房仲']
+for i in lst:
+    while True:
+        try:
+            run_once(i)
+        except:
+            continue
+

+ 5 - 3
SEO/mysql_to_redis.py

@@ -2,15 +2,17 @@ import dataset
 import pymysql
 pymysql.install_as_MySQLdb()
 import redis
+import json
 
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
 table=db['selected_kw']
 lst = []
-cur = db.query('select term,domain from seo.selected_kw where client="鑫富"')
+cur = db.query('select term,url from seo.sns_kw where client="loginheart"')
 for c in cur:
-    lst.append([c['term'],c['domain']])
+    lst.append([c['term'],c['url']])
 
 print(lst)
 # r = redis.Redis(host='db.ptt.cx', port=6379, db=1,password='choozmo9')
-# r.set(lst)
+# r.set(lst)
+['真皮迷你包', 'https://www.zeczec.com/projects/loginheart'], ['小廢包 品牌', 'https://www.zeczec.com/projects/loginheart'], ['電子支付包包', 'https://www.zeczec.com/projects/loginheart'], ['感應支付 小包包', 'https://www.zeczec.com/projects/loginheart'], ['一卡通小包', 'https://www.zeczec.com/projects/loginheart'], ['迷你包 推薦', 'https://www.zeczec.com/projects/loginheart'], ['科技 小廢包', 'https://www.zeczec.com/projects/loginheart'], ['小配方包', 'https://www.zeczec.com/projects/loginheart'], ['精品包包', 'https://www.zeczec.com/projects/loginheart'], ['時尚科技 台灣 包包品牌', 'https://www.zeczec.com/projects/loginheart']

+ 8 - 1
SEO/notify_nda.py

@@ -68,4 +68,11 @@ db.close()
 # schtasks /create /sc minute /mo 30 /sd 2022/05/050 /st 9:00 /et 23:00 /tn "linebat" /tr "C:\tmp\inn_line.bat"
 
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(os.path.dirname(SCRIPT_DIR))
+sys.path.append(os.path.dirname(SCRIPT_DIR))
+
+options = webdriver.ChromeOptions()
+options.add_argument('--headless')
+options.add_argument("--no-sandbox")
+options.add_argument("--disable-dev-shm-usage")
+options.add_argument("--incognito")
+driver = webdriver.Remote(options=options, command_executor=f'http://127.0.0.1:4497/wd/hub')

+ 3 - 3
SEO/ranking_report.py

@@ -3,8 +3,8 @@ import dataset
 import pymysql
 pymysql.install_as_MySQLdb()
 
-name='瑞福'
-date = 'start'
+name='宏康'
+date = '1212'
 
 
 def day():
@@ -18,7 +18,7 @@ def day():
 
     rank_lst = []
     for i in kw_lst:
-        cursor_general = db.query(f'select kw,ranking from seo.general_log where kw = "{i}" order by dt limit 1')
+        cursor_general = db.query(f'select kw,ranking from seo.general_log where kw = "{i}" order by dt desc limit 1')
         for c in cursor_general:
             rank_lst.append([c['kw'],c['ranking']])
     db.close()

+ 43 - 49
SEO/ranking_world.py

@@ -98,6 +98,7 @@ def scrolling(driver, pgnum):
 
 def run_once(jsobj):
     table = db['google_rank']
+    date = jsobj['date']
     print(jsobj)
     global driver
 
@@ -111,12 +112,12 @@ def run_once(jsobj):
         kw = jsobj['kw']
         fname = jsobj['fname']
 
-        if jsobj.get('domain') is None:
-            exclude = jsobj['exclude']
-            domain = None
-        else:
-            domain = jsobj['domain']
-            exclude = None
+        # if jsobj.get('domain') is None:
+        #     exclude = jsobj['exclude']
+        #     domain = None
+        # else:
+        #     domain = jsobj['domain']
+        #     exclude = None
         city_map = {'chicago': ['42.04866173771369', '-87.68260072643513'],
                     'miami': ['25.764458843530548', '-80.19787522585152'],
                     'wc': ['38.96071674051165', '-77.03155367248387'],
@@ -145,7 +146,7 @@ def run_once(jsobj):
         elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
         numresults = len(elmts)
         print('搜尋結果數量', numresults)
-        # driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/' + fname + ".png")
+        driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/' +date +fname + ".png")
         if numresults == 0:
             send_msg('stop working...')
             sys.exit()
@@ -174,42 +175,42 @@ def run_once(jsobj):
             datadict['結果名次'].append(str(idx))
             datadict['結果說明'].append(desc)
 
-            if len(txt) > 10:
-                if domain is not None:
-                    for d in domain:
-                        if d in href:
-                            print('found....')
-                            print('clicked....')
-                            print(href)
-                            print(txt)
-                            print("ranking", idx)
-                            found = True
-                            return
-                else:
-                    ex = False
-                    for ee in exclude:
-                        if ee in href:
-                            ex = True
-                    if not ex:
-                        test_lst.append(elmt)
-                        txt_dict[elmt] = txt
+            # if len(txt) > 10:
+            #     if domain is not None:
+            #         for d in domain:
+            #             if d in href:
+            #                 print('found....')
+            #                 print('clicked....')
+            #                 print(href)
+            #                 print(txt)
+            #                 print("ranking", idx)
+            #                 found = True
+            #                 return
+            #     else:
+            #         ex = False
+            #         for ee in exclude:
+            #             if ee in href:
+            #                 ex = True
+            #         if not ex:
+            #             test_lst.append(elmt)
+            #             txt_dict[elmt] = txt
 
             idx += 1
 
-        if exclude is not None:
-            print('exclude')
-            elmt = random.choice(test_lst[5:])
-            print(elmt)
-            print(txt_dict[elmt])
-
-            webdriver.ActionChains(driver).move_to_element(elmt).perform()
-            #            elmt.click()
-            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-            time.sleep(5)
-
-        if not found:
-            table.insert(
-                {'ranking': -1, 'kw': kw, 'results': numresults, 'url': '', 'title': '未收錄', 'descrption': desc})
+        # if exclude is not None:
+        #     print('exclude')
+        #     elmt = random.choice(test_lst[5:])
+        #     print(elmt)
+        #     print(txt_dict[elmt])
+        #
+        #     webdriver.ActionChains(driver).move_to_element(elmt).perform()
+        #     #            elmt.click()
+        #     webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+        #     time.sleep(5)
+        #
+        # if not found:
+        #     table.insert(
+        #         {'ranking': -1, 'kw': kw, 'results': numresults, 'url': '', 'title': '未收錄', 'descrption': desc})
 
         df['搜尋詞'] = datadict['搜尋詞']
         df['結果標題'] = datadict['結果標題']
@@ -217,7 +218,7 @@ def run_once(jsobj):
         df['結果名次'] = datadict['結果名次']
         df['結果說明'] = datadict['結果說明']
 
-        # df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/'+fname+".xls")
+        df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/'+date+fname+".xls")
 
 
 
@@ -232,11 +233,4 @@ def run_once(jsobj):
     sys.exit()
 
 
-class JParams(object):
-
-    def get(self, jsobj):
-        run_once(jsobj)
-
-
-if __name__ == '__main__':
-    fire.Fire(JParams)
+run_once({'kw':'angelo koo','fname':'miami','date':'1208'})

+ 1 - 1
SEO/sns_rank_report.py

@@ -4,7 +4,7 @@ import pymysql
 pymysql.install_as_MySQLdb()
 
 name='superlink'
-date = '1206'
+date = '1212'
 
 
 def day():