Browse Source

Merge remote-tracking branch 'origin/master'

zooeytsai 3 years ago
parent
commit
1cd156f97d
5 changed files with 164 additions and 30 deletions
  1. 6 10
      choozmo/click_commerce.py
  2. 104 0
      choozmo/seo_hhh.py
  3. 2 1
      hhh/SEO/fast_1999.py
  4. 14 19
      hhh/SEO/local_1777.py
  5. 38 0
      tests/rpc_key_test.py

+ 6 - 10
choozmo/click_commerce.py

@@ -10,7 +10,6 @@ from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 import codecs
 import random
-import requests
 import dataset
 import time
 import traceback
@@ -23,12 +22,6 @@ headers = {
 }
 
 
-
-def send_msg(kw):
-    params = {"message": "處理關鍵字: "+kw}  
-    r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
-
-
 def empty_query(q):
     global driver
     googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
@@ -68,12 +61,15 @@ def process_query(qs):
 
 def run_once(q):
     global driver
+    print('run_once()')
     result=[]
     options = webdriver.ChromeOptions()
     options.add_argument('--headless')
 #    options.add_argument("--user-agent=" +user_agent)
-    options.add_argument("--incognito")
-
+#    options.add_argument("--incognito")
+    options.add_argument('--no-sandbox')
+    options.add_argument("--disable-gpu")
+    options.add_argument('--disable-dev-shm-usage')
     driver = webdriver.Chrome(
     options=options)
 
@@ -103,6 +99,6 @@ while True:
         run_once( (c['term'],c['domain'])   )
     except:
         traceback.print_exc()
-    sleepint=random.randint(380,520)
+    sleepint=random.randint(290,420)
     time.sleep(sleepint)
 

+ 104 - 0
choozmo/seo_hhh.py

@@ -0,0 +1,104 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import dataset
+import time
+import traceback
+import sys
+
+driver=None
+headers = {
+        "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+
+def empty_query(q):
+    global driver
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
+def process_query(qs):
+    q=qs[0]
+    domain=qs[1]
+    global driver
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+
+    idx=1
+    ranking=-1
+    print(len(elmts))
+#    driver.save_screenshot('c:/tmp/test.png')
+
+    for elmt in elmts:
+
+        href=elmt.get_attribute('href')
+        txt=elmt.text
+        if len(txt)>10:
+            if domain in href:
+                print('clicked....')
+                print(href)
+                print(txt)
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                break
+
+
+def run_once(q):
+    global driver
+    print('run_once()')
+    result=[]
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+#    options.add_argument("--user-agent=" +user_agent)
+#    options.add_argument("--incognito")
+    options.add_argument('--no-sandbox')
+    options.add_argument("--disable-gpu")
+    options.add_argument('--disable-dev-shm-usage')
+    driver = webdriver.Chrome(
+    options=options)
+
+    driver.delete_all_cookies()
+    driver.set_window_size(1400,1000)
+
+    print(q)
+    process_query(q)
+    time.sleep(3)
+    driver.quit()
+
+
+#lst=[{'kw':'幸福空間','domain':'hhh.com.tw','page':0}]
+lst=[]
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+cursor=db.query('SELECT kw as term,domain FROM seo.seo_clickjobs where category="hhh-faq"  order by rand()')
+
+for c in cursor:
+    lst.append(c)
+
+
+#for c in lst:
+while True:
+    try:
+        c=random.choice(lst)
+        run_once( (c['term'],c['domain'])   )
+    except:
+        traceback.print_exc()
+    sleepint=random.randint(290,420)
+    time.sleep(sleepint)
+

+ 2 - 1
hhh/SEO/fast_1999.py

@@ -92,7 +92,7 @@ def re_get_webdriver():
     time.sleep(4)
     if driver is not None:
         print('closing....')
-#        driver.quit()
+        driver.quit()
         os.system('docker container restart p'+portnum)
         time.sleep(10)
         
@@ -128,6 +128,7 @@ def run_once(url):
     i=random.randint(0,5)
     if i==0 or driver is None:
 #    if True:
+        time.sleep(15)
         re_get_webdriver()
     if driver is None:
         return

+ 14 - 19
hhh/SEO/local_1777.py

@@ -19,35 +19,34 @@ import codecs
 import random
 import os
 import time
-
+from userAgentRandomizer import userAgents
 
 driver=None
 
-
 def re_get_webdriver():
     global driver
     result=[]
-#    client = docker.from_env()
-#    ls=client.containers.list()
-#    print(ls)
-#    for l in ls:
-#        if 'p17777' in l.name:
-#            ls[0].restart()
-
-#    time.sleep(4)
     if driver is not None:
         print('closing....')
         driver.quit()
-#    options = webdriver.EdgeOptions()
+        os.system('killall chrome')
+        print('quit....')
+        driver=None
     try:
+        ua = userAgents()
+
+        user_agent = ua.random()        
 
         options = webdriver.ChromeOptions()
         options.add_argument("--no-sandbox")
         options.add_argument("--disable-dev-shm-usage")
         options.add_argument("--headless")
 
+        print(user_agent)
+        options.add_argument("--user-agent=" +user_agent)
+        options.add_argument("--incognito")
+
         driver = webdriver.Chrome(options=options)
-#        desired_capabilities=options.to_capabilities())
 
         driver.set_window_size(1400,1000)
         return
@@ -56,15 +55,13 @@ def re_get_webdriver():
         traceback.print_exc()
         driver=None
         return None
-    driver=None
 
 def run_once(url):
     global driver
-    i=random.randint(0,15)
+    i=random.randint(0,7)
     if i==0 or driver is None:
-#    if True:
+        time.sleep(8)
         re_get_webdriver()
-        time.sleep(3)
     if driver is None:
         return
     try:
@@ -75,7 +72,6 @@ def run_once(url):
         print('exception')
 
 
-
 lst=[]
 
 
@@ -83,7 +79,7 @@ db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb
 
 cursor=db.query('SELECT * FROM columnids order by rand()')
 for c in cursor:
-    lst.append('https://www.hhh.com.tw/columns/detail/'+str(c['cid'])+'/index.php')
+    lst.append('https://www.hhh.com.tw/columns/detail/'+str(c['cid'])+'/')
 
 
 
@@ -94,4 +90,3 @@ while True:
     print(l)
     run_once(l)
 
-

+ 38 - 0
tests/rpc_key_test.py

@@ -0,0 +1,38 @@
+import rpyc
+import time
+import schedule
+s1={'ip':'192.168.192.199','names':['poi1','poi2']}
+s2={'ip':'192.168.192.58','names':['poi1','poi2']}
+s3={'ip':'192.168.192.146','names':['poi1','poi2']}
+s4={'ip':'192.168.192.45','names':['poi1','poi2']}
+s5={'ip':'192.168.192.156','names':['hhhclick1','hhhclick2','seo1','seo2']}
+
+servers=[s1,s2,s3,s4,s5]
+#conn = rpyc.ssl_connect("192.168.192.199", port = 18812, keyfile="c:/tmp/client.key",
+#                        certfile="c:/tmp/client.cer")
+
+def do_job():
+    global servers
+    for srv in servers:
+        print(srv['ip'])
+        conn = rpyc.classic.connect(srv['ip'], port = 18812)
+        ros=conn.modules.os
+        conn.execute('import docker')
+        rdocker=conn.modules.docker
+        client = rdocker.from_env()
+        lst=client.containers.list()
+        cur_names=[]
+        for l in lst:
+            cur_names.append(l.name)
+        for n in srv['names']:
+            if n not in cur_names:
+                print("calling: " +n)
+                ros.system('docker container restart '+n)
+
+do_job()
+schedule.every(3).minutes.do(do_job)
+
+while True:
+    schedule.run_pending()
+    time.sleep(1)
+