Jared pred 2 rokmi
rodič
commit
6564f2dda4
2 zmenené súbory, 58 pridaní a 209 odobraní
  1. 15 0
      README.md
  2. 43 209
      gen_seo.py

+ 15 - 0
README.md

@@ -0,0 +1,15 @@
+docker run -d -p 9922:9222 --rm  --cpu-shares="100" --shm-size="900m" --dns 8.8.8.8 --name tiny1 chromedp/headless-shell:102.0.5005.61
+
+
+docker run -d -p 9922:9222 --rm  --cpu-shares="100" -v /dev/shm:/dev/shm --dns 8.8.8.8 --name tiny1 chromedp/headless-shell:102.0.5005.61
+
+
+rsync -r /tmp/choozmo_select/* root@www.ptt.cx:/home/nginx/farmoutput/choozmo_select
+
+
+rsync -r /tmp/tony-chin/* root@www.ptt.cx:/home/nginx/farmoutput/tony-chin
+
+
+while :; do python3 gen_seo.py tiny1 9922 ; sleep 5; done
+
+while :; do python3 tony.py tiny1 9922 ; sleep 5; done

+ 43 - 209
gen_seo.py

@@ -39,6 +39,7 @@ def send_msg(kw):
 
 
 
 
 def re_get_webdriver():
 def re_get_webdriver():
+    global port
     global driver
     global driver
     result=[]
     result=[]
     if driver is not None:
     if driver is not None:
@@ -56,7 +57,7 @@ def re_get_webdriver():
         options.add_argument("--no-sandbox")
         options.add_argument("--no-sandbox")
         options.add_argument("--disable-dev-shm-usage")
         options.add_argument("--disable-dev-shm-usage")
         options.add_argument("--headless")
         options.add_argument("--headless")
-        options.add_argument('--remote-debugging-port=9922')
+        options.add_argument('--remote-debugging-port='+port)
 #        options.add_experimental_option("debuggerAddress", '127.0.0.1:9922')
 #        options.add_experimental_option("debuggerAddress", '127.0.0.1:9922')
 
 
         print(user_agent)
         print(user_agent)
@@ -64,7 +65,13 @@ def re_get_webdriver():
         options.add_argument("--incognito")
         options.add_argument("--incognito")
         driver=None
         driver=None
         try:
         try:
-            driver = webdriver.Chrome(options=options)
+#            driver = webdriver.Chrome(options=options)
+            if os.name=='nt':
+                driver = webdriver.Chrome(executable_path='C:/portable/webdriver/chrome102/chromedriver.exe',options=options)
+            else:
+                driver = webdriver.Chrome(executable_path='/root/drivers/102/chromedriver',options=options)
+
+
         except:
         except:
 #            driver.quit()
 #            driver.quit()
 #            os.system('pkill -f ')
 #            os.system('pkill -f ')
@@ -81,164 +88,6 @@ def re_get_webdriver():
 
 
 
 
 
 
-def from_shopping(kw):
-    global driver
-    driver.get('https://shopping.google.com')
-    time.sleep(5)
-    elmt = driver.find_element(By.XPATH, "//input[@id='REsRA']")
-    elmt.send_keys('幸福空間') 
-    elmt.send_keys(Keys.ENTER) #hits space
-    time.sleep(7)
-    elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
-    webdriver.ActionChains(driver).move_to_element(elmt).perform()
-    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-    time.sleep(5)
-
-def from_book(kw):
-    global driver
-    driver.get('https://books.google.com/')
-    time.sleep(5)
-    elmt = driver.find_element(By.XPATH, "//input[@id='oc-search-input']")
-    elmt.send_keys('幸福空間') 
-    elmt.send_keys(Keys.ENTER) #hits space
-    time.sleep(7)
-    elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
-    webdriver.ActionChains(driver).move_to_element(elmt).perform()
-    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-    time.sleep(5)
-
-
-def from_wiki(kw):
-    global driver
-    driver.get('https://en.wikipedia.org/wiki/Google_Search')
-    time.sleep(4)
-    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
-    webdriver.ActionChains(driver).move_to_element(elmt).perform()
-    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-    time.sleep(5)
-    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
-    time.sleep(1)
-    elmt.send_keys(kw)
-    elmt.send_keys(Keys.ENTER)
-    time.sleep(6)
-
-def from_bing(kw):
-    global driver
-    driver.get('https://www.bing.com/search?q=google')
-    time.sleep(4)
-    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
-    webdriver.ActionChains(driver).move_to_element(elmt).perform()
-    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-    time.sleep(5)
-    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
-    time.sleep(1)
-    elmt.send_keys(kw)
-    elmt.send_keys(Keys.ENTER)
-    time.sleep(6)
-
-def from_ecosia(kw):
-    global driver
-    driver.get('https://www.ecosia.org/search?method=index&q=GOOGLE')
-    time.sleep(4)
-    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
-    webdriver.ActionChains(driver).move_to_element(elmt).perform()
-    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-    time.sleep(5)
-    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
-    time.sleep(1)
-    elmt.send_keys(kw)
-    elmt.send_keys(Keys.ENTER)
-    time.sleep(6)
-
-def from_brave(kw):
-    global driver
-    driver.get('https://search.brave.com/search?q=google&source=web')
-    time.sleep(4)
-    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
-    webdriver.ActionChains(driver).move_to_element(elmt).perform()
-    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-    time.sleep(5)
-    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
-    time.sleep(1)
-    elmt.send_keys(kw)
-    elmt.send_keys(Keys.ENTER)
-    time.sleep(6)
-
-
-
-
-
-def from_duckduckgo(kw):
-    global driver
-    driver.get('https://duckduckgo.com/?q=google')
-    time.sleep(4)
-    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
-    webdriver.ActionChains(driver).move_to_element(elmt).perform()
-    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-    time.sleep(5)
-#    time.sleep(9999)
-    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
-    time.sleep(1)
-    elmt.send_keys(kw)
-    elmt.send_keys(Keys.ENTER)
-    time.sleep(6)
-
-
-def from_ekoru(kw):
-    global driver
-    driver.get('https://www.ekoru.org/?q=google')
-    time.sleep(4)
-    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
-    webdriver.ActionChains(driver).move_to_element(elmt).perform()
-    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-    time.sleep(5)
-#    time.sleep(9999)
-    elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
-    time.sleep(1)
-    elmt.send_keys(kw)
-    elmt.send_keys(Keys.ENTER)
-    time.sleep(6)
-
-
-
-
-
-def from_yahoo(kw):
-    global driver
-    driver.get('https://search.yahoo.com/search?p=google')
-    time.sleep(4)
-    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
-    webdriver.ActionChains(driver).move_to_element(elmt).perform()
-    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-    time.sleep(2)
-    driver.switch_to.window(driver.window_handles[1])
-    time.sleep(3)
-    print(driver.current_url)
-    elmt = driver.find_element(By.XPATH, "//input[@name='q']")
-    time.sleep(1)
-    elmt.send_keys(kw)
-    elmt.send_keys(Keys.ENTER)
-    time.sleep(6)
-
-def from_gibiru(kw):
-    global driver
-    driver.get('https://gibiru.com/results.html?q=google')
-    time.sleep(4)
-    elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
-    webdriver.ActionChains(driver).move_to_element(elmt).perform()
-    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-    time.sleep(2)
-    driver.switch_to.window(driver.window_handles[1])
-    time.sleep(3)
-    print(driver.current_url)
-    elmt = driver.find_element(By.XPATH, "//input[@name='q']")
-    time.sleep(1)
-    elmt.send_keys(kw)
-    elmt.send_keys(Keys.ENTER)
-    time.sleep(6)
-
-
-
 def run_once(jsobj):
 def run_once(jsobj):
 
 
     table=db['rank_detection']
     table=db['rank_detection']
@@ -261,27 +110,6 @@ def run_once(jsobj):
         else:
         else:
             domain=jsobj['domain']
             domain=jsobj['domain']
             exclude=None
             exclude=None
-        if i==0:
-            from_book(kw)
-        elif i==1:
-            from_shopping(kw)
-        elif i==2:
-            from_wiki(kw)
-        elif i==3:
-            from_bing(kw)
-        elif i==4:
-            from_duckduckgo(kw)
-        elif i==5:
-            from_yahoo(kw)
-        elif i==6:
-            from_gibiru(kw)
-        elif i==7:
-            from_ekoru(kw)
-        elif i==8:
-            from_ecosia(kw)
-        elif i==9:
-            from_brave(kw)
-#        time.sleep(9999)
 
 
 #        driver.get('https://www.google.com?num=100')
 #        driver.get('https://www.google.com?num=100')
         driver.get('https://www.google.com?num=20')
         driver.get('https://www.google.com?num=20')
@@ -305,6 +133,7 @@ def run_once(jsobj):
         idx=1
         idx=1
         found=False
         found=False
         test_lst=[]
         test_lst=[]
+        txt_dict={}
         for elmt in elmts:
         for elmt in elmts:
             href=elmt.get_attribute('href')
             href=elmt.get_attribute('href')
             txt=elmt.text
             txt=elmt.text
@@ -316,25 +145,34 @@ def run_once(jsobj):
                         print(href)
                         print(href)
                         print(txt)
                         print(txt)
                         print("ranking", idx)
                         print("ranking", idx)
-                        table.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt})
                         found=True
                         found=True
+
                         webdriver.ActionChains(driver).move_to_element(elmt).perform()
                         webdriver.ActionChains(driver).move_to_element(elmt).perform()
-                        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-                        time.sleep(5)
+                        elmt.click()
+#                        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                        table.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt})
+
+                        time.sleep(6)
                         break
                         break
                 else:
                 else:
-                    if exclude not in href:
+                    ex=False
+                    for ee in exclude:
+                        if ee in href:
+                            ex=True
+                    if not ex:
                         test_lst.append(elmt)
                         test_lst.append(elmt)
+                        txt_dict[elmt]=txt
                     
                     
             idx+=1
             idx+=1
         if exclude is not None:
         if exclude is not None:
-            test_lst=test_lst[9:]
             print('exclude')
             print('exclude')
-            elmt=random.choice(test_lst)
+            elmt=random.choice(test_lst[5:])
             print(elmt)
             print(elmt)
+            print(txt_dict[elmt])
 
 
             webdriver.ActionChains(driver).move_to_element(elmt).perform()
             webdriver.ActionChains(driver).move_to_element(elmt).perform()
-            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            elmt.click()
+#            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
             time.sleep(5)
             time.sleep(5)
 
 
         if not found:
         if not found:
@@ -347,25 +185,21 @@ def run_once(jsobj):
     driver.quit()
     driver.quit()
     sys.exit()
     sys.exit()
 
 
-
-os.system('docker container restart tiny1')
-time.sleep(6)
-r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
-
-##data=r.get('personal_seo')
-#jstext=data.decode('utf-8')
-#jsobj=json.loads(jstext)
-
-#js=random.choice(jsobj)
-#js=['seo','台北','新北','新竹','竹北','台灣','最強','集仕多','新聞','是什麼','搜尋','優化','如何','元宇宙','加速','排名','查詢','關鍵字','計劃','曝光','推薦','工具','google','排行','排序','公司','提升','收費','行情','網站','網頁','youtube','計畫','AI','人工智慧','deep learning','深度學習','評分','研究','價格','工具','論壇','自然','規則','流量','建議','寫作','技巧','課程','測試','因素','改善','購買','谷歌','成本','推廣','人員','方式','行銷','外貿','企業','電商','電子商務','商務','改版','分析','老師','講師','顧問','提高','影片','主播','廣告','投放','5g','元宇宙','ppt','mp4','podcast']
-js=['seo','台北','新北','新竹','竹北','台灣','最強','choozmo','新聞','是什麼','搜尋','優化','如何','元宇宙','加速','排名','查詢','關鍵字','計劃','曝光','推薦','工具','google','排行','排序','公司','提升','收費','行情','網站','網頁','youtube','計畫','AI','人工智慧','deep learning','深度學習','評分','研究','價格','工具','論壇','自然','規則','流量','建議','寫作','技巧','課程','測試','因素','改善','購買','谷歌','成本','推廣','人員','方式','行銷','外貿','企業','電商','電子商務','商務','改版','分析','老師','講師','顧問','提高','影片','主播','廣告','投放','5g','元宇宙','ppt','mp4','podcast','pptx']
-hhh=['幸福空間','幸福 空間','室內設計 幸福','室內設計 幸福空間','裝潢 幸福空間','幸福空間']
-elmt1=random.choice(js)
-elmt2=random.choice(js)
-hhh1=random.choice(hhh)
-
-op=random.randint(0,5)
-#run_once({'domain':'hhh.com.tw','kw':'班尼斯'})
-
-run_once({'domain':'choozmo.com','kw':elmt1+" "+elmt2+" choozmo"})
+par1=sys.argv[1]
+port=sys.argv[2]
+#kws=['職籃','PLG','高雄','鋼鐵人','內幕','中資','股東','姊夫','中國','老賴','香港','無極','原始股東','外資','董事長','股權結構','高雄人','黑人','陳建州','職籃聯盟','球團','球團高層','香港無極','張憲銘','吳同喬','監察人']
+kws=['金融', '人才', '國際接軌','國際','投資金童','投資','金童','對沖基金','香港','外資','原始股東','職籃','PLG','職籃聯盟','球團','台灣女婿','抹紅','保守','港元','美國','升息','戰爭','通膨','亞洲','亞洲投資金童']
+positive=['錢濤','亞洲最強對沖基金','亞洲投資金童','年底前投資須保守','對沖基金創始人錢濤','錢濤 升息','錢濤 職籃夢']
+os.system('docker container restart '+par1)
+kw=random.choice(kws)
+#time.sleep(9)
+#run_once({'domain':'ettoday.net','kw':'錢濤'})
+#run_once({'exclude':['moreptt.com','ptt.cc','tnews.cc','mirrormedia.mg','newtalk.tw','pourquoi.tw','match.net.tw','freshweekly.tw','z-upload.facebook.com','udn.com'],'kw':kw+' 錢濤'})
+domains=['yahoo.com','ettoday.net','tvbs.com.tw','sina.com.tw','ltn.com.tw','owlting.com','ctee.com.tw']
+domain=random.choice(domains)
+p=random.choice(positive)
+#run_once({'domain':domain,'kw':p})
+run_once({'domain':'ettoday.net','kw':p})
+
+#run_once({'domain':domain,'kw':kw+' 錢濤'})