Explorar o código

Merge remote-tracking branch 'origin/master'

zooey hai 1 ano
pai
achega
fa7b628891

+ 1 - 1
website_clickjobs/type-1/_clickjob.py

@@ -67,7 +67,7 @@ def process_query(domain, target_domain, brands, query, type):
     if "sorry" in googleurl:
         return 444
 
-    elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/a")
+    elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/div/a")
 
     print (len(elmts))
     # driver.save_screenshot('c:/tmp/test.png')

+ 1 - 1
website_clickjobs/type-1/_clickjob0113.py

@@ -78,7 +78,7 @@ def process_query(domain, target_id, url, query):
         webdriver.ActionChains(driver).move_to_element(clickmore).click().perform()
     except:
         pass
-    elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/a")
+    elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/div/a")
 
     targets=[]
     if int(target_id) > 50000: #50XXX = FAQ

+ 1 - 1
website_clickjobs/type-F/_clickjob.py

@@ -59,7 +59,7 @@ def process_query(domain, target_domain, brands, query):
     driver.get(googleurl) 
     print(driver.current_url)
 
-    elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/a")
+    elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/div/a")
 
     print (len(elmts))
     # driver.save_screenshot('c:/tmp/test.png')

+ 1 - 1
website_clickjobs/type-GD/_clickjob.py

@@ -73,7 +73,7 @@ def process_query(url, cust, query):
         webdriver.ActionChains(driver).move_to_element(clickmore).click().perform()
     except:
         pass
-    elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/a")
+    elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/div/a")
 
     print (len(elmts))
     # driver.save_screenshot('c:/tmp/test.png')

+ 20 - 12
website_clickjobs/type-V/_execute.py

@@ -10,7 +10,7 @@ import pandas as pd
 from PIL import Image
 import dataset
 from datetime import datetime
-from random import randint
+from random import randint, choice
 
 #from setting import rua
 
@@ -31,7 +31,7 @@ def restart_browser():
 
 resultdict={'搜尋詞':[],'網域':[],'結果標題':[],'結果網址':[],'結果名次':[]}
 
-def process_one(term, target, n, sr, se):
+def process_one(term, tgt, n, sr, se):
     try:
         print(term)
 
@@ -44,7 +44,7 @@ def process_one(term, target, n, sr, se):
 
         df=pd.DataFrame()
     
-        elmts=driver.find_elements(By.CLASS_NAME,"ct3b9e")
+        elmts=driver.find_elements(By.CLASS_NAME,"MjjYud")
         count = len(elmts)
         print(count)
         time.sleep(10)
@@ -57,24 +57,27 @@ def process_one(term, target, n, sr, se):
         resultttl = "X"
         resultlink = "X"
         resultrank = "X"
+        target = []
         
         for elmt in elmts:
             try:
-                href=elmt.find_element(By.TAG_NAME,"a").get_attribute('href')
+                h1=elmt.find_element(By.TAG_NAME,"a")
+                href=h1.get_attribute('href')
                 datadict['搜尋詞'].append(term)
                 datadict['結果標題'].append(elmt.text)
                 datadict['結果網址'].append(href)
                 datadict['結果名次'].append(str(cnt))
-                if (n==1 or n==2) and target in href and found==0:
+
+                if 'youtube' not in href:
+                    href=href.split('?')[0]
+
+                if (n==1 or n==2) and href in tgt:
                     found=1
-                    clickelmt = elmt
+                    clickelmt = h1
                     resultrank = str(cnt)
                     resultlink = href
                     resultttl = elmt.text
-                    print("Target domain found")
-                    print(resultttl)
-                    print(resultlink)
-                    print(resultrank)
+                    target.append([clickelmt, resultlink, resultrank, resultttl])
                 cnt+=1
             except:
                 print('href2 exception')
@@ -85,10 +88,15 @@ def process_one(term, target, n, sr, se):
             # driver.quit()
             # sys.exit()
         
-        if n==2 and found==1:
+        if n==2 and len(target)!=0:
+            selection = choice(target)
+            clickelmt = selection[0]
             webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
             webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
             print("Clicked")
+            print(selection[3])
+            print(selection[1])
+            print(selection[2])
             time_stamp = datetime.fromtimestamp(time.time())
             time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S")
             if se==1:
@@ -96,7 +104,7 @@ def process_one(term, target, n, sr, se):
             else:
                 table=db['general_log']
             
-            table.insert({'ranking':resultrank,'kw':term,'results':count,'url':resultlink,'title':resultttl,'dt':time_stamp})
+            table.insert({'kw':term,'results':count,'url':selection[1],'ranking':selection[2],'title':selection[3],'dt':time_stamp})
             time.sleep(30)
         
         driver.quit()

+ 7 - 8
website_clickjobs/type-V/c1.py

@@ -1,22 +1,21 @@
 #coding=utf-8
 from _execute import *
-import random
 
 def pickvideo():
-    n = random.randint(0,3)
+    n = randint(0,3)
     if n==0:
         term = "信義房屋"
-        whitelist = ['https://hr.sinyi.com.tw/','https://www.youtube.com/channel/UCU76s7FGtDvDta2mf2Zdn4Q','https://csr.sinyi.com.tw/client/innovation.php','https://www.youtube.com/user/sinyicity','https://www.sinyi.com.tw/aboutsinyi/aboutsinyi_publish','https://csr.sinyi.com.tw/news/anti-pandemic.php','https://events.sinyi.com.tw/20190828_webintro/','https://csr.sinyi.com.tw/society/charity.php','https://www.ncscre.nccu.edu.tw/node/638','https://www.youtube.com/channel/UCV-0ttUE0Z-BTuBcwvQNjgQ','https://www.youtube.com/playlist?list=PLftfxwJcQDUCL8v2oB_jHWHue4I6vHyBz','https://www.youtube.com/watch?v=mlqY5X2oTuk','https://hr.sinyi.com.tw/events/1/?utm_source=Facebook_hr&utm_medium=display&utm_term=web&utm_content=video','https://events.sinyi.com.tw/tvc2018-forhome/']
+        whitelist = ['https://www.facebook.com/sinyifans/','https://www.youtube.com/channel/UCU76s7FGtDvDta2mf2Zdn4Q','https://www.sinyi.com.tw/aboutsinyi/aboutsinyi_publish','https://csr.sinyi.com.tw/','https://events.sinyi.com.tw/20190828_webintro/','https://www.youtube.com/user/sinyicity','https://csr.sinyi.com.tw/governance/index.php','https://events.sinyi.com.tw/tvc2018-forhome/','https://www.facebook.com/SinyiRealtyInc.HR/','https://hr.sinyi.com.tw/events/1/','https://csr.sinyi.com.tw/society/charity.php','https://csr.sinyi.com.tw/interactive/video.php','https://events.sinyi.com.tw/airecommend/sell/']
     if n==1:
         term = "信義 房屋"
-        whitelist = ['https://hr.sinyi.com.tw/','https://www.youtube.com/channel/UCU76s7FGtDvDta2mf2Zdn4Q','https://csr.sinyi.com.tw/client/innovation.php','https://www.youtube.com/user/sinyicity','https://www.sinyi.com.tw/aboutsinyi/aboutsinyi_publish','https://csr.sinyi.com.tw/news/anti-pandemic.php','https://events.sinyi.com.tw/20190828_webintro/','https://csr.sinyi.com.tw/society/charity.php','https://www.ncscre.nccu.edu.tw/node/638','https://www.youtube.com/channel/UCV-0ttUE0Z-BTuBcwvQNjgQ','https://www.youtube.com/playlist?list=PLftfxwJcQDUCL8v2oB_jHWHue4I6vHyBz','https://hr.sinyi.com.tw/events/1/?utm_source=Facebook_hr&utm_medium=display&utm_term=web&utm_content=video','https://events.sinyi.com.tw/tvc2018-forhome/','https://www.youtube.com/watch?v=rGQqegE8rbc','https://csr.sinyi.com.tw/society/community.php']
+        whitelist = ['https://www.facebook.com/sinyifans/','https://www.youtube.com/channel/UCU76s7FGtDvDta2mf2Zdn4Q','https://www.sinyi.com.tw/aboutsinyi/aboutsinyi_publish','https://csr.sinyi.com.tw/','https://events.sinyi.com.tw/20190828_webintro/','https://www.youtube.com/user/sinyicity','https://csr.sinyi.com.tw/governance/index.php','https://events.sinyi.com.tw/tvc2018-forhome/','https://hr.sinyi.com.tw/events/1/','https://www.facebook.com/SinyiRealtyInc.HR/','https://csr.sinyi.com.tw/society/charity.php','https://csr.sinyi.com.tw/interactive/video.php','https://events.sinyi.com.tw/airecommend/sell/','https://csr.sinyi.com.tw/society/community.php']
     if n==2:
         term = "信義房仲"
-        whitelist = ['https://www.youtube.com/watch?v=1OxZNuwlUOk','https://www.youtube.com/watch?v=uOsZiIhyxVM','https://www.youtube.com/watch?v=juJTjzi4DV0','https://www.sinyinews.com.tw/sinyipedia/buyarticle/337','https://www.youtube.com/channel/UCU76s7FGtDvDta2mf2Zdn4Q','https://hr.sinyi.com.tw/events/1/?utm_source=Facebook_hr&utm_medium=display&utm_term=web&utm_content=video','https://www.youtube.com/watch?v=tbdpEt65LRI','https://www.youtube.com/user/sinyicity','https://www.youtube.com/watch?v=iWOHUiGwnPE','https://csr.sinyi.com.tw/client/innovation.php','https://hr.sinyi.com.tw/events/tvc/','https://www.youtube.com/watch?v=nj1mTUG8yJo','https://www.sinyinews.com.tw/s_channel/videodetail/59','https://www.youtube.com/watch?v=A5GuB3Enw9U','https://www.mirrormedia.mg/story/20230712cnt001/']
+        whitelist = ['https://www.facebook.com/sinyifans/','https://hr.sinyi.com.tw/video','https://www.youtube.com/watch?v=uOsZiIhyxVM','https://www.sinyinews.com.tw/sinyipedia/buyarticle/337','https://www.youtube.com/watch?v=1OxZNuwlUOk','https://hr.sinyi.com.tw/events/tvc/','https://www.youtube.com/channel/UCU76s7FGtDvDta2mf2Zdn4Q','https://www.youtube.com/watch?v=68PyCj3iKPM','https://www.sinyinews.com.tw/s_channel/videodetail/59','https://www.youtube.com/watch?v=iWOHUiGwnPE','https://hr.sinyi.com.tw/events/1/','https://www.youtube.com/user/sinyicity','https://www.youtube.com/watch?v=juJTjzi4DV0','https://www.sinyinews.com.tw/s_channel/videodetail/60','https://www.youtube.com/watch?v=GDYKd3cvEXk']
     if n==3:
         term = "信義 房仲"
-        whitelist = ['https://www.youtube.com/watch?v=1OxZNuwlUOk','https://www.youtube.com/watch?v=uOsZiIhyxVM','https://www.youtube.com/watch?v=juJTjzi4DV0','https://www.sinyinews.com.tw/sinyipedia/buyarticle/337','https://hr.sinyi.com.tw/events/1/?utm_source=Facebook_hr&utm_medium=display&utm_term=web&utm_content=video','https://www.youtube.com/watch?v=tbdpEt65LRI','https://hr.sinyi.com.tw/events/tvc/','https://www.sinyinews.com.tw/s_channel/videodetail/58','https://sinyipodcast.com.tw/article/news/trend/44','https://www.youtube.com/user/sinyicity','https://www.youtube.com/watch?v=iWOHUiGwnPE','https://www.youtube.com/channel/UCU76s7FGtDvDta2mf2Zdn4Q','https://www.sinyinews.com.tw/s_channel/videodetail/59']
-    return term, random.choice(whitelist)
+        whitelist = ['https://hr.sinyi.com.tw/video','https://www.facebook.com/sinyifans/','https://www.youtube.com/watch?v=uOsZiIhyxVM','https://www.sinyinews.com.tw/sinyipedia/buyarticle/337','https://www.youtube.com/watch?v=68PyCj3iKPM','https://www.youtube.com/watch?v=1OxZNuwlUOk','https://sinyipodcast.com.tw/article/news/trend/44','https://www.sinyinews.com.tw/s_channel/videodetail/59','https://tw.yahoo.com/trendr/%E4%BF%A1%E7%BE%A9%E4%BC%81%E6%A5%AD%E9%9B%86%E5%9C%9827%E4%BA%BA%E5%A5%AA%E6%88%BF%E4%BB%B2%E6%A5%AD%E5%A5%A7%E6%96%AF%E5%8D%A1-%E6%A5%AD%E7%95%8C%E4%B9%8B%E5%86%A0-000500139.html','https://www.youtube.com/watch?v=juJTjzi4DV0','https://hr.sinyi.com.tw/events/tvc/','https://www.sinyinews.com.tw/s_channel/videodetail/60','https://www.sinyinews.com.tw/s_channel/videodetail/58']
+    return term, whitelist
 
 termlist = ["信義房屋","信義 房屋","信義房仲","信義 房仲"]
 
@@ -30,6 +29,6 @@ if clickvideo == 1 or clickvideo == 2:
     while True:
         kw, target = pickvideo()
         process_one(kw, target, clickvideo, savefile, sec)
-        time.sleep(10)
+        time.sleep(7)
 elif savefile == 1:
     execute(termlist, "", clickvideo, savefile, sec)