Browse Source

Merge remote-tracking branch 'origin/master'

zooey 2 years ago
parent
commit
8ea066c799

+ 2 - 2
website_clickjobs/type-1/hhh_r.py

@@ -69,5 +69,5 @@ while True:
         time.sleep(3)
         statuscode = execute(domain, target_domain, brands, ql)
         if statuscode == 444:
-            break
-time.sleep(60)
+            time.sleep(120)
+time.sleep(60)

BIN
website_clickjobs/type-FD-N/__pycache__/_clickjob.cpython-39.pyc


BIN
website_clickjobs/type-FD-N/__pycache__/_clickjobexp.cpython-39.pyc


+ 119 - 0
website_clickjobs/type-V/_execute.py

@@ -0,0 +1,119 @@
+from selenium import webdriver
+import traceback
+import time
+import argparse
+#import schedule
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+import pandas as pd
+from PIL import Image
+import dataset
+from datetime import datetime
+from random import randint
+
+#from setting import rua
+
+# Module-level connection to the MySQL "seo" database, opened at import time.
+# NOTE(review): the user name and password are embedded in this URL and are
+# therefore committed to version control -- consider loading them from
+# configuration or the environment instead.
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+# Table that process_one() inserts one row into after each click.
+table=db['nda_log']
+# ChromeDriver locations. Nothing in this module as shown reads these two
+# values (webdriver.Chrome() is created without a path) -- presumably kept for
+# machines that need an explicit driver path; confirm before removing.
+path = 'C:/ChromeDriver' #pls adjust
+path_z = 'C:/ChromeDriver' #pls adjust
+
+def restart_browser():
+    options = webdriver.ChromeOptions()
+    #options.add_argument("user-agent=%s" % rua())
+    options.add_argument('--headless')
+    options.add_argument('--incognito')
+    options.add_argument('--no-sandbox')
+    driver=webdriver.Chrome(options=options)
+    driver.set_window_size(950, 20000)
+    return driver
+
+# Module-level column store; the keys are Chinese column labels meaning, in
+# order: search term, domain, result title, result URL, result rank.
+# No function in this module as shown reads or writes it.
+resultdict={'搜尋詞':[],'網域':[],'結果標題':[],'結果網址':[],'結果名次':[]}
+
+def process_one(term, target, n, sr):
+    try:
+        print(term)
+
+        driver=restart_browser()
+        # escaped_search_term=urllib.parse.quote(term)
+        googleurl = 'https://www.google.com/search?num=100&tbm=vid&hl=zh-TW&q=' + term
+        driver.get(googleurl)
+        time.sleep(6)
+        fname=term.replace(' ','_')
+
+        df=pd.DataFrame()
+    
+        elmts=driver.find_elements(By.CLASS_NAME,"ct3b9e")
+        count = len(elmts)
+        print(count)
+        time.sleep(10)
+    
+        cnt=1
+        datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
+
+        found=0
+        clickelmt = None
+        resultttl = "X"
+        resultlink = "X"
+        resultrank = "X"
+        
+        for elmt in elmts:
+            try:
+                href=elmt.find_element(By.TAG_NAME,"a").get_attribute('href')
+                datadict['搜尋詞'].append(term)
+                datadict['結果標題'].append(elmt.text)
+                datadict['結果網址'].append(href)
+                datadict['結果名次'].append(str(cnt))
+                if (n==1 or n==2) and target in href and found==0:
+                    found=1
+                    clickelmt = elmt
+                    resultrank = str(cnt)
+                    resultlink = href
+                    resultttl = elmt.text
+                    print("Target domain found")
+                    print(resultttl)
+                    print(resultlink)
+                    print(resultrank)
+                cnt+=1
+            except:
+                print('href2 exception')
+                traceback.print_exc()
+
+        if len(datadict['結果標題'])<=0:
+            print('None',term,)
+            # driver.quit()
+            # sys.exit()
+        
+        if n==2 and found==1:
+            webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
+            webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
+            print("Clicked")
+            time_stamp = datetime.fromtimestamp(time.time())
+            time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S")
+            table.insert({'ranking':resultrank,'kw':term,'results':count,'url':resultlink,'title':resultttl,'dt':time_stamp})
+            time.sleep(30)
+        
+        driver.quit()
+        print('completed')
+        time.sleep(randint(20,40)) #adjustable
+    except:
+        traceback.print_exc()
+
+    #db_local.close()
+
+def execute(termlist, target, n, sr):
+    for term in termlist:
+        process_one(term, "", n, sr)
+        parser = argparse.ArgumentParser()
+        parser.add_argument('--loop',action="store_true")
+        args = parser.parse_args()
+
+# if args.loop:
+#     schedule.every(0.4).minutes.do(process_one)
+#     # print('今天開始')
+#     # schedule.every().day.at('9:30').do(process_one)
+#
+#     while True:
+#         schedule.run_pending()
+#         time.sleep(1)
+

+ 33 - 0
website_clickjobs/type-V/c1.py

@@ -0,0 +1,33 @@
+from _execute import *
+import random
+
+def pickvideo():
+    """Pick a random search term and a random URL from that term's whitelist.
+
+    Returns:
+        A ``(term, url)`` tuple: one of the four search phrases below and one
+        entry chosen with random.choice() from the list paired with it.
+    """
+    # randint is inclusive on both ends, so n is 0..3 and exactly one of the
+    # four branches below assigns term and whitelist.
+    n = random.randint(0,3)
+    if n==0:
+        term = "信義房屋"
+        whitelist = ["https://www.youtube.com/user/sinyicity","https://www.youtube.com/watch?v=7UGiKEc2JEU","https://www.youtube.com/channel/UCU76s7FGtDvDta2mf2Zdn4Q","https://www.youtube.com/watch?v=J_3FkWnoKX8","https://www.youtube.com/watch?v=nLOvQNocmWM","https://www.youtube.com/watch?v=By63yTOiPFQ","https://csr.sinyi.com.tw/client/innovation.php","https://events.sinyi.com.tw/20190828_webintro/","https://www.youtube.com/watch?v=70u509rtnHc","https://www.youtube.com/watch?v=mLerG1EN9Og","https://www.youtube.com/watch?v=HcXPOLE0DX0","https://www.youtube.com/watch?v=1NZH4Npo6Bw","https://events.sinyi.com.tw/tvc2018-forhome/index.html","https://www.sinyi.com.tw/aboutsinyi/aboutsinyi_publish","https://www.youtube.com/watch?v=nj1mTUG8yJo","https://www.youtube.com/watch?v=3Xy7YCCsnq0"]
+    if n==1:
+        term = "信義 房屋"
+        whitelist = ["https://www.youtube.com/user/sinyicity","https://www.youtube.com/watch?v=7UGiKEc2JEU","https://www.youtube.com/channel/UCU76s7FGtDvDta2mf2Zdn4Q","https://www.youtube.com/watch?v=J_3FkWnoKX8","https://www.youtube.com/watch?v=nLOvQNocmWM","https://www.youtube.com/watch?v=By63yTOiPFQ","https://csr.sinyi.com.tw/client/innovation.php","https://events.sinyi.com.tw/20190828_webintro/","https://www.youtube.com/watch?v=70u509rtnHc","https://www.youtube.com/watch?v=mLerG1EN9Og","https://www.youtube.com/watch?v=HcXPOLE0DX0","https://www.youtube.com/watch?v=1NZH4Npo6Bw","https://events.sinyi.com.tw/tvc2018-forhome/index.html","https://www.sinyi.com.tw/aboutsinyi/aboutsinyi_publish","https://www.youtube.com/watch?v=nj1mTUG8yJo","https://www.youtube.com/watch?v=3Xy7YCCsnq0","https://www.youtube.com/watch?v=IMjOhpAHcfs","https://www.youtube.com/watch?v=2a510BJzyn0","https://www.youtube.com/watch?v=TwZwAAqkX_k","https://www.youtube.com/watch?v=DLbCu_zia-4","https://www.youtube.com/watch?v=mlqY5X2oTuk"]
+    if n==2:
+        term = "信義房仲"
+        whitelist = ["https://news.housefun.com.tw/news/article/112920310098.html","https://www.facebook.com/SinyiRealtyInc.HR/videos/%E6%88%BF%E4%BB%B2%E5%B7%A5%E4%BD%9C%E9%96%8B%E7%AE%B1%E5%85%AD%E5%80%8B%E6%9C%88%E4%B9%8B%E5%BE%8C%E6%88%91%E9%82%84%E5%9C%A8/932749653922769/","https://www.youtube.com/watch?v=uOsZiIhyxVM","https://www.youtube.com/watch?v=juJTjzi4DV0","https://www.youtube.com/user/sinyicity","https://sinyipodcast.com.tw/article/news/trend/44","https://news.housefun.com.tw/news/article/205819310765.html","https://www.sinyinews.com.tw/sinyipedia/buyarticle/337","https://www.youtube.com/watch?v=X7OcMNhXPD8","https://www.youtube.com/watch?v=By63yTOiPFQ","https://www.youtube.com/watch?v=nj1mTUG8yJo","https://www.youtube.com/watch?v=Hi-IY6R7_10","https://www.youtube.com/watch?v=mlqY5X2oTuk","https://www.youtube.com/watch?v=1NZH4Npo6Bw","https://www.youtube.com/watch?v=U95DWC8OXzc","https://www.youtube.com/watch?v=IMjOhpAHcfs","https://www.youtube.com/watch?v=mLerG1EN9Og","https://csr.sinyi.com.tw/client/innovation.php","https://ydwell.com.tw/%E2%9F%B0%E6%88%91%E5%9C%A8%E4%BF%A1%E7%BE%A9%E4%B8%8A%E7%8F%AD/%E4%BB%80%E9%BA%BC%E6%88%BF%E4%BB%B2%E8%B6%85%E7%B4%9A%E6%A5%AD%E5%8B%99%E5%8F%AF%E4%BB%A5%E4%B8%80%E6%AC%A1%E9%A0%98%E8%B5%B0600%E8%90%AC%EF%BC%8C%E6%9C%83%E4%B8%8D%E6%9C%83%E5%A4%AA%E8%AA%87/","https://zh-tw.facebook.com/SinyiRealtyInc.HR/videos/2022%E4%B8%83%E6%9C%88%E7%B7%9A%E4%B8%8A%E6%88%BF%E4%BB%B2%E5%B7%A5%E4%BD%9C%E8%AA%AA%E6%98%8E%E6%9C%83/437840664688242/","https://www.youtube.com/watch?v=tQVXzPvrAhk","https://www.youtube.com/watch?v=nLOvQNocmWM"]
+    if n==3:
+        term = "信義 房仲"
+        whitelist = ["https://www.facebook.com/SinyiRealtyInc.HR/videos/%E6%88%BF%E4%BB%B2%E5%B7%A5%E4%BD%9C%E9%96%8B%E7%AE%B1%E5%85%AD%E5%80%8B%E6%9C%88%E4%B9%8B%E5%BE%8C%E6%88%91%E9%82%84%E5%9C%A8/932749653922769/","https://www.youtube.com/watch?v=uOsZiIhyxVM","https://www.youtube.com/watch?v=juJTjzi4DV0","https://news.housefun.com.tw/news/article/771949232630.html","https://sinyipodcast.com.tw/article/news/trend/44","https://www.youtube.com/user/sinyicity","https://www.youtube.com/watch?v=X7OcMNhXPD8","https://www.youtube.com/watch?v=By63yTOiPFQ","https://www.sinyinews.com.tw/sinyipedia/buyarticle/337","https://www.youtube.com/watch?v=nj1mTUG8yJo","https://www.youtube.com/watch?v=U95DWC8OXzc","https://www.youtube.com/watch?v=mlqY5X2oTuk","https://www.youtube.com/watch?v=Hi-IY6R7_10","https://www.youtube.com/watch?v=1NZH4Npo6Bw","https://www.youtube.com/watch?v=IMjOhpAHcfs","https://www.youtube.com/watch?v=mLerG1EN9Og"]
+    return term, random.choice(whitelist)
+
+# Search phrases used by the search-only branch at the bottom of this script.
+termlist = ["信義房屋","信義 房屋","信義房仲","信義 房仲"]
+
+# Mode switch, passed to process_one()/execute() as their `n` argument:
+#   1 -> report when a whitelisted video is found (DOES NOT CLICK); needs whitelist
+#   2 -> click the video when found; needs whitelist
+#   any other value -> only search for videos based on termlist
+clickvideo = 0
+# Set to 1 to save screenshot and ranking to file.
+# NOTE(review): the process_one() shown alongside this script accepts this
+# value as `sr` but never reads it -- confirm whether saving is implemented.
+savefile = 1
+
+if clickvideo == 1 or clickvideo == 2:
+    # Find/click mode: repeat forever with a freshly picked term and target.
+    while True:
+        kw, target = pickvideo()
+        process_one(kw, target, clickvideo, savefile)
+        # `time` is not imported here; it arrives via `from _execute import *`.
+        time.sleep(10)
+elif savefile == 1:
+    # Search-only mode: one pass over termlist, with no target to match.
+    execute(termlist, "", clickvideo, savefile)

+ 138 - 0
website_clickjobs/type-Y/_execute.py

@@ -0,0 +1,138 @@
+from selenium import webdriver
+import traceback
+import time
+import argparse
+#import schedule
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+import pandas as pd
+from PIL import Image
+import dataset
+from datetime import datetime
+from random import randint
+
+#from setting import rua
+
+# Module-level connection to the MySQL "seo" database, opened at import time.
+# NOTE(review): the user name and password are embedded in this URL and are
+# therefore committed to version control -- consider loading them from
+# configuration or the environment instead.
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+# Table that process_one() inserts one row into after each click.
+table=db['nda_log']
+# ChromeDriver locations. Nothing in this module as shown reads these two
+# values (webdriver.Chrome() is created without a path) -- presumably kept for
+# machines that need an explicit driver path; confirm before removing.
+path = 'C:/ChromeDriver' #pls adjust
+path_z = 'C:/ChromeDriver' #pls adjust
+
+def restart_browser():
+    options = webdriver.ChromeOptions()
+    #options.add_argument("user-agent=%s" % rua())
+    options.add_argument('--headless')
+    options.add_argument('--incognito')
+    options.add_argument('--no-sandbox')
+    driver=webdriver.Chrome(options=options)
+    driver.set_window_size(950, 20000)
+    return driver
+
+# Module-level column store; the keys are Chinese column labels meaning, in
+# order: search term, domain, result title, result URL, result rank.
+# No function in this module as shown reads or writes it.
+resultdict={'搜尋詞':[],'網域':[],'結果標題':[],'結果網址':[],'結果名次':[]}
+
+def process_one(term, target, n, sr):
+    try:
+        print(term)
+
+        driver=restart_browser()
+        # escaped_search_term=urllib.parse.quote(term)
+        yturl = 'https://www.youtube.com/results?search_query=' + term
+        driver.get(yturl)
+        time.sleep(6)
+        fname=term.replace(' ','_')
+
+        df=pd.DataFrame()
+
+        more=driver.find_elements(By.XPATH, "//div[@id='more']")
+        for m in more:
+            try:
+                morebtn = m.find_element(By.TAG_NAME, "yt-formatted-string")
+                webdriver.ActionChains(driver).move_to_element(morebtn).perform()
+                webdriver.ActionChains(driver).move_to_element(morebtn).click().perform()
+            except:
+                print("nope")
+
+        time.sleep(5)
+    
+        #elmts=driver.find_elements(By.TAG_NAME,"ytd-video-renderer")
+        elmts=driver.find_elements(By.XPATH, "//div[@class='text-wrapper style-scope ytd-video-renderer']")
+        count = len(elmts)
+        print(count)
+        time.sleep(3)
+    
+        cnt=1
+        datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'作者':[],'結果說明':[],'結果名次':[]}
+        
+        found=0
+        clickelmt = None
+        resultttl = "X"
+        resultlink = "X"
+        resultrank = "X"
+
+        for elmt in elmts:
+            try:
+                dt=elmt.find_element(By.TAG_NAME,"h3")
+                title=dt.text
+                href=dt.find_element(By.TAG_NAME,"a").get_attribute('href')
+                author=elmt.find_element(By.ID, "channel-info").find_element(By.ID,"text-container")
+                #info=elmt.find_element(By.XPATH, "//yt-formatted-string[@class='metadata-snippet-text style-scope ytd-video-renderer']").text
+                #print(info)
+                datadict['搜尋詞'].append(term)
+                datadict['結果標題'].append(title)
+                datadict['結果網址'].append(href)
+                datadict['作者'].append(author.text)
+                #datadict['結果說明'].append(info)
+                datadict['結果名次'].append(str(cnt))
+                if (n==1 or n==2) and target in href and found==0:
+                    found=1
+                    clickelmt = elmt
+                    resultrank = str(cnt)
+                    resultlink = href
+                    resultttl = elmt.text
+                    print("Target domain found")
+                    print(resultttl)
+                    print(resultlink)
+                    print(resultrank)
+                cnt+=1
+            except:
+                print('href2 exception')
+                traceback.print_exc()
+
+        if len(datadict['結果標題'])<=0:
+            print('None',term,)
+            # driver.quit()
+            # sys.exit()
+
+        if n==2 and found==1:
+            webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
+            webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
+            print("Clicked")
+            time_stamp = datetime.fromtimestamp(time.time())
+            time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S")
+            table.insert({'ranking':resultrank,'kw':term,'results':count,'url':resultlink,'title':resultttl,'dt':time_stamp})
+            time.sleep(30)
+
+        driver.quit()
+        print('completed')
+        #time.sleep(randint(20,40)) #adjustable
+    except:
+        traceback.print_exc()
+
+    #db_local.close()
+
+def execute(termlist):
+    for term in termlist:
+        process_one(term)
+        parser = argparse.ArgumentParser()
+        parser.add_argument('--loop',action="store_true")
+        args = parser.parse_args()
+
+# if args.loop:
+#     schedule.every(0.4).minutes.do(process_one)
+#     # print('今天開始')
+#     # schedule.every().day.at('9:30').do(process_one)
+#
+#     while True:
+#         schedule.run_pending()
+#         time.sleep(1)
+

+ 30 - 0
website_clickjobs/type-Y/c1.py

@@ -0,0 +1,30 @@
+from _execute import *
+import random
+
+def pickvideo():
+    """Pick a random search term and a random URL from that term's whitelist.
+
+    Returns:
+        A ``(term, url)`` tuple: one of the four search phrases below and one
+        entry chosen with random.choice() from the list paired with it.
+    """
+    # randint is inclusive on both ends, so n is 0..3 and exactly one of the
+    # four branches below assigns term and whitelist.
+    n = random.randint(0,3)
+    if n==0:
+        term = "信義房屋"
+        whitelist = ["https://www.youtube.com/watch?v=TwZwAAqkX_k","https://www.youtube.com/watch?v=7UGiKEc2JEU","https://www.youtube.com/watch?v=DLbCu_zia-4","https://www.youtube.com/watch?v=By63yTOiPFQ","https://www.youtube.com/watch?v=QzyTD949cVk","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=9lJ3gnqMT4o","https://www.youtube.com/watch?v=fx4fTBh9PFo","https://www.youtube.com/watch?v=Kh4Whhp2kYA","https://www.youtube.com/watch?v=tbdpEt65LRI","https://www.youtube.com/watch?v=WC_rYXVP2g8",]
+    if n==1:
+        term = "信義 房屋"
+        whitelist = ["https://www.youtube.com/watch?v=7UGiKEc2JEU","https://www.youtube.com/watch?v=By63yTOiPFQ","https://www.youtube.com/watch?v=QzyTD949cVk","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=9lJ3gnqMT4o","https://www.youtube.com/watch?v=iA4__EcJE5I","https://www.youtube.com/watch?v=fx4fTBh9PFo","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=juJTjzi4DV0","https://www.youtube.com/watch?v=D9A8S1XaPnA","https://www.youtube.com/watch?v=tbdpEt65LRI","https://www.youtube.com/watch?v=Z4mts-HrBvU"]
+    if n==2:
+        term = "信義房仲"
+        whitelist = ["https://www.youtube.com/watch?v=7UGiKEc2JEU","https://www.youtube.com/watch?v=By63yTOiPFQ","https://www.youtube.com/watch?v=QzyTD949cVk","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=9lJ3gnqMT4o","https://www.youtube.com/watch?v=iA4__EcJE5I","https://www.youtube.com/watch?v=5dpFPoatFrg","https://www.youtube.com/watch?v=Z4mts-HrBvU","https://www.youtube.com/watch?v=Hi-IY6R7_10","https://www.youtube.com/watch?v=e2jILHgLW10","https://www.youtube.com/watch?v=juJTjzi4DV0","https://www.youtube.com/watch?v=fx4fTBh9PFo","https://www.youtube.com/watch?v=tbdpEt65LRI"]
+    if n==3:
+        term = "信義 房仲"
+        whitelist = ["https://www.youtube.com/watch?v=7UGiKEc2JEU","https://www.youtube.com/watch?v=By63yTOiPFQ","https://www.youtube.com/watch?v=QzyTD949cVk","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=9lJ3gnqMT4o","https://www.youtube.com/watch?v=iA4__EcJE5I","https://www.youtube.com/watch?v=fx4fTBh9PFo","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=juJTjzi4DV0","https://www.youtube.com/watch?v=D9A8S1XaPnA","https://www.youtube.com/watch?v=tbdpEt65LRI","https://www.youtube.com/watch?v=Z4mts-HrBvU","https://www.youtube.com/watch?v=Hi-IY6R7_10","https://www.youtube.com/watch?v=e2jILHgLW10"]
+    return term, random.choice(whitelist)
+
+# Search phrases; nothing in this script as shown reads this list.
+termlist = ["信義房屋","信義 房屋","信義房仲","信義 房仲"]
+
+# Mode switch, passed to process_one() as its `n` argument:
+#   1 -> report when a whitelisted video is found (DOES NOT CLICK); needs whitelist
+#   2 -> click the video when found; needs whitelist
+#   any other value -> this script does nothing (there is no search-only branch)
+clickvideo = 2
+
+if clickvideo == 1 or clickvideo == 2:
+    # Repeat forever with a freshly picked term and target each round.
+    while True:
+        kw, target = pickvideo()
+        process_one(kw, target, clickvideo, 0)
+        # `time` is not imported here; it arrives via `from _execute import *`.
+        time.sleep(10)