123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226 |
- from typing import NoReturn
- from selenium import webdriver
- import time
- import networkx as nx
- import dataset
- import pickle
- import traceback
- import codecs
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.common import keys
- from selenium.webdriver.common.keys import Keys
- import sys
- import os
- import time
- import re
- import pandas as pd
- from browser_common import JBrowser
- import datetime
- import dataset
- import glob
- #db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
def _hover_and_click(driver, element):
    """Move the pointer onto *element*, then click it via ActionChains."""
    webdriver.ActionChains(driver).move_to_element(element).perform()
    webdriver.ActionChains(driver).move_to_element(element).click().perform()


def _wait_clickable(driver, xpath, timeout=25):
    """Block until the element at *xpath* is clickable and return it."""
    return WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((By.XPATH, xpath))
    )


def get_designer_statistics(kw):
    """Drive Chrome through Google Ads Keyword Planner and download a CSV for *kw*.

    The function opens the Keyword Planner home page, starts a
    "discover new keywords" search for ``kw``, tries to switch the result
    list to the grouped view and finally clicks the ``.csv`` entry of the
    download menu.  The file is saved wherever the Chrome profile stores
    downloads; nothing is parsed here.

    Args:
        kw: Keyword typed into the Keyword Planner search box.

    Returns:
        The string ``'ok'`` once the download click has been issued and the
        final wait has elapsed.

    Raises:
        selenium.common.exceptions.WebDriverException: (including
            ``TimeoutException``) when a mandatory page element cannot be
            found or clicked.  Failures while switching the view are only
            logged, because the download still works from the default view.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("start-maximized")
    # Reuse an existing, already signed-in Chrome profile.
    chrome_options.add_argument("user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data")
    # NOTE(review): Chrome's usual switch for choosing a profile is
    # --profile-directory; confirm that --user-profile has any effect.
    chrome_options.add_argument("--user-profile=Profile 7")
    # `options=` is accepted by Selenium 3.8+ and 4.x; `chrome_options=` was
    # deprecated and later removed.
    driver = webdriver.Chrome(options=chrome_options)

    googleurl = 'https://ads.google.com/aw/keywordplanner/home?ocid=600024232&euid=459838964&__u=6055300436&uscid=600024232&__c=2195332968&authuser=0&subid=ALL-zh-TW-et-g-aw-c-home-awhp_xin1_signin%21o2'

    try:
        driver.get(googleurl)
        # The original flow reloads twice with a pause in between before
        # interacting with the page; kept as-is.
        driver.refresh()
        time.sleep(5)
        driver.refresh()

        # "尋找新的關鍵字" = "Discover new keywords" card.
        discover_xpath = "//span[contains(text(),'尋找新的關鍵字')]/../.."
        _wait_clickable(driver, discover_xpath)
        cards = driver.find_elements(By.XPATH, discover_xpath)
        print(cards)
        for card in cards:
            print('found')
            print(card)
            print('clicking....')
            _hover_and_click(driver, card)

        # Type the keyword into the search box and confirm it.
        search_box = WebDriverWait(driver, 25).until(
            EC.presence_of_element_located((By.XPATH, "//input[contains(@class,'search-input')]"))
        )
        search_box.send_keys(kw)
        time.sleep(2)
        search_box.send_keys(Keys.ENTER)

        # "取得結果" = "Get results" button.
        results_button = _wait_clickable(driver, "//div[contains(text(),'取得結果')]/..")
        print('clicking....')
        _hover_and_click(driver, results_button)
        time.sleep(8)

        # Best effort: open the view selector ("關鍵字檢視畫面" = keyword view) ...
        try:
            view_selector = _wait_clickable(driver, "//span[contains(text(),'關鍵字檢視畫面')]/../../..")
            print('clicking....')
            _hover_and_click(driver, view_selector)
        except Exception:
            traceback.print_exc()

        # ... and pick "分組檢視畫面" (grouped view).  Also best effort.
        try:
            time.sleep(7)
            grouped_view = _wait_clickable(driver, "//span[contains(text(),'分組檢視畫面')]")
            _hover_and_click(driver, grouped_view)
        except Exception:
            print('except')
            traceback.print_exc()

        print('clicking....')
        time.sleep(10)

        # Open the download menu.
        download_menu = driver.find_element(
            By.XPATH,
            "//material-menu[contains(@class,'download download-menu-compact')]//material-ripple",
        )
        print(download_menu)
        print(download_menu.text)
        _hover_and_click(driver, download_menu)
        time.sleep(5)

        # Choose the ".csv" export format.
        csv_item = _wait_clickable(driver, "//material-select-item[@aria-label='.csv']")
        print(csv_item)
        print(csv_item.text)
        _hover_and_click(driver, csv_item)

        # Give the browser time to finish writing the download before it is
        # closed in the `finally` clause.
        time.sleep(10)
        print('after sleep')
        return 'ok'
    finally:
        # Always release the browser; otherwise Chrome stays open and keeps
        # the user-data-dir profile locked for the next run.
        driver.quit()
def _to_int(raw, infinite=99999):
    """Convert a Keyword Planner numeric cell to ``int``.

    Empty cells and the ``--`` placeholder become 0, the infinity sign becomes
    *infinite*; percent signs and thousands separators are ignored.
    """
    text = raw.strip().replace('%', '').replace(',', '')
    if not text or '--' in text:
        return 0
    if '∞' in text:
        return infinite
    return int(text)


def _row_to_entry(line, today):
    """Build one ``wordprice`` record from a tab-separated *line*.

    Returns ``None`` for lines that do not have the nine leading columns the
    record needs (for example blank lines at the end of the export).
    """
    # Drop the line terminator so it does not end up inside the last column.
    cols = line.rstrip('\r\n').split('\t')
    if len(cols) < 9:
        return None
    return {
        'keyword': cols[0],
        'month': _to_int(cols[2]),
        'change3m': _to_int(cols[3]),
        'change1y': _to_int(cols[4]),
        # The misspelled key is kept on purpose: it is the key the existing
        # table rows were inserted with.
        'comptetion': cols[5],
        'compidx': cols[6],
        'low': cols[7],
        'high': cols[8],
        # Column 28 is optional in the export.
        'brand': cols[28] if len(cols) > 28 else '',
        'dt': today,
    }


def proc_latest_file(csv_path=None, table=None):
    """Load a downloaded Keyword Planner CSV into the ``wordprice`` table.

    Args:
        csv_path: Path of the UTF-16, tab-separated export to load.  When
            omitted, the most recently modified ``*.csv`` file in
            ``C:\\Users\\jared\\Downloads`` is used.
        table: Object with an ``insert(dict)`` method that receives one record
            per data row.  When omitted, the ``wordprice`` table of the
            ``seo`` MySQL database is used.

    Raises:
        ValueError: If no CSV file is found in the download directory, or a
            numeric column holds text that cannot be converted to ``int``.
    """
    if csv_path is None:
        candidates = glob.glob('C:\\Users\\jared\\Downloads\\*.csv')
        if not candidates:
            raise ValueError('no CSV file found in C:\\Users\\jared\\Downloads')
        csv_path = max(candidates, key=os.path.getmtime)
    print(csv_path)

    if table is None:
        # NOTE(review): credentials are hard-coded in the connection URL;
        # consider moving them to configuration.
        db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
        table = db['wordprice']

    # The context manager closes the file even if a row fails to parse.
    with open(csv_path, 'r', encoding='utf-16') as fr:
        lines = fr.readlines()

    today = datetime.date.today()
    # The first three lines are skipped (presumably title/header rows of the
    # export -- confirm against a sample file).
    for line in lines[3:]:
        entry = _row_to_entry(line, today)
        if entry is None:
            continue
        table.insert(entry)
        print(entry)
# Script entry: download the Keyword Planner export for one keyword, then load
# the newest CSV from the download folder into the database.
# Keywords used on earlier runs: '關鍵字', '影片特效', '行銷', '生前契約'.
get_designer_statistics(kw='塔位')
proc_latest_file()
|