Browser_ads_kw.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. from typing import NoReturn
  2. from selenium import webdriver
  3. import time
  4. import networkx as nx
  5. import dataset
  6. import pickle
  7. import traceback
  8. import codecs
  9. from selenium.webdriver.common.by import By
  10. from selenium.webdriver.support.ui import WebDriverWait
  11. from selenium.webdriver.support import expected_conditions as EC
  12. from selenium.webdriver.common import keys
  13. from selenium.webdriver.common.keys import Keys
  14. import sys
  15. import os
  16. import time
  17. import re
  18. import pandas as pd
  19. from browser_common import JBrowser
  20. import datetime
  21. import dataset
  22. import glob
  23. #db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
  24. def get_designer_statistics(kw):
  25. global db
  26. data=""
  27. chrome_options = webdriver.ChromeOptions()
  28. chrome_options.add_argument("--no-sandbox")
  29. chrome_options.add_argument("--disable-dev-shm-usage")
  30. chrome_options.add_argument("start-maximized")
  31. chrome_options.add_argument("user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data")
  32. chrome_options.add_argument("--user-profile=Profile 7")
  33. driver = webdriver.Chrome(chrome_options=chrome_options)
  34. # googleurl='https://ads.google.com/aw/keywordplanner/'
  35. # googleurl='https://ads.google.com/aw/overview?ocid=600024232&euid=459838964&__u=6055300436&uscid=600024232&__c=2195332968&authuser=0&subid=ALL-zh-TW-et-g-aw-c-home-awhp_xin1_signin!o2'
  36. # googleurl='https://ads.google.com/aw/keywordplanner/home?ocid=600024232&euid=459838964&__u=6055300436&uscid=600024232&__c=2195332968&authuser=0&subid=ALL-zh-TW-et-g-aw-c-home-awhp_xin1_signin%21o2'
  37. googleurl='https://ads.google.com/aw/keywordplanner/home?ocid=600024232&euid=459838964&__u=6055300436&uscid=600024232&__c=2195332968&authuser=0&subid=ALL-zh-TW-et-g-aw-c-home-awhp_xin1_signin%21o2'
  38. # googleurl='https://ads.google.com/aw/overview?ocid=732105824&euid=459838964&__u=6055300436&uscid=732105824&__c=5922164576&authuser=0'
  39. driver.get(googleurl)
  40. driver.refresh()
  41. time.sleep(5)
  42. driver.refresh()
  43. element = WebDriverWait(driver, 25).until(EC.element_to_be_clickable((By.XPATH, "//span[contains(text(),'尋找新的關鍵字')]/../..")))
  44. elmts=driver.find_elements_by_xpath("//span[contains(text(),'尋找新的關鍵字')]/../..")
  45. print(elmts)
  46. for elmt in elmts:
  47. print('found')
  48. print(elmt)
  49. print('clicking....')
  50. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  51. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  52. elmt=WebDriverWait(driver, 25).until(EC.presence_of_element_located((By.XPATH, "//input[contains(@class,'search-input')]")))
  53. # elmt=driver.find_element_by_xpath("//input[contains(@class,'search-input')]")
  54. # elmt.send_keys("紓困")
  55. # elmt.send_keys("疫苗")
  56. # elmt.send_keys("直播")
  57. # elmt.send_keys("影片製作")
  58. elmt.send_keys(kw)
  59. time.sleep(2)
  60. elmt.send_keys(Keys.ENTER)
  61. elmt = WebDriverWait(driver, 25).until(EC.element_to_be_clickable((By.XPATH, "//div[contains(text(),'取得結果')]/..")))
  62. print('clicking....')
  63. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  64. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  65. time.sleep(8)
  66. try:
  67. elmt = WebDriverWait(driver, 25).until(EC.element_to_be_clickable((By.XPATH, "//span[contains(text(),'關鍵字檢視畫面')]/../../..")))
  68. # elmt=driver.find_element_by_xpath("//span[contains(text(),'關鍵字檢視畫面')]/../../..")
  69. print('clicking....')
  70. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  71. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  72. except:
  73. traceback.print_exc()
  74. try:
  75. time.sleep(7)
  76. # elmt=driver.find_element_by_xpath("//material-select-item[contains(@aria-label,'分組檢視畫面')]")
  77. # elmt=driver.find_element_by_xpath("//span[contains(@aria-label,'分組檢視畫面')]")
  78. elmt = WebDriverWait(driver, 25).until(EC.element_to_be_clickable((By.XPATH, "//span[contains(text(),'分組檢視畫面')]")))
  79. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  80. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  81. except:
  82. print('except')
  83. traceback.print_exc()
  84. print('clicking....')
  85. # try:
  86. # time.sleep(8)
  87. #
  88. # elmt=driver.find_element_by_xpath("//span[contains(text(),'分組檢')]/../..")
  89. # except:
  90. # print('except')
  91. # traceback.print_exc()
  92. print('clicking....')
  93. time.sleep(10)
  94. # elmt=driver.find_element_by_xpath("//material-menu[contains(@class,'download download-menu')]")
  95. # elmt=driver.find_element_by_xpath("//material-menu[contains(@class,'download download-menu')]//material-button[contains(@class,'trigger-button')]//i[@role='img']")
  96. elmt=driver.find_element_by_xpath("//material-menu[contains(@class,'download download-menu-compact')]//material-ripple")
  97. print(elmt)
  98. print(elmt.text)
  99. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  100. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  101. # body=driver.find_element_by_xpath('//body')
  102. time.sleep(5)
  103. # elmt=driver.find_element_by_xpath("//span[contains(@class,'menu-item-label') and contains(text(),'.csv')]")
  104. # elmt=driver.find_element_by_xpath("/html/body/div[4]/div[6]/div/div/div[2]/div[2]/div/menu-item-groups/div/material-select-item[1]/span/span")
  105. # elmt = WebDriverWait(driver, 25).until(EC.element_to_be_clickable((By.XPATH, "/html/body/div[4]/div[6]/div/div/div[2]/div[2]/div/menu-item-groups/div/material-select-item[1]/span/span")))
  106. # elmt = WebDriverWait(driver, 25).until(EC.element_to_be_clickable((By.XPATH, "//span[contains(@class,'menu-item-label') and contains(text(),'.csv')]")))
  107. elmt = WebDriverWait(driver, 25).until(EC.element_to_be_clickable((By.XPATH, "//material-select-item[@aria-label='.csv']" )))
  108. # elmt = WebDriverWait(driver, 25).until(EC.element_to_be_clickable((By.XPATH, "//span[contains(text(),'.csv')]")))
  109. # elmt=driver.find_element_by_xpath("//div/material-select-item[1]/span/span[contains(text(),'.csv')]")
  110. print(elmt)
  111. print(elmt.text)
  112. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  113. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  114. # body.send_keys(Keys.ARROW_DOWN)
  115. ## time.sleep(1)
  116. # body.send_keys(Keys.ARROW_DOWN)
  117. # time.sleep(1)
  118. # body.send_keys(Keys.ENTER)
  119. # elmt=driver.find_element_by_xpath("//span[contains(@class,'menu-item-label') and contains(text(),'.csv')]")
  120. # elmt=driver.find_element_by_xpath("//span[contains(@class,'menu-item-label') and contains(text(),'.csv')]")
  121. # print(elmt)
  122. # print(elmt.text)
  123. time.sleep(10)
  124. print('after sleep')
  125. # elmts=driver.find_elements_by_xpath("//div[@class='keyword-text _ngcontent-owh-97']")
  126. # elmts=driver.find_elements_by_xpath("//zippy-icon/..//keyword-text")
  127. # for elmt in elmts:
  128. # print(elmt.text)
  129. # data+=elmt.text+"\n"
  130. # fw=codecs.open('c:/tmp/out.txt','w','utf-8')
  131. # fw.write(data)
  132. # fw.close()
  133. # print(elmt)
  134. # time.sleep(9999)
  135. return 'ok'
  136. def proc_latest_file():
  137. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  138. table=db['wordprice']
  139. list_of_files = glob.glob('C:\\Users\\jared\\Downloads\\*.csv')
  140. # print(list_of_files)
  141. latest_file = max(list_of_files, key=os.path.getmtime)
  142. print(latest_file)
  143. fr=codecs.open(latest_file,'r','utf-16')
  144. lines=fr.readlines()
  145. for l in lines[3:]:
  146. elmts=l.split('\t')
  147. month=elmts[2]
  148. if '--' in month:
  149. month=0
  150. if len(month)<=0:
  151. month=0
  152. change3m=elmts[3]
  153. change3m=change3m.replace('%','')
  154. if '--' in change3m:
  155. change3m=0
  156. if change3m=='∞':
  157. change3m=99999
  158. change1y=elmts[4]
  159. change1y=change1y.replace('%','')
  160. if '--' in change1y:
  161. change1y=0
  162. if change1y=='∞':
  163. change1y=99999
  164. if len(elmts)<=28:
  165. brand=''
  166. else:
  167. # print(len(elmts))
  168. brand=elmts[28]
  169. entry={'keyword':elmts[0],'month':int(month),'change3m':int(change3m),'change1y':int(change1y),'comptetion':elmts[5],'compidx':elmts[6],'low':elmts[7],'high':elmts[8],'brand':brand,'dt':datetime.date.today()}
  170. table.insert(entry)
  171. print(entry)
  172. fr.close()
  173. #get_designer_statistics('關鍵字')
  174. #get_designer_statistics('影片特效')
  175. #get_designer_statistics('行銷')
  176. #get_designer_statistics('生前契約')
  177. get_designer_statistics('塔位')
  178. proc_latest_file()