|
@@ -25,7 +25,7 @@ driver = None
|
|
|
def restart_browser():
|
|
|
global driver
|
|
|
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
|
|
|
- s = Service('C:\/Users\/s1301\/Downloads\/chromedriver_109/chromedriver')
|
|
|
+ s = Service('C:\/Users\/s1301\/Downloads\/chromedriver_116\/chromedriver-win32\/chromedriver')
|
|
|
options = webdriver.ChromeOptions()
|
|
|
options.add_argument('--headless')
|
|
|
# options.add_argument('--remote-debugging-port=9222')
|
|
@@ -43,29 +43,29 @@ def restart_browser():
|
|
|
|
|
|
def process_one():
|
|
|
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
|
|
|
- lst=[]
|
|
|
+ lst = []
|
|
|
table=db['google_rank']
|
|
|
- # cursor = db.query('select term,domain from seo.selected_kw where client="CLIQ露營椅"')
|
|
|
- # cursor = db.query('select term,url from seo.sns_kw where client="英雄難國美人酒"')
|
|
|
- # cursor = db.query('select term from seo.selected_kw where client="plantA"')
|
|
|
- # cursor = db.query('select term,domain from seo.select_kw where id between 2216 and 2255')
|
|
|
- cursor = db.query('select term,domain from seo.select_kw where client="美麗馨"')
|
|
|
+ # cursor = db.query('select term,domain from seo.selected_kw where client="新飛0721"')
|
|
|
+ # cursor = db.query('select term,url from seo.sns_kw where client="理茶"')
|
|
|
+ # cursor = db.query('select term,domain from seo.selected_kw where client="好晴天"')
|
|
|
+ # cursor = db.query('select term,domain from seo.select_kw where id between 1661 and 1672')
|
|
|
+ cursor = db.query('select term,domain from seo.select_kw where client="幸福空間"')
|
|
|
# cursor=db.query('select term from selected_kw and term not in (SELECT distinct(keyword) FROM ig_tags.save_result where url like "%beastparadise.net%" and datediff(now(),dt)=0)')
|
|
|
for c in cursor:
|
|
|
lst.append([c['term'],c['domain']])
|
|
|
+ # lst.append([c['term'],c['url']])
|
|
|
# lst.append(c['term'])
|
|
|
- # domain = 'pinews.asia'
|
|
|
+ # domain = 'fleurancenature.tw'
|
|
|
for i in lst:
|
|
|
print(i)
|
|
|
driver=restart_browser()
|
|
|
escaped_search_term=urllib.parse.quote(i[0])
|
|
|
- googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW')
|
|
|
+ googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(escaped_search_term, 100,'zh-TW')
|
|
|
driver.get(googleurl)
|
|
|
- time.sleep(60)
|
|
|
print(driver.current_url)
|
|
|
- # driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/'+i[0]+'.png')
|
|
|
+ # driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/新飛\/0721\/'+i[0]+'.png')
|
|
|
df=pd.DataFrame()
|
|
|
- elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
|
|
|
+ elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']//a")
|
|
|
print('結果數量',len(elmts))
|
|
|
cnt=1
|
|
|
datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
|
|
@@ -77,6 +77,11 @@ def process_one():
|
|
|
table.insert({'title':elmt.text,'url':href,'keyword':i[0],'dt':datetime.datetime.now(),'ranking':cnt})
|
|
|
print(href)
|
|
|
print(elmt.text)
|
|
|
+ # if 'hhh.com.tw' in href:
|
|
|
+ # table.insert({'title': elmt.text, 'url': href, 'keyword': i, 'dt': datetime.datetime.now(),
|
|
|
+ # 'ranking': cnt})
|
|
|
+ # print(href)
|
|
|
+ # print(elmt.text)
|
|
|
datadict['搜尋詞'].append(i[0])
|
|
|
datadict['結果標題'].append(elmt.text)
|
|
|
datadict['結果網址'].append(href)
|
|
@@ -93,11 +98,11 @@ def process_one():
|
|
|
df['結果網址']=datadict['結果網址']
|
|
|
df['結果名次']=datadict['結果名次']
|
|
|
|
|
|
- df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\soapdays\/'+i[0]+".xls")
|
|
|
+ # df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/芙樂思\/0720\/'+i+'.png')
|
|
|
|
|
|
driver.quit()
|
|
|
print('等待')
|
|
|
- time.sleep(random.randint(100,120))
|
|
|
+ time.sleep(random.randint(70,90))
|
|
|
db.close()
|
|
|
process_one()
|
|
|
|