clickbot_100.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. import random
  2. import sys
  3. import dataset
  4. from selenium import webdriver
  5. import traceback
  6. import datetime
  7. import codecs
  8. import time
  9. import urllib
  10. import argparse
  11. import schedule
  12. import logging
  13. import sys
  14. from logging.handlers import SysLogHandler
  15. import socket
  16. _LOG_SERVER = ('hhh.ptt.cx', 514)
  17. logger = logging.getLogger('clickbot_100')
  18. handler1 = SysLogHandler(address=_LOG_SERVER,socktype=socket.SOCK_DGRAM)
  19. logger.addHandler(handler1)
  20. term='programming running..'
  21. logger.debug('[clickbot_100]['+term+']')
  22. def restart_browser():
  23. options = webdriver.ChromeOptions()
  24. options.add_argument('--headless')
  25. driver=webdriver.Chrome(options=options)
  26. driver.set_window_size(1400,1000)
  27. return driver
  28. def process_one():
  29. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  30. lst=[]
  31. table=db['save_result']
  32. cursor=db.query('select term from selected_kw where client="清原"')
  33. for c in cursor:
  34. lst.append(c['term'])
  35. term=random.choice(lst)
  36. print(term)
  37. logger.debug('[clickbot_100]['+term+']')
  38. driver=restart_browser()
  39. escaped_search_term=urllib.parse.quote(term)
  40. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW')
  41. print(googleurl)
  42. driver.get(googleurl)
  43. time.sleep(6)
  44. elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
  45. clickelmt=None
  46. cnt=1
  47. for elmt in elmts:
  48. try:
  49. href=elmt.get_attribute('href')
  50. if 'taroboba-yuan.com' in href:
  51. clickelmt=elmt
  52. logger.debug('[clickbot_100]['+term+']['+str(cnt)+']')
  53. print(href)
  54. print(elmt.text)
  55. table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'num':cnt})
  56. cnt+=1
  57. except:
  58. print('href2 exception')
  59. traceback.print_exc()
  60. if clickelmt:
  61. webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
  62. webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
  63. driver.quit()
  64. process_one()
  65. parser = argparse.ArgumentParser()
  66. parser.add_argument('--loop')
  67. args = parser.parse_args()
  68. if args.loop:
  69. schedule.every(6).minutes.do(process_one)
  70. while True:
  71. schedule.run_pending()
  72. time.sleep(1)