# gsearch.py (2.8 KB)
import codecs
import os
import random
import re
import sys
import time
import urllib.parse

import dataset
import docker
import requests
import rpyc
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
  19. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
  20. headers = {
  21. "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
  22. "Content-Type": "application/x-www-form-urlencoded"
  23. }
  24. def process_query(q):
  25. global driver
  26. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  27. driver.get(googleurl)
  28. time.sleep(3)
  29. try:
  30. elmt=driver.find_element_by_xpath("//div[@class='HlosJb bOkdDe']")
  31. print(elmt.text)
  32. return 0
  33. except:
  34. print('not found')
  35. try:
  36. elmt=driver.find_element_by_xpath("//div[@id='result-stats']")
  37. print(elmt.text)
  38. m=re.search('找到约 ([\d,]+) 条结果',elmt.text)
  39. if m:
  40. txt=m.group(1).replace(',','')
  41. return int(txt)
  42. except:
  43. print('not found')
  44. return 0
  45. # idx=1
  46. client = docker.from_env()
  47. ls=client.containers.list()
  48. print(ls)
  49. ls[0].restart()
  50. time.sleep(12)
  51. options = webdriver.ChromeOptions()
  52. options.add_argument("--proxy-server=socks5://172.104.67.159:8180")
  53. driver = webdriver.Remote(
  54. command_executor='http://127.0.0.1:4444/wd/hub',
  55. #command_executor='http://192.53.174.202:4444/wd/hub',
  56. #command_executor='http://172.104.93.163:4444/wd/hub',
  57. #command_executor='http://dev2.choozmo.com:14444/wd/hub',
  58. desired_capabilities=options.to_capabilities())
  59. #desired_capabilities=DesiredCapabilities.CHROME)
  60. driver.set_window_size(1400,1000)
  61. name=None
  62. designers=[]
  63. #cursor=db.query('select name,vip from customer_list order by updated asc limit 3')
  64. cursor=db.query('select name,vip from customer_list where name not in (select designer from designer_social)')
  65. for c in cursor:
  66. name=c['name']
  67. designers.append(name)
  68. print(name)
  69. # break
  70. table=db['designer_social']
  71. for d in designers:
  72. m01=process_query('"'+d+'" site:mobile01.com')
  73. ptt=process_query('"'+d+'" site:ptt.cc')
  74. pix=process_query('"'+d+'" site:pixnet.net')
  75. table.insert({'designer':d,'m01':m01,'ptt':ptt,'pix':pix})
  76. db.commit()
  77. #num=process_query('"'+name+'"')
  78. #process_query('彙禾設計')
  79. time.sleep(9999)
  80. #https://whatismyipaddress.com/ip/61.230.75.30
  81. #driver.get('https://whatismyipaddress.com/')
  82. #fw=codecs.open('c:/tmp/gg.html','w','utf-8')
  83. #fw.write(driver.page_source)
  84. #fw.close()
  85. #import sys
  86. #sys.exit()
  87. #### qlist=get_list()