# profile_selenium.py (2.5 KB)
  1. import traceback
  2. from selenium import webdriver
  3. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  4. import time
  5. import os
  6. import datetime
  7. import urllib.parse
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.common.by import By
  10. from selenium.webdriver.support import expected_conditions as EC
  11. import codecs
  12. import random
  13. from bs4 import BeautifulSoup
  14. import requests
  15. import time
  16. import rpyc
  17. import sys
  18. import docker
  19. import googlesearch
  20. import codecs
  21. import sys
  22. import time
  23. import dataset
  24. import os
  25. def process_one(driver):
  26. lst=[]
  27. elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
  28. for elmt in elmts:
  29. try:
  30. href=elmt.get_attribute('href')
  31. # print(href)
  32. txt=elmt.text.split('\n')
  33. print(txt[0])
  34. lst.append({'title':txt[0],'url':href})
  35. except:
  36. print('href2 exception')
  37. traceback.print_exc()
  38. return lst
  39. def process_query(driver,qs,number_results=10,language_code='en'):
  40. escaped_search_term=urllib.parse.quote(qs)
  41. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, number_results+1,language_code)
  42. print(googleurl)
  43. driver.get(googleurl)
  44. time.sleep(3)
  45. totallst=[]
  46. while True:
  47. lst=process_one(driver)
  48. totallst+=lst
  49. try:
  50. elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
  51. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  52. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  53. except:
  54. traceback.print_exc()
  55. print('pnnext exception')
  56. break
  57. time.sleep(1.5)
  58. return totallst
  59. result=[]
  60. driver=None
  61. def restart_browser():
  62. # os.system('docker container restart p4444')
  63. # time.sleep(10)
  64. options = webdriver.ChromeOptions()
  65. options.add_argument("start-maximized")
  66. options.add_argument('user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data')
  67. options.add_argument('--profile-directory=Profile 77')
  68. driver=webdriver.Chrome(chrome_options=options)
  69. #driver = webdriver.Remote(
  70. # command_executor='http://127.0.0.1:4444/wd/hub',
  71. #desired_capabilities=options.to_capabilities())
  72. # desired_capabilities=DesiredCapabilities.CHROME)
  73. driver.set_window_size(1400,1000)
  74. return driver
  75. driver=restart_browser()
  76. driver.get('http://facebook.com')
  77. time.sleep(9999)