utility.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. from bs4 import BeautifulSoup
  2. import pandas as pd
  3. from selenium.webdriver.common.by import By
  4. from selenium import webdriver
  5. from selenium.webdriver.common.action_chains import ActionChains
  6. from selenium.webdriver.common.keys import Keys
  7. #from seleniumwire import webdriver
  8. #from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  9. import time
  10. def brower_start(port):
  11. options = webdriver.ChromeOptions()
  12. # browser = webdriver.Chrome(options=options)
  13. # 上面成功再來用docker
  14. browser = webdriver.Remote(
  15. command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
  16. desired_capabilities=options.to_capabilities()
  17. )
  18. return browser
  19. def brower_start2(port):
  20. option = webdriver.ChromeOptions()
  21. option.add_argument('--disable-web-security')
  22. option.add_argument('--allow-running-insecure-content')
  23. driver = webdriver.Chrome(options=option)
  24. executor_url = driver.command_executor._url
  25. session_id = driver.session_id
  26. print (session_id)
  27. print (executor_url)
  28. time.sleep(3)
  29. return driver
  30. def serive_create():
  31. option = webdriver.ChromeOptions()
  32. option.add_argument('--disable-web-security')
  33. option.add_argument('--allow-running-insecure-content')
  34. # option.add_argument("--user-data-dir=//Users//noodles//Documents//project")
  35. # option.add_argument("profile-directory="+profilepath)
  36. driver = webdriver.Chrome('../../driver/chromedriver_win32/chromedriver', options=option)
  37. executor_url = driver.command_executor._url
  38. session_id = driver.session_id
  39. print (session_id)
  40. print (executor_url)
  41. time.sleep(3)
  42. return driver
  43. def string_check(x):
  44. return x.rstrip().lstrip()
  45. def get_content_info(driver):
  46. shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
  47. post_info = shop_soup.select("a.app-aware-link div.update-components-actor__meta ")[0]
  48. post_name = post_info.find('span', class_='t-bold').text
  49. post_name = string_check(post_name)
  50. post_position = post_info.find('span', class_='t-black--light').text
  51. post_position = string_check(post_position)
  52. print(post_name, ';', post_position)
  53. content = shop_soup.find('div',class_='feed-shared-update-v2__description-wrapper').select("span[dir='ltr']")[0].text
  54. print(content)
  55. try:
  56. content_url = shop_soup.select('div.update-components-article__link-container')[0].find('a').get('href')
  57. except:
  58. content_url = ''
  59. return {
  60. 'post_name': post_name,
  61. 'post_position':post_position,
  62. 'content':content,
  63. 'content_url':content_url
  64. }
  65. def linkedin_login(driver, config, user_choose='person2'):
  66. user = config[user_choose]['user']
  67. passwd = config[user_choose]['passwd']
  68. user_button = driver.find_element(By.ID, "username")
  69. driver.implicitly_wait(30)
  70. ActionChains(driver).move_to_element(user_button).click(user_button).send_keys(user).perform()
  71. # time.sleep(3)
  72. passwd_button = driver.find_element(By.ID, "password")
  73. driver.implicitly_wait(30)
  74. ActionChains(driver).move_to_element(passwd_button).click(passwd_button).send_keys(passwd).send_keys(Keys.ENTER).perform()
  75. # time.sleep(1)
  76. def check_duplicate(table_name, column, db):
  77. result = db.query(f'SELECT {column} FROM {table_name}')
  78. result = pd.DataFrame([dict(i) for i in result])
  79. return result[column].to_list()
  80. def check_page(driver):
  81. soup = BeautifulSoup(driver.page_source, 'html.parser')
  82. try:
  83. if soup.find('h2', class_='headline-new').text.find('我們無法聯絡到您') != -1:
  84. print('email error')
  85. ignore_button = driver.find_element(By.CSS_SELECTOR, "button.secondary-action-new")
  86. driver.implicitly_wait(30)
  87. ActionChains(driver).move_to_element(ignore_button).click(ignore_button).perform()
  88. except:
  89. pass