# gstest.py (1.4 KB)
  1. from bs4 import BeautifulSoup
  2. from requests import get
  3. def search(term, num_results=10, lang="en", proxy=None):
  4. usr_agent = {
  5. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
  6. 'Chrome/61.0.3163.100 Safari/537.36'}
  7. def fetch_results(search_term, number_results, language_code):
  8. escaped_search_term = search_term.replace(' ', '+')
  9. google_url = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, number_results+1,
  10. language_code)
  11. proxies = None
  12. if proxy:
  13. if proxy[:5]=="https":
  14. proxies = {"https":proxy}
  15. else:
  16. proxies = {"http":proxy}
  17. response = get(google_url, headers=usr_agent, proxies=proxies)
  18. response.raise_for_status()
  19. return response.text
  20. def parse_results(raw_html):
  21. soup = BeautifulSoup(raw_html, 'html.parser')
  22. result_block = soup.find_all('div', attrs={'class': 'g'})
  23. for result in result_block:
  24. link = result.find('a', href=True)
  25. title = result.find('h3')
  26. if link and title:
  27. yield link['href']
  28. html = fetch_results(term, num_results, lang)
  29. return list(parse_results(html))
  30. search('test')