# sitemap_reader.py

  1. import codecs
  2. from bs4 import BeautifulSoup
  3. # Reading the data inside the xml file to a variable under the name data
  4. with codecs.open('c:/tmp/sitemap.xml', 'r','utf-8') as f:
  5. data = f.read()
  6. # Passing the stored data inside the beautifulsoup parser
  7. bs_data = BeautifulSoup(data, 'xml')
  8. # Finding all instances of tag
  9. b_unique = bs_data.find_all('url')
  10. cnt=0
  11. for el in b_unique:
  12. loc=el.find('loc')
  13. print(loc.text)
  14. #b_name = bs_data.find('child', {'name':'Acer'})
  15. #print(b_unique)
  16. # Using find() to extract attributes of the first instance of the tag
  17. #b_name = bs_data.find('child', {'name':'Acer'})
  18. #print(b_name)
  19. # Extracting the data stored in a specific attribute of the `child` tag
  20. #value = b_name.get('qty')
  21. #print(value)