| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 | 
							- import xml.etree.ElementTree as XET
 
- from bs4 import BeautifulSoup
 
- import requests
 
- import csv   # 載入csv套件
 
- from urllib.request import urlopen
 
- import xml.etree.ElementTree as ET
 
- """
 
-     ElementTree.write()       將構建的XML文檔寫入(更新)文件。
 
-     Element.set(key, value)   添加和修改屬性
 
-     Element.text = ''         直接改變字段內容
 
-     Element.remove(Element)   刪除Element節點
 
-     Element.append(Element)   為當前的Element對象添加子對象
 
-     ET.SubElement(Element,tag)創建子節點 
 
- """
 
- #  增加自動縮進換行
 
def indent(elem, level=0):
    """Add newline/indent whitespace to ``elem`` in place so it serializes prettily.

    Only ``text`` and ``tail`` values that are missing or whitespace-only are
    overwritten; real text content is left untouched. Two spaces are used per
    nesting level.

    Args:
        elem: The element whose subtree is re-indented (modified in place).
        level: Nesting depth of ``elem``; 0 for the document root.

    Returns:
        None.
    """
    pad = "\n" + level * "  "

    if not len(elem):
        # Leaf element: only its tail needs setting, and a leaf root
        # (level 0) is left completely untouched.
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = pad
        return

    # Whitespace before the first child sits one level deeper than elem.
    if not elem.text or not elem.text.strip():
        elem.text = pad + "  "
    if not elem.tail or not elem.tail.strip():
        elem.tail = pad

    for child in elem:
        indent(child, level + 1)

    # The last child's tail is what precedes elem's closing tag, so it is
    # pulled back to elem's own depth instead of the children's depth.
    last_child = elem[-1]
    if not last_child.tail or not last_child.tail.strip():
        last_child.tail = pad
 
- # 將網路上的XML下載下來          
 
def get_XML(url, path='tmp.xml', timeout=30):
    """Download ``url`` and save a pretty-printed copy of the response body.

    The body is run through BeautifulSoup's ``html.parser`` and written with
    ``prettify()``, which re-indents the markup.

    Args:
        url: Address of the XML document to fetch.
        path: Destination file; defaults to ``tmp.xml``.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the destination file cannot be written.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here rather than saving an error page as if it were the sitemap.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # Explicit UTF-8 so the saved bytes do not depend on the platform's
    # default encoding; the context manager closes the file even if the
    # write raises.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(soup.prettify())
 
def _add_url_entry(urlset, values):
    """Append a ``<url>`` child to ``urlset`` holding ``values`` as loc/lastmod/priority/changefreq."""
    entry = ET.SubElement(urlset, "url")
    field_tags = ("loc", "lastmod", "priority", "changefreq")
    # zip() stops at the shorter sequence, so a short ``values`` simply
    # produces fewer sub-elements instead of raising.
    for tag, value in zip(field_tags, values):
        ET.SubElement(entry, tag).text = value


def read_and_add_XML():
    """Merge the entries of ``tmp.xml`` and ``Table.csv`` into ``sitemap_new.xml``.

    Steps:
      1. Parse ``tmp.xml`` and copy each child of its root into a new
         ``<urlset>``. The first four sub-elements of each child are taken
         positionally as loc, lastmod, priority and changefreq.
         NOTE(review): this relies on the source sitemap's field order;
         confirm against the real feed.
      2. Write ``sitemap_new.xml`` with the old entries.
      3. Append one ``<url>`` per row of ``Table.csv`` (column 0 -> loc,
         column 1 -> lastmod, priority fixed at "1.0", changefreq "daily")
         and write the file again.

    Children of the old root with no sub-elements, and CSV rows with fewer
    than two columns (such as blank lines), are skipped rather than raising.

    Raises:
        OSError: If ``tmp.xml`` or ``Table.csv`` cannot be opened, or the
            output cannot be written.
        xml.etree.ElementTree.ParseError: If ``tmp.xml`` is not well-formed.
    """
    old_root = XET.parse('tmp.xml').getroot()

    urlset = ET.Element(
        "urlset",
        {
            "xmlns": "http://www.sitemaps.org/schemas/sitemap/0.9",
            "xmlns:xhtml": "http://www.w3.org/1999/xhtml",
        },
    )
    tree = ET.ElementTree(urlset)

    print("add old data ... ")
    for child in old_root:
        if not len(child):
            continue
        # Text may be missing on an empty element; treat it as "" so that
        # .strip() cannot fail on None.
        _add_url_entry(urlset, [(field.text or "").strip() for field in child[:4]])

    # Checkpoint: the old entries are on disk even if the CSV step fails.
    indent(urlset, 0)
    tree.write("sitemap_new.xml", encoding="utf-8", xml_declaration=True)

    print("add new data ... ")
    # newline="" lets the csv module handle line endings itself, and
    # encoding="utf-8" is required because the data may contain
    # non-ASCII characters.
    with open("Table.csv", newline="", encoding="utf-8") as file:
        for row in csv.reader(file):
            # csv.reader yields [] for blank lines; skip anything that
            # cannot supply both loc and lastmod.
            if len(row) < 2:
                continue
            _add_url_entry(
                urlset,
                (
                    row[0].replace('\n', ''),
                    row[1].replace('\n', ''),
                    "1.0",
                    "daily",
                ),
            )

    indent(urlset, 0)
    tree.write("sitemap_new.xml", encoding="utf-8", xml_declaration=True)
 
- #創建子節點,並添加屬性
 
- #創建elementtree對象,寫文件
 
def main():
    """Download the hard-coded sitemap URL, then run the merge step."""
    sitemap_url = "https://hhh.com.tw/sitemap.xml"
    get_XML(sitemap_url)
    read_and_add_XML()


# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
 
 
  |