|
@@ -1,13 +1,8 @@
|
|
|
import xml.etree.ElementTree as XET
|
|
|
from bs4 import BeautifulSoup
|
|
|
import requests
|
|
|
-
|
|
|
import csv # 載入csv套件
|
|
|
-
|
|
|
-
|
|
|
from urllib.request import urlopen
|
|
|
- # 取得XML表格
|
|
|
-
|
|
|
import xml.etree.ElementTree as ET
|
|
|
|
|
|
"""
|
|
@@ -34,77 +29,86 @@ def indent(elem, level=0):
|
|
|
else:
|
|
|
if level and (not elem.tail or not elem.tail.strip()):
|
|
|
elem.tail = i
|
|
|
-
|
|
|
-response = requests.get("https://hhh.com.tw/sitemap.xml")
|
|
|
-soup = BeautifulSoup(response.text, "html.parser")
|
|
|
|
|
|
-path = 'tmp.xml'
|
|
|
-f = open(path, 'w')
|
|
|
-f.write(soup.prettify())
|
|
|
-f.close()
|
|
|
# Download the XML published on the web.
def get_XML(url):
    """Download ``url`` and save a prettified copy of it as ``tmp.xml``.

    The response body is run through BeautifulSoup's ``html.parser`` and
    the prettified markup is written to ``tmp.xml`` in the current working
    directory, overwriting any previous file.

    Args:
        url: Address of the XML document (sitemap) to download.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of saving an error page as if it were the sitemap.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    path = 'tmp.xml'
    # Write UTF-8 explicitly: the platform default encoding may be unable to
    # represent non-ASCII characters or may disagree with what the XML parser
    # expects when the file is read back.  ``with`` closes the handle even if
    # the write fails.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(soup.prettify())
|
|
|
+
|
|
|
def read_and_add_XML():
    """Merge the downloaded sitemap with the rows of ``Table.csv``.

    Reads ``tmp.xml``, copies each of its entries into a fresh ``urlset``
    root, then appends one ``url`` entry per CSV row (column 0 becomes
    ``loc``, column 1 becomes ``lastmod``, with a fixed ``priority`` of
    ``1.0`` and ``changefreq`` of ``daily``).  The result is written to
    ``sitemap_new.xml`` after each stage, so a failure in a later stage
    still leaves the output of the earlier ones on disk.

    Raises:
        FileNotFoundError: If ``tmp.xml`` or ``Table.csv`` does not exist.
        xml.etree.ElementTree.ParseError: If ``tmp.xml`` is not well-formed.
    """
    old_root = ET.parse('tmp.xml').getroot()

    urlset = ET.Element("urlset")
    urlset.attrib = {"xmlns":"http://www.sitemaps.org/schemas/sitemap/0.9","xmlns:xhtml":"http://www.w3.org/1999/xhtml"}
    tree = ET.ElementTree(urlset)
    _write_sitemap(tree)

    print("add old data ... ")

    for child in old_root:
        entry = _copy_old_entry(child)
        if entry is not None:
            urlset.append(entry)
    _write_sitemap(tree)

    print("add new data ... ")

    # newline="" lets the csv module handle line endings itself;
    # encoding="utf-8" is needed because the data contains Chinese text.
    with open("Table.csv", newline="", encoding="utf-8") as file:
        # csv.reader splits on commas; pass delimiter=":" (for example) if
        # the data uses another separator.
        for r in csv.reader(file):
            entry = _entry_from_csv_row(r)
            if entry is not None:
                urlset.append(entry)
    _write_sitemap(tree)


# Child tags copied from each old entry, in the order they are emitted.
_SITEMAP_FIELDS = ('loc', 'lastmod', 'priority', 'changefreq')


def _write_sitemap(tree):
    """Re-indent the tree's root and write it to ``sitemap_new.xml``."""
    indent(tree.getroot(), 0)
    tree.write("sitemap_new.xml", encoding="utf-8", xml_declaration=True)


def _copy_old_entry(old_entry):
    """Build a ``url`` element from one entry of the old sitemap.

    Fields are matched by tag name rather than by position, so a source
    entry that lists its children in a different order, or omits some of
    them, is copied correctly instead of being mislabelled or raising
    ``IndexError``/``AttributeError``.  Returns ``None`` when the entry has
    no usable ``loc``.
    """
    values = {}
    for field in old_entry:
        if not isinstance(field.tag, str):
            continue
        # Drop a "{namespace}" prefix, if any, to get the plain tag name.
        name = field.tag.rsplit('}', 1)[-1]
        text = (field.text or '').strip()
        if name in _SITEMAP_FIELDS and name not in values and text:
            values[name] = text

    if 'loc' not in values:
        return None

    entry = ET.Element("url")
    for name in _SITEMAP_FIELDS:
        if name in values:
            ET.SubElement(entry, name).text = values[name]
    return entry


def _entry_from_csv_row(row):
    """Build a ``url`` element from one CSV row, or ``None`` for blank rows.

    Column 0 is the location and column 1 (when present and non-blank) the
    last-modification date; embedded newlines are removed from both.
    """
    if not row:
        return None
    loc = str(row[0]).replace('\n', '')
    if not loc.strip():
        return None

    entry = ET.Element('url')
    ET.SubElement(entry, 'loc').text = loc

    lastmod = str(row[1]).replace('\n', '') if len(row) > 1 else ''
    if lastmod.strip():
        ET.SubElement(entry, 'lastmod').text = lastmod

    ET.SubElement(entry, 'priority').text = "1.0"
    ET.SubElement(entry, 'changefreq').text = "daily"
    return entry
|
|
|
|
|
|
-tree = XET.parse('tmp.xml') # 以XET套件載入XML檔案
|
|
|
-old_root = tree.getroot()
|
|
|
|
|
|
-#創建根節點
|
|
|
-a = ET.Element("urlset")
|
|
|
-a.attrib = {"xmlns":"http://www.sitemaps.org/schemas/sitemap/0.9","xmlns:xhtml":"http://www.w3.org/1999/xhtml"}
|
|
|
#創建子節點,並添加屬性
|
|
|
|
|
|
#創建elementtree對象,寫文件
|
|
|
-indent(a,0)
|
|
|
-tree = ET.ElementTree(a)
|
|
|
-tree.write("sitemap_new.xml",encoding="utf-8", xml_declaration=True)
|
|
|
-
|
|
|
-print("add old data ... ")
|
|
|
-
|
|
|
-for child in old_root :
|
|
|
- b = ET.Element("url")
|
|
|
- # 新增節點
|
|
|
- order1 = XET.SubElement(b, 'loc')
|
|
|
- #添加數據
|
|
|
- order1.text = child[0].text.strip()
|
|
|
-
|
|
|
- # 新增節點
|
|
|
- order2 = XET.SubElement(b, 'lastmod')
|
|
|
- order2.text = child[1].text.strip()
|
|
|
-
|
|
|
- order3 = XET.SubElement(b, 'priority')
|
|
|
- order3.text = child[2].text.strip()
|
|
|
-
|
|
|
- order4 = XET.SubElement(b, 'changefreq')
|
|
|
- order4.text = child[3].text.strip()
|
|
|
- a.append(b)
|
|
|
-
|
|
|
-indent(a,0)
|
|
|
-tree.write("sitemap_new.xml",encoding="utf-8", xml_declaration=True)
|
|
|
-
|
|
|
-print("add new data ... ")
|
|
|
-
|
|
|
-#開啟csv檔案,其中參數newline=""是為了讓資料的換行符號被正確載入,而參數encoding="utf-8"則是因為資料包含中文字元,為了讓中文字元能正確被載入
|
|
|
-with open("Table.csv",newline="",encoding="utf-8")as file:
|
|
|
- # 以csv.reader函式讀取資料,若資料的分隔字元非逗號(,),舉例像是冒號(:)則可增加參數delimiter = ":" 來讀取
|
|
|
- rows = csv.reader(file)
|
|
|
- # 以for迴圈將資料一行一行載入
|
|
|
- for r in rows:
|
|
|
- orders = XET.Element('url')
|
|
|
-
|
|
|
- # 新增節點
|
|
|
- order1 = XET.SubElement(orders, 'loc')
|
|
|
- order1.text = str(r[0]).replace('\n', '')
|
|
|
-
|
|
|
- # 新增節點
|
|
|
- order2 = XET.SubElement(orders, 'lastmod')
|
|
|
- order2.text = str(r[1]).replace('\n', '')
|
|
|
-
|
|
|
- order3 = XET.SubElement(orders, 'priority')
|
|
|
- order3.text = "1.0"
|
|
|
-
|
|
|
- order4 = XET.SubElement(orders, 'changefreq')
|
|
|
- order4.text = "daily"
|
|
|
- # 輸出 XML 原始資料
|
|
|
-
|
|
|
- a.append(orders)
|
|
|
-
|
|
|
-indent(a,0)
|
|
|
-tree.write("sitemap_new.xml",encoding="utf-8", xml_declaration=True)
|
|
|
+
|
|
|
def main():
    """Fetch the live sitemap, then merge it with the CSV data."""
    sitemap_url = "https://hhh.com.tw/sitemap.xml"
    get_XML(sitemap_url)
    read_and_add_XML()


if __name__ == '__main__':
    main()
|