Mia пре 3 година
родитељ
комит
1a536e1fe6
1 измењених фајлова са 77 додато и 73 уклоњено
  1. 77 73
      sitemap/edit.py

+ 77 - 73
sitemap/edit.py

@@ -1,13 +1,8 @@
 import xml.etree.ElementTree as XET
 import xml.etree.ElementTree as XET
 from bs4 import BeautifulSoup
 from bs4 import BeautifulSoup
 import requests
 import requests
-
 import csv   # 載入csv套件
 import csv   # 載入csv套件
-
-
 from urllib.request import urlopen
 from urllib.request import urlopen
-     # 取得XML表格 
-
 import xml.etree.ElementTree as ET
 import xml.etree.ElementTree as ET
 
 
 """
 """
@@ -34,77 +29,86 @@ def indent(elem, level=0):
     else:
     else:
         if level and (not elem.tail or not elem.tail.strip()):
         if level and (not elem.tail or not elem.tail.strip()):
             elem.tail = i
             elem.tail = i
-            
-response = requests.get("https://hhh.com.tw/sitemap.xml")
-soup = BeautifulSoup(response.text, "html.parser")
 
 
-path = 'tmp.xml'
-f = open(path, 'w')
-f.write(soup.prettify())
-f.close()
+# Download the XML found at a URL and cache it locally as tmp.xml
+def get_XML(url):
+    """Fetch ``url`` and write its prettified markup to ``tmp.xml``.
+
+    The response body is parsed with BeautifulSoup's ``html.parser`` and the
+    result of ``prettify()`` is what gets written, so the cached file is a
+    re-serialized copy of the document rather than the raw bytes.
+
+    Args:
+        url: Address of the document to download.
+
+    Raises:
+        requests.RequestException: if the request fails, times out, or the
+            server answers with an HTTP error status.
+    """
+    # Without a timeout a stalled server would hang the script forever.
+    response = requests.get(url, timeout=30)
+    # Fail here instead of caching an error page that the XML parser would
+    # choke on later.
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, "html.parser")
+    path = 'tmp.xml'
+    # The context manager closes the file even if the write fails. The
+    # encoding is pinned to UTF-8 (the encoding the sitemap protocol mandates)
+    # so the file does not depend on the platform's locale default.
+    with open(path, 'w', encoding="utf-8") as f:
+        f.write(soup.prettify())
+
+def read_and_add_XML():
+    """Merge the cached sitemap and the rows of Table.csv into sitemap_new.xml.
+
+    Steps, in order:
+      1. Parse ``tmp.xml`` and create an empty ``urlset`` root.
+      2. Copy every entry of the old sitemap into the new root.
+      3. Append one ``url`` entry per non-empty row of ``Table.csv``, using
+         column 0 as ``loc`` and column 1 as ``lastmod``, with a fixed
+         priority of "1.0" and a change frequency of "daily".
+
+    ``sitemap_new.xml`` is rewritten after each step, so a failure in a later
+    step leaves the result of the earlier steps on disk.
+
+    Raises:
+        IndexError: if an old entry has fewer than four children or a
+            non-empty CSV row has fewer than two columns.
+    """
+    # Load the previously downloaded sitemap.
+    old_root = XET.parse('tmp.xml').getroot()
+
+    # Create the new root node; the namespace declarations are stored as
+    # ordinary attributes and serialized verbatim.
+    a = ET.Element("urlset")
+    a.attrib = {"xmlns":"http://www.sitemaps.org/schemas/sitemap/0.9","xmlns:xhtml":"http://www.w3.org/1999/xhtml"}
+    indent(a, 0)
+    tree = ET.ElementTree(a)
+    tree.write("sitemap_new.xml", encoding="utf-8", xml_declaration=True)
+
+    print("add old data ... ")
+
+    # NOTE(review): old entries are read by position, i.e. the children of
+    # each entry are assumed to appear in exactly this order - confirm
+    # against the real sitemap.
+    old_fields = ('loc', 'lastmod', 'priority', 'changefreq')
+    for child in old_root:
+        b = ET.Element("url")
+        for position, tag in enumerate(old_fields):
+            # Add the node and fill it with the whitespace-trimmed old value.
+            XET.SubElement(b, tag).text = child[position].text.strip()
+        a.append(b)
+
+    indent(a, 0)
+    tree.write("sitemap_new.xml", encoding="utf-8", xml_declaration=True)
+
+    print("add new data ... ")
+
+    # newline="" lets the csv module handle line endings itself, and
+    # encoding="utf-8" is needed because the data contains Chinese text.
+    with open("Table.csv", newline="", encoding="utf-8") as file:
+        # csv.reader splits on commas by default; pass delimiter=":" (for
+        # example) if the file uses another separator.
+        for r in csv.reader(file):
+            # A blank line in the file is yielded as an empty list; skip it
+            # instead of crashing on r[0].
+            if not r:
+                continue
+
+            orders = XET.Element('url')
+            XET.SubElement(orders, 'loc').text = r[0].replace('\n', '')
+            XET.SubElement(orders, 'lastmod').text = r[1].replace('\n', '')
+            XET.SubElement(orders, 'priority').text = "1.0"
+            XET.SubElement(orders, 'changefreq').text = "daily"
+            a.append(orders)
+
+    indent(a, 0)
+    tree.write("sitemap_new.xml", encoding="utf-8", xml_declaration=True)
 
 
-tree = XET.parse('tmp.xml')  # 以XET套件載入XML檔案
-old_root = tree.getroot() 
 
 
-#創建根節點
-a = ET.Element("urlset")
-a.attrib = {"xmlns":"http://www.sitemaps.org/schemas/sitemap/0.9","xmlns:xhtml":"http://www.w3.org/1999/xhtml"}
 #創建子節點,並添加屬性
 #創建子節點,並添加屬性
 
 
 #創建elementtree對象,寫文件
 #創建elementtree對象,寫文件
-indent(a,0)
-tree = ET.ElementTree(a)
-tree.write("sitemap_new.xml",encoding="utf-8", xml_declaration=True)
-
-print("add old data ... ")
-
-for child in old_root :
-     b = ET.Element("url")
-     # 新增節點
-     order1 = XET.SubElement(b, 'loc')
-     #添加數據
-     order1.text = child[0].text.strip()
-
-     # 新增節點
-     order2 = XET.SubElement(b, 'lastmod')
-     order2.text = child[1].text.strip()
-
-     order3 = XET.SubElement(b, 'priority')
-     order3.text = child[2].text.strip()
-
-     order4 = XET.SubElement(b, 'changefreq')
-     order4.text = child[3].text.strip()
-     a.append(b)
-     
-indent(a,0)
-tree.write("sitemap_new.xml",encoding="utf-8", xml_declaration=True)
-
-print("add new data ... ")
-
-#開啟csv檔案,其中參數newline=""是為了讓資料的換行符號被正確載入,而參數encoding="utf-8"則是因為資料包含中文字元,為了讓中文字元能正確被載入
-with open("Table.csv",newline="",encoding="utf-8")as file:
-    # 以csv.reader函式讀取資料,若資料的分隔字元非逗號(,),舉例像是冒號(:)則可增加參數delimiter = ":" 來讀取
-    rows = csv.reader(file)   
-    # 以for迴圈將資料一行一行載入
-    for r in rows:
-          orders = XET.Element('url')
-
-          # 新增節點
-          order1 = XET.SubElement(orders, 'loc')
-          order1.text = str(r[0]).replace('\n', '')
-
-          # 新增節點
-          order2 = XET.SubElement(orders, 'lastmod')
-          order2.text = str(r[1]).replace('\n', '')
-
-          order3 = XET.SubElement(orders, 'priority')
-          order3.text = "1.0"
-
-          order4 = XET.SubElement(orders, 'changefreq')
-          order4.text = "daily"
-          # 輸出 XML 原始資料
-
-          a.append(orders)
-          
-indent(a,0)
-tree.write("sitemap_new.xml",encoding="utf-8", xml_declaration=True)
+
+def main():
+    """Download the live sitemap, then rebuild it with the CSV additions."""
+    sitemap_url = "https://hhh.com.tw/sitemap.xml"
+    get_XML(sitemap_url)
+    read_and_add_XML()
+
+if __name__ == '__main__':
+    main()