4 years ago · 127a840188
--- a/models/utils/__init__.py
+++ b/models/utils/__init__.py
@@ -0,0 +1,9 @@
 
				def write_md(f_dir, content):
				    """Write ``content`` to the file at ``f_dir``, replacing what was there.

				    The file is opened in text mode ``'w'``, so it is created when missing
				    and truncated otherwise.
				    """
				    with open(f_dir, mode='w') as handle:
				        handle.write(content)
			
 
				+
			
 
				+
			
 
				def read_line_md(f_dir):
				    """Return every line of the file at ``f_dir`` as a list of strings.

				    Line terminators are kept on each element, exactly as the file object
				    yields them.
				    """
				    with open(f_dir, mode='r') as handle:
				        # Iterating a text file yields the same items as ``readlines()``.
				        lines = list(handle)
				    return lines
			
--- a/models/utils/parsers.py
+++ b/models/utils/parsers.py
@@ -0,0 +1,119 @@
 
				+from bs4 import BeautifulSoup
			
 
				+import logging
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				def get_section_parser(section_class_name):
				    """Return the parser class registered for ``section_class_name``.

				    Names that have no entry in ``SECTION_PARSER`` fall back to the generic
				    ``SectionParser`` class.
				    """
				    parser_cls = SECTION_PARSER.get(section_class_name, SectionParser)
				    return parser_cls
			
 
				+
			
 
				+
			
 
				class SectionParser():
				    """Rewrite a section's markup one line at a time using ``update_data``.

				    ``update`` is meant to be called once per source line. Per-kind counters
				    remember how many values have already been consumed from
				    ``update_data`` so that successive matching lines receive successive
				    values.
				    """

				    # (marker searched for in the line, tag whose text is replaced,
				    #  key read from ``update_data``, name of the counter attribute).
				    # Order matters: only the first marker found in a line is applied.
				    _TEXT_RULES = (
				        ('<b', 'b', 'b', 'b_order'),
				        ('<p', 'p', 'p', 'p_order'),
				        ('<a ', 'a', 'a', 'a_order'),
				        ('card-text', 'div', 'card_text', 'card_text_order'),
				        ('title mb', 'div', 'title_mb_text', 'title_mb_text_order'),
				    )

				    def __init__(self, update_data):
				        """Store the replacement data and reset every counter and flag.

				        Args:
				            update_data: mapping read with ``.get``. The keys ``'b'``,
				                ``'p'``, ``'a'``, ``'card_text'`` and ``'title_mb_text'`` are
				                indexed by position and the item becomes the tag's text.
				                ``'img'`` and ``'yt_video'`` are indexed by position and the
				                item is read with ``.get('src')`` / ``.get('videoid')``.
				        """
				        self.update_data = update_data
				        self.b_order, self.p_order, self.img_order, self.a_order = 0, 0, 0, 0
				        self.card_text_order, self.title_mb_text_order, self.yt_video_order = 0, 0, 0
				        # True while the lines being processed sit between an opening
				        # ``<amp-img`` / ``<amp-youtube`` and its closing tag.
				        self.is_amp_img, self.is_amp_youtube = False, False

				    def _get_spaces(self, text):
				        """Return the space prefix used when a rewritten line is re-emitted.

				        The result holds one space fewer than the number of characters that
				        precede the first ``'<'`` in ``text`` (or than ``len(text)`` when
				        there is no ``'<'``); a non-positive count yields ``''``.
				        """
				        first_tag = text.find('<')
				        count = len(text) if first_tag == -1 else first_tag
				        return (count - 1) * ' '

				    def _replace_tag_text(self, text, tag_name, data_key, order):
				        """Return ``text`` with the first ``tag_name`` element's text replaced.

				        The new text is ``update_data[data_key][order]``. Raises (for the
				        caller to handle) when the tag is absent or ``order`` is out of range.
				        """
				        soup = BeautifulSoup(text, "html.parser")
				        getattr(soup, tag_name).string = self.update_data.get(data_key, [''])[order]
				        return self._get_spaces(text) + str(soup)

				    @staticmethod
				    def _replace_line_tail(text, separator, replacement):
				        """Replace everything after the last ``separator`` with ``replacement``.

				        Mirrors the original split/join: the pieces are joined with ``''``,
				        so the separator itself is only present through ``replacement``.
				        """
				        parts = text.split(separator)
				        parts[-1] = replacement
				        return ''.join(parts)

				    def update(self, content, text):
				        """Append ``text`` to ``content``, rewritten when it matches a rule.

				        Args:
				            content: the output accumulated so far.
				            text: the current source line.

				        Returns:
				            ``content`` with the (possibly rewritten) line appended. Any
				            exception raised while rewriting is logged and the line is
				            appended as it stood when the failure happened.
				        """
				        try:
				            for marker, tag_name, data_key, counter in self._TEXT_RULES:
				                if marker in text:
				                    # Each rule reads and advances its OWN counter; the
				                    # 'title mb' rule used to index with card_text_order.
				                    text = self._replace_tag_text(
				                        text, tag_name, data_key, getattr(self, counter))
				                    # Advance only after a successful rewrite so a failed
				                    # line does not consume a value.
				                    setattr(self, counter, getattr(self, counter) + 1)
				                    break
				            else:
				                if '<amp-img' in text:
				                    self.is_amp_img = True
				                elif '<amp-youtube' in text:
				                    self.is_amp_youtube = True

				            if self.is_amp_img:
				                if 'src=' in text:
				                    src = self.update_data.get(
				                        'img', [{}])[self.img_order].get('src', '')
				                    text = self._replace_line_tail(
				                        text, 'src=', 'src="{}"\n'.format(src))
				                if '</amp-img>' in text:
				                    # Leaving the element: clear the flag so later lines
				                    # containing ``src=`` are no longer rewritten.
				                    self.is_amp_img = False
				                    self.img_order += 1
				            elif self.is_amp_youtube:
				                if 'data-videoid' in text:
				                    video_id = self.update_data.get(
				                        'yt_video', [{}])[self.yt_video_order].get('videoid', '')
				                    text = self._replace_line_tail(
				                        text, 'data-videoid=',
				                        'data-videoid="{}"\n'.format(video_id))
				                if '</amp-youtube>' in text:
				                    # Same reset as above for the video element.
				                    self.is_amp_youtube = False
				                    self.yt_video_order += 1
				        except Exception as err:
				            # Best effort: a line that cannot be rewritten is kept as-is.
				            logger.error('section parser failed with {}'.format(err))
				        content += text
				        return content
			
 
				+
			
 
				+
			
 
				class Section18SectionParser(SectionParser):
				    """Parser variant registered for ``'section18'`` markup.

				    Besides ``'title mb'`` and ``<a `` lines it rewrites ``'mb-5'`` lines and
				    one "pure" ``div`` line that follows the second title.
				    """

				    def __init__(self, update_data):
				        """Initialise the base counters plus the section18-specific state."""
				        super().__init__(update_data)
				        # Set once the second 'title mb' line has been rewritten; the next
				        # line that rewrites successfully is fed from 'pure_div_text'.
				        self.is_pure_div = False
				        self.is_pure_div_order, self.mb_5_order = 0, 0

				    def _update_div_data(self, text, update_key, order):
				        """Return ``text`` with its first ``div``'s text replaced.

				        The new text is ``update_data[update_key][order]``. Raises (for the
				        caller to handle) when there is no ``div`` or ``order`` is out of
				        range.
				        """
				        soup = BeautifulSoup(text, "html.parser")
				        soup.div.string = self.update_data.get(update_key, [''])[order]
				        return self._get_spaces(text) + str(soup)

				    def update(self, content, text):
				        """Append ``text`` to ``content``, rewritten when it matches a rule.

				        Args:
				            content: the output accumulated so far.
				            text: the current source line.

				        Returns:
				            ``content`` with the (possibly rewritten) line appended. Any
				            exception raised while rewriting is logged and the line is
				            appended unchanged.
				        """
				        try:
				            if 'title mb' in text:
				                # Index with the title counter. The previous code used
				                # card_text_order, which this class never advances, so every
				                # title received element 0.
				                text = self._update_div_data(
				                    text, 'title_mb_text', self.title_mb_text_order)
				                self.title_mb_text_order += 1
				                if self.title_mb_text_order == 2:
				                    self.is_pure_div = True
				            elif self.is_pure_div:
				                text = self._update_div_data(
				                    text, 'pure_div_text', self.is_pure_div_order)
				                self.is_pure_div_order += 1
				                # Cleared only after a successful rewrite, so a line
				                # without a div leaves the flag set for the next line.
				                self.is_pure_div = False
				            elif 'mb-5' in text:
				                text = self._update_div_data(text, 'mb_5_text', self.mb_5_order)
				                self.mb_5_order += 1
				            elif '<a ' in text:
				                soup = BeautifulSoup(text, "html.parser")
				                soup.a.string = self.update_data.get('a', [''])[self.a_order]
				                text = self._get_spaces(text) + str(soup)
				                self.a_order += 1
				        except Exception as err:
				            # Best effort: a line that cannot be rewritten is kept as-is.
				            logger.error('section parser failed with {}'.format(err))
				        content += text
				        return content
			
 
				+
			
 
				+
			
 
				# Section class name -> parser class that handles it. Names that are not
				# listed here are handled by the generic SectionParser.
				SECTION_PARSER = {
				    'section18': Section18SectionParser,
				}
			
--- a/models/utils/validators.py
+++ b/models/utils/validators.py
@@ -0,0 +1,78 @@
 
				def is_valid_section(section_class_name, text):
				    """Run the validator registered for ``section_class_name`` on ``text``.

				    Names without an entry in ``IS_VALID_SECTION`` are checked with
				    ``unvalid_section``, whose result is returned unchanged.
				    """
				    validator = IS_VALID_SECTION.get(section_class_name, unvalid_section)
				    return validator(text)
			
 
				+
			
 
				+
			
 
				def is_valid_section3_section(text):
				    """Return True when ``text`` contains the ``section3`` class attribute."""
				    marker = 'class="section3"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section13_overly_section(text):
				    """Return True when ``text`` contains the ``section13 overly`` class attribute."""
				    marker = 'class="section13 overly"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section14_section(text):
				    """Return True when ``text`` contains the ``section14`` class attribute."""
				    marker = 'class="section14"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section13_section(text):
				    """Return True when ``text`` contains the exact ``section13`` class attribute."""
				    marker = 'class="section13"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section16_section(text):
				    """Return True when ``text`` contains the ``section16`` class attribute."""
				    marker = 'class="section16"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section17_section(text):
				    """Return True when ``text`` contains the ``section17`` class attribute."""
				    marker = 'class="section17"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section18_section(text):
				    """Return True when ``text`` contains the ``section18`` class attribute."""
				    marker = 'class="section18"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section19_section(text):
				    """Return True when ``text`` contains the ``section19`` class attribute."""
				    marker = 'class="section19"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section22_section(text):
				    """Return True when ``text`` contains the ``section22`` class attribute."""
				    marker = 'class="section22"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section25_section(text):
				    """Return True when ``text`` contains the ``section25`` class attribute."""
				    marker = 'class="section25"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section26_section(text):
				    """Return True when ``text`` contains the ``section26`` class attribute."""
				    marker = 'class="section26"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section27_section(text):
				    """Return True when ``text`` contains the ``section27`` class attribute."""
				    marker = 'class="section27"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section28_section(text):
				    """Return True when ``text`` contains the ``section28`` class attribute."""
				    marker = 'class="section28"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def is_valid_section29_section(text):
				    """Return True when ``text`` contains the ``section29`` class attribute."""
				    marker = 'class="section29"'
				    return marker in text
			
 
				+
			
 
				+
			
 
				def unvalid_section(text):
				    """Fallback validator: ignore ``text`` and always return ``None``."""
				    return
			
 
				+
			
 
				+
			
 
				# Section class name -> function that checks whether a piece of text
				# carries that section's class attribute.
				IS_VALID_SECTION = {
				    'section3': is_valid_section3_section,
				    'section13_overly': is_valid_section13_overly_section,
				    'section13': is_valid_section13_section,
				    'section14': is_valid_section14_section,
				    'section16': is_valid_section16_section,
				    'section17': is_valid_section17_section,
				    'section18': is_valid_section18_section,
				    'section19': is_valid_section19_section,
				    'section22': is_valid_section22_section,
				    'section25': is_valid_section25_section,
				    'section26': is_valid_section26_section,
				    'section27': is_valid_section27_section,
				    'section28': is_valid_section28_section,
				    'section29': is_valid_section29_section,
				}