choozmo
/
bhouse_backstagev2


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
							from bs4 import BeautifulSoup
import logging

# Module-level logger named after this module; the section parsers below
# report update failures through it instead of raising.
logger = logging.getLogger(__name__)


def get_section_parser(section_class_name):
    """Look up the parser class to use for *section_class_name*.

    Returns the entry registered under that name in ``SECTION_PARSER``, or
    the generic ``SectionParser`` class when the name has no entry.
    """
    if section_class_name in SECTION_PARSER:
        return SECTION_PARSER[section_class_name]
    return SectionParser


class SectionParser():
    """Rewrite a section's HTML template one line at a time.

    ``update`` is called once per template line. Depending on which marker
    the line contains, the text of a ``<b>``, ``<p>``, ``<a>`` or ``<div>``
    element, or the ``src`` / ``data-videoid`` attribute inside an
    ``<amp-img>`` / ``<amp-youtube>`` element, is replaced with the next
    unused value from ``update_data``.

    ``update_data`` is read with these keys:
      * ``'b'``, ``'p'``, ``'a'``, ``'card_text'``, ``'title_mb_text'`` --
        sequences of replacement strings, consumed in order;
      * ``'img'`` -- sequence of dicts, each read for its ``'src'`` key;
      * ``'yt_video'`` -- sequence of dicts, each read for ``'videoid'``.
    """

    def __init__(self, update_data):
        """Store the replacement data and reset every per-kind cursor."""
        self.update_data = update_data
        # One cursor per kind of replaceable element: index of the next
        # unused entry in the matching ``update_data`` sequence.
        self.b_order, self.p_order, self.img_order, self.a_order = 0, 0, 0, 0
        self.card_text_order, self.title_mb_text_order, self.yt_video_order = 0, 0, 0
        # True while the lines being fed are inside a multi-line
        # <amp-img> / <amp-youtube> element.
        self.is_amp_img, self.is_amp_youtube = False, False

    def _get_spaces(self, text):
        """Return the padding that is put in front of a re-serialised line.

        The result is a run of spaces one shorter than the number of
        characters preceding the first ``'<'`` in *text* (the whole length
        of *text* when it contains no ``'<'``); it is empty when fewer than
        two characters precede the ``'<'``.
        """
        # NOTE(review): the "- 1" is kept from the original implementation;
        # presumably it compensates for how callers split the template --
        # confirm against the caller.
        leading = len(text.partition('<')[0])
        return (leading - 1) * ' '

    def _replace_tag_string(self, text, tag_name, data_key, order):
        """Return *text* with its first *tag_name* element's string replaced.

        The replacement is ``update_data[data_key][order]``. Raises
        (``AttributeError`` / ``IndexError``) when the tag is not found or
        the data sequence is exhausted; ``update`` handles that.
        """
        soup = BeautifulSoup(text, "html.parser")
        soup.find(tag_name).string = self.update_data.get(data_key, [''])[order]
        return self._get_spaces(text) + str(soup)

    def update(self, content, text):
        """Append the (possibly rewritten) line *text* to *content*.

        Args:
            content: the output accumulated so far.
            text: the next template line.

        Returns:
            ``content`` followed by the rewritten line, or by the line as it
            stood when a rewrite step failed (the failure is logged, never
            raised).
        """
        try:
            # A cursor only advances after its replacement succeeded, so a
            # failed line does not consume a data entry.
            if '<b' in text:
                text = self._replace_tag_string(text, 'b', 'b', self.b_order)
                self.b_order += 1
            elif '<p' in text:
                text = self._replace_tag_string(text, 'p', 'p', self.p_order)
                self.p_order += 1
            elif '<a ' in text and '</a>' in text:
                text = self._replace_tag_string(text, 'a', 'a', self.a_order)
                self.a_order += 1
            elif 'card-text' in text:
                text = self._replace_tag_string(
                    text, 'div', 'card_text', self.card_text_order)
                self.card_text_order += 1
            elif 'title mb' in text:
                # Fixed: index with the title cursor, not the card-text one.
                text = self._replace_tag_string(
                    text, 'div', 'title_mb_text', self.title_mb_text_order)
                self.title_mb_text_order += 1
            elif '<amp-img' in text:
                self.is_amp_img = True
            elif '<amp-youtube' in text:
                self.is_amp_youtube = True

            if self.is_amp_img:
                if 'src=' in text:
                    # Everything after the last 'src=' on this line is
                    # replaced, so the attribute is expected to sit on a
                    # line of its own.
                    text_list = text.split('src=')
                    text_list[-1] = 'src="{}"\n'.format(self.update_data.get(
                        'img', [{}])[self.img_order].get('src', ''))
                    text = ''.join(text_list)
                if '</amp-img>' in text:
                    # Fixed: leave amp-img mode once the element is closed
                    # (the flag used to be set to True again, so the parser
                    # never left this mode).
                    self.is_amp_img = False
                    self.img_order += 1
            elif self.is_amp_youtube:
                if 'data-videoid' in text:
                    text_list = text.split('data-videoid=')
                    text_list[-1] = 'data-videoid="{}"\n'.format(self.update_data.get(
                        'yt_video', [{}])[self.yt_video_order].get('videoid', ''))
                    text = ''.join(text_list)
                if '</amp-youtube>' in text:
                    # Fixed: leave amp-youtube mode once the element closes.
                    self.is_amp_youtube = False
                    self.yt_video_order += 1
        except Exception as err:
            # Best effort: keep the line and carry on with the rest of the
            # template rather than abort the whole render.
            logger.error('section parser failed with %s', err)
        return content + text


class Section18SectionParser(SectionParser):
    """Line rewriter for the ``section18`` template.

    Replaces the text of ``title mb`` divs, of the line that directly
    follows the second ``title mb`` div (the "pure div"), of ``mb-5`` divs
    and of ``<a>`` elements, using the ``'title_mb_text'``,
    ``'pure_div_text'``, ``'mb_5_text'`` and ``'a'`` sequences of
    ``update_data`` respectively.
    """

    def __init__(self, update_data):
        """Initialise the base cursors plus the section18-specific state."""
        super().__init__(update_data)
        # Set once the second 'title mb' line was rewritten: the next line
        # fed to ``update`` is then treated as the pure div.
        self.is_pure_div = False
        # Cursors into 'pure_div_text' and 'mb_5_text'.
        self.is_pure_div_order, self.mb_5_order = 0, 0

    def _update_tag_data(self, text, tag_name, update_key, order):
        """Return *text* with its first *tag_name* element's string replaced.

        The replacement is ``update_data[update_key][order]``. Raises when
        the tag is not found or the data sequence is exhausted; ``update``
        logs such failures.
        """
        soup = BeautifulSoup(text, "html.parser")
        soup.find(tag_name).string = self.update_data.get(update_key, [''])[order]
        return self._get_spaces(text) + str(soup)

    def _update_div_data(self, text, update_key, order):
        """Return *text* with its first ``<div>``'s string replaced."""
        return self._update_tag_data(text, 'div', update_key, order)

    def update(self, content, text):
        """Append the (possibly rewritten) line *text* to *content*.

        Args:
            content: the output accumulated so far.
            text: the next template line.

        Returns:
            ``content`` followed by the rewritten line, or by the unchanged
            line when the rewrite failed (the failure is logged, never
            raised).
        """
        try:
            if 'title mb' in text:
                # Fixed: index with the title cursor. The card-text cursor
                # used before is never advanced by this class, so every
                # title received entry 0.
                text = self._update_div_data(
                    text, 'title_mb_text', self.title_mb_text_order)
                self.title_mb_text_order += 1
                if self.title_mb_text_order == 2:
                    self.is_pure_div = True
            elif self.is_pure_div:
                text = self._update_div_data(
                    text, 'pure_div_text', self.is_pure_div_order)
                self.is_pure_div_order += 1
                self.is_pure_div = False
            elif 'mb-5' in text:
                text = self._update_div_data(text, 'mb_5_text', self.mb_5_order)
                self.mb_5_order += 1
            elif '<a ' in text:
                text = self._update_tag_data(text, 'a', 'a', self.a_order)
                self.a_order += 1
        except Exception as err:
            # Best effort: keep the original line and carry on.
            logger.error('section parser failed with %s', err)
        return content + text


# Registry used by get_section_parser(): section class name -> parser class.
# Names without an entry fall back to the generic SectionParser.
SECTION_PARSER = {
    'section18': Section18SectionParser,
}