'''
   (C) 2019 Raryel C. Souza
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
'''

import os
import re
import sys


class SRTParser(object):
    """Convert SRT subtitle files into plain-text transcripts."""

    @staticmethod
    def extractTextFromSRT(fileSRT):
        """Write the subtitle text of an SRT file to a sibling ``.txt`` file.

        The output path is the input path with its extension replaced by
        ``.txt`` (``movie.srt`` -> ``movie.txt``). An existing file at that
        path is overwritten.

        Args:
            fileSRT: Path of the SRT file to read (decoded as UTF-8).

        Raises:
            OSError: If the input cannot be read or the output cannot be
                written.
        """
        file_encoding = 'utf-8'

        # errors='replace' substitutes undecodable bytes instead of raising,
        # so a partially corrupt file still yields a transcript.
        with open(fileSRT, encoding=file_encoding, errors='replace') as f:
            text_lines = SRTParser.clean_up(f.readlines())

        # splitext handles extensions of any length (or none at all), unlike
        # chopping a fixed number of trailing characters.
        new_file_name = os.path.splitext(fileSRT)[0] + '.txt'

        with open(new_file_name, 'w', encoding=file_encoding) as f:
            f.writelines(text_lines)

    @staticmethod
    def clean_up(lines):
        """Return only the subtitle text lines from the lines of an SRT file.

        A line is dropped when it is blank, contains the ``-->`` timestamp
        separator, or consists solely of digits (a cue index number). Text
        lines that merely start with a digit are kept. Kept lines are
        returned unchanged, including their line endings.

        Args:
            lines: Lines of an SRT file, e.g. as returned by ``readlines()``.

        Returns:
            A new list with the remaining text lines in their original order.
        """
        index_pattern = re.compile(r"[0-9]+")

        new_lines = []
        # The first line is always skipped: in an SRT file it is presumably
        # the first cue's index number (possibly preceded by a BOM, which
        # would keep it from matching the index pattern below).
        for line in lines[1:]:
            stripped = line.strip()
            if not stripped:
                continue  # blank separator between cues
            if "-->" in line:
                continue  # timestamp line
            if index_pattern.fullmatch(stripped):
                continue  # cue index number
            new_lines.append(line)
        return new_lines