#!/usr/bin/python3
"""Mirror hhh.com.tw designer pages into per-designer directories.

For every row of ``xoops._hdesigner`` with ``onoff=1`` the script downloads
the designer's case listing and details page with curl, extracts a block of
markup from the details page, and passes both to ``proc_file`` to produce
``index.html`` in ``/var/www/jared-six/<hdesigner_id>/``.

NOTE(review): this module was rebuilt from a copy in which all line breaks
had been collapsed and several spans (apparently everything between "<" and
">") were missing.  Two pieces could not be recovered and are flagged where
they occur: the end-of-block literal used by ``extract_comp`` and the body of
``proc_file``.
"""
import codecs
import os
import subprocess

# Environment variable that, when set, overrides the built-in database URL.
DB_URL_ENV = 'XOOPS_DB_URL'

# NOTE(review): SECURITY - a live-looking credential is committed in source.
# It is kept only as a fallback so existing deployments keep working; rotate
# it and supply the URL through XOOPS_DB_URL instead.
DEFAULT_DB_URL = 'mysql://hhh7796hhh:lYmWsu^ujcA1@hhh-v57-cluster.cluster-cmab1ctkglka.ap-northeast-2.rds.amazonaws.com:3306/xoops?charset=utf8mb4'

DIR_PREFIX = '/var/www/jared-six/'
SITE_PREFIX = 'https://hhh.com.tw/designers/'

# Substring identifying the first line of the block copied by extract_comp.
START_MARKER = 'row justify-content-between mt-5'

# NOTE(review): the active end-of-block literal was lost from the recovered
# source.  '/article>' is the only candidate that survives (it appeared in a
# commented-out condition at the same spot) - confirm against the live page.
END_MARKER = '/article>'


def extract_comp(fname, start_marker=START_MARKER, end_marker=END_MARKER):
    """Return the block of lines between two marker lines of a UTF-8 file.

    Copying starts at the first line containing *start_marker* (that line is
    included) and stops at the next line containing *end_marker* (that line
    is excluded), or at end of file if no such line follows.

    Args:
        fname: Path of the file to read; decoded as UTF-8.
        start_marker: Substring that identifies the first line to copy.
        end_marker: Substring that identifies the line that ends the block.

    Returns:
        The copied lines concatenated, each followed by an extra "\n" in
        addition to the line ending it already carries (the original code
        did the same).  Empty string when *start_marker* is never found.
    """
    pieces = []
    capturing = False
    # The original never closed this handle; the with-block guarantees it.
    with codecs.open(fname, 'r', 'utf-8') as fr:
        for line in fr.readlines():
            if not capturing:
                if start_marker in line:
                    capturing = True
                    pieces.append(line + "\n")
                continue
            if end_marker in line:
                break
            # A line that repeats the start marker inside the block is copied
            # once here; the original fell through and appended it twice.
            pieces.append(line + "\n")
    return "".join(pieces)


# NOTE(review): the header and most of the body of proc_file are missing from
# the recovered source.  Only this tail survives:
#
#     ' in l and not scriptflag: scriptflag=True
#     fw.write('\n \n')
#     fw.write("\n \n")
#     # print(lines)
#     fr.close()
#     fw.close()
#
# The three-argument arity below is taken from the single call site; the
# parameter names are placeholders.  Restore the real implementation from
# version control or a backup.
def proc_file(src_path, dst_path, comp_html):
    """Placeholder for the lost routine that builds *dst_path*.

    The call site passes the downloaded listing page, the output path and the
    markup returned by ``extract_comp``.  What the routine did with them
    cannot be determined from the recovered source, so this stub fails loudly
    instead of guessing.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError(
        'proc_file was lost from the recovered source and must be restored'
    )


def _fetch(url, out_name):
    """Download *url* to *out_name* (relative to the cwd) with curl.

    An argv list is used so no shell parses the command.  The exit status is
    ignored, as it was with the original os.system call.
    """
    subprocess.run(['curl', url, '-o', out_name], check=False)


def main():
    """Rebuild index.html for every designer whose ``onoff`` flag is 1."""
    # Third-party dependency, imported here so the helpers above can be
    # imported without the database driver installed.
    import dataset

    db = dataset.connect(os.environ.get(DB_URL_ENV, DEFAULT_DB_URL))
    rows = db.query('SELECT hdesigner_id FROM xoops._hdesigner where onoff=1; ')
    for row in rows:
        did = row['hdesigner_id']
        print(did)
        curdir = DIR_PREFIX + str(did)
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(curdir, exist_ok=True)
        # The working directory is still changed, as before, in case the
        # lost proc_file relied on it.
        os.chdir(curdir)
        # A commented-out alternative in the original fetched
        # /designers/index/<id>/ into orig.html instead.
        _fetch(SITE_PREFIX + 'cases/' + str(did) + '/1-page/new-sort/', 'orig.html')
        _fetch(SITE_PREFIX + 'details/' + str(did) + '/', 'comp.html')
        result = extract_comp(curdir + "/comp.html")
        print(result)
        proc_file(curdir + "/orig.html", curdir + "/index.html", result)


if __name__ == "__main__":
    main()