#!/usr/bin/python3
# designer_gen.py
  import codecs
  import os
  import subprocess

  import dataset
  def extract_comp(fname):
      """Return the HTML fragment between the start and end markers of *fname*.

      The file is read as UTF-8.  Capturing starts at the first line that
      contains ``row justify-content-between mt-5`` (that line is included)
      and stops at the first later line that contains the Facebook SDK
      ``<script async defer crossorigin="anonymous" ...`` tag (that line is
      excluded).  If the end marker never appears, everything up to the end
      of the file is returned; if the start marker never appears, the result
      is an empty string.

      Every captured line is followed by an extra ``"\\n"`` in addition to
      the line ending it already carries; this is kept so the generated
      pages stay identical to what earlier runs produced.

      Args:
          fname: Path of the HTML file to scan.

      Returns:
          The captured lines concatenated into one string.

      Raises:
          OSError: If *fname* cannot be opened.
      """
      start_marker = 'row justify-content-between mt-5'
      end_marker = '<script async defer crossorigin="anonymous" src="https://connect.facebook'

      captured = []
      capturing = False
      # The context manager closes the reader even when we break out early
      # or decoding fails part-way through.
      with codecs.open(fname, 'r', 'utf-8') as reader:
          for line in reader.readlines():
              if capturing:
                  if end_marker in line:
                      break
                  captured.append(line + "\n")
              elif start_marker in line:
                  # elif (not a second if): a later line that repeats the
                  # start marker is captured once, not appended twice.
                  capturing = True
                  captured.append(line + "\n")
      return "".join(captured)
  # Markup written once, right after the first input line containing
  # "</script>": the Facebook SDK loader followed by the two tab-switching
  # functions (c_intro / c_comp) that the rewritten nav links call.
  _TAB_SWITCH_SCRIPT = "".join((
      '\n <script async defer crossorigin="anonymous" src="https://connect.facebook.net/zh_TW/sdk.js#xfbml=1&version=v9.0" nonce="7TsDzQN7"></script> \n',
      "\n <script>",
      "function c_intro(){ \n",
      "var rintro= document.getElementById('row_intro'); \n",
      "var rcomp = document.getElementById('row_company'); \n",
      "var aintro = document.getElementById('a_intro'); \n",
      "var acompany = document.getElementById('a_company'); \n",
      "var id_offs = document.getElementById('id_offs'); \n",
      ' rcomp.style.display = "none"; \n',
      ' id_offs.style.display="flex"; \n',
      ' rintro.style.display="flex"; \n',
      ' aintro.className = "nav-link active"; \n',
      ' acompany.className = "nav-link"; \n',
      "} \n",
      "function c_comp(){ \n",
      "var rintro= document.getElementById('row_intro'); \n",
      "var rcomp = document.getElementById('row_company'); \n",
      "var aintro = document.getElementById('a_intro'); \n",
      "var acompany = document.getElementById('a_company'); \n",
      "var id_offs = document.getElementById('id_offs'); \n",
      ' id_offs.style.display="none"; \n',
      ' rintro.style.display="none"; \n',
      ' rcomp.style.display = "flex"; \n',
      ' acompany.className = "nav-link active"; \n',
      ' aintro.className = "nav-link"; \n',
      "} </script> \n",
  ))


  def _nav_link(line, label, handler, element_id):
      """Build the replacement tab anchor, keeping the "active" state of *line*."""
      # The trailing space in the inactive class is what earlier output had.
      css_class = "nav-link active" if 'active' in line else "nav-link "
      return '<a href="#" class="{0}" onclick="{1}();" id="{2}">{3}</a>'.format(
          css_class, handler, element_id, label)


  def _rewrite_line(line, compcontent):
      """Return the text that replaces *line*, or None to copy it unchanged."""
      if 'div' in line and 'class' in line and 'row mt-5' in line and 'id="row"' in line:
          # The original row is split in two: a hidden row holding the
          # extracted fragment, then the opening tag of the visible row.
          return ('<div class="row mt-5" id="row_company" style="display:none">\n'
                  + compcontent
                  + '</div>\n'
                  + '<div class="row mt-5" id="row_intro">\n')
      if 'link rel' in line and '/assets/css/all.css?v=' in line and 'stylesheet' in line:
          return '<link rel="stylesheet" href="/assets/css/all.css?v=1640073543">'
      if 'link rel' in line and '/assets/css/main.css?v=' in line and 'stylesheet' in line:
          return '<link rel="stylesheet" href="/assets/css/main.css?v=1640073543">'
      if 'likeSee__state__filter--offset' in line:
          # Gives the filter bar an id so the injected script can hide it.
          return '<div class="likeSee__state__filter likeSee__state__filter--offset" id="id_offs">'
      if 'nav-link' in line and '設計師作品' in line and 'class' in line:
          return _nav_link(line, '設計師作品', 'c_intro', 'a_intro')
      if 'nav-link' in line and '設計師公司簡介' in line and 'class' in line:
          return _nav_link(line, '設計師公司簡介', 'c_comp', 'a_company')
      return None


  def proc_file(fname, fname2, compcontent):
      """Copy the HTML file *fname* to *fname2*, rewriting selected lines.

      Both files are handled as UTF-8.  Line by line:

      * the ``<div class="row mt-5" id="row">`` line is replaced by a hidden
        ``row_company`` div containing *compcontent*, followed by the opening
        tag of a ``row_intro`` div;
      * the ``all.css`` / ``main.css`` stylesheet links are replaced by links
        pinned to ``?v=1640073543``;
      * the ``likeSee__state__filter--offset`` div gets ``id="id_offs"``;
      * the two tab links (設計師作品 / 設計師公司簡介) become ``href="#"``
        anchors that call ``c_intro()`` / ``c_comp()``;
      * every other line is copied verbatim, and the tab-switching script is
        appended once, after the first copied line containing ``</script>``.

      Replacement lines other than the row split are written without a
      trailing newline, matching the output of earlier runs.

      Args:
          fname: Path of the source HTML file.
          fname2: Path of the file to write (created or truncated).
          compcontent: HTML text placed inside the hidden ``row_company`` div.

      Raises:
          OSError: If either file cannot be opened.
      """
      # Read everything first so the input handle is released before the
      # output is opened, and so the output is not created when the input is
      # missing.
      with codecs.open(fname, 'r', 'utf-8') as reader:
          source_lines = reader.readlines()

      script_written = False
      with codecs.open(fname2, 'w', 'utf-8') as writer:
          for line in source_lines:
              replacement = _rewrite_line(line, compcontent)
              if replacement is not None:
                  writer.write(replacement)
                  continue
              writer.write(line)
              if not script_written and '</script>' in line:
                  script_written = True
                  writer.write(_TAB_SWITCH_SCRIPT)
  # Root directory that receives one sub-directory per designer id.
  dir_prefix='/var/www/jared-six/'


  def _download(url, dest):
      """Fetch *url* into the file *dest* with curl; return True if curl exited 0."""
      # Argument list instead of a shell string: nothing taken from the
      # database is ever interpreted by a shell.
      return subprocess.run(['curl', url, '-o', dest]).returncode == 0


  def main():
      """Regenerate ``index.html`` for every designer row with ``onoff=1``.

      For each ``hdesigner_id`` returned by the query, the designer's case
      listing and details pages are downloaded into ``<dir_prefix><id>/`` as
      ``orig.html`` and ``comp.html``; the fragment extracted from
      ``comp.html`` is then merged into ``orig.html`` and written out as
      ``index.html``.
      """
      # NOTE(review): this connection URL embeds a live-looking password in
      # source control.  It is kept only as a fallback so existing deployments
      # keep working; set DESIGNER_GEN_DB_URL instead and rotate the credential.
      db_url = os.environ.get(
          'DESIGNER_GEN_DB_URL',
          'mysql://hhh7796hhh:lYmWsu^ujcA1@hhh-v57-cluster.cluster-cmab1ctkglka.ap-northeast-2.rds.amazonaws.com:3306/xoops?charset=utf8mb4')
      db = dataset.connect(db_url)

      rows = db.query('SELECT hdesigner_id FROM xoops._hdesigner where onoff=1; ')
      for row in rows:
          did = row['hdesigner_id']
          print(did)

          curdir = os.path.join(dir_prefix, str(did))
          # exist_ok avoids the check-then-create race of exists() + mkdir().
          os.makedirs(curdir, exist_ok=True)

          # Explicit output paths replace the previous os.chdir(), so the
          # process working directory is no longer mutated per designer.
          orig_path = os.path.join(curdir, 'orig.html')
          comp_path = os.path.join(curdir, 'comp.html')
          index_path = os.path.join(curdir, 'index.html')

          cases_url = 'https://hhh.com.tw/designers/cases/' + str(did) + '/1-page/new-sort/'
          details_url = 'https://hhh.com.tw/designers/details/' + str(did) + '/'
          if not (_download(cases_url, orig_path) and _download(details_url, comp_path)):
              # Skip rather than rebuild index.html from a stale or missing
              # download; the remaining designers are still processed.
              print('download failed for designer ' + str(did) + ', skipping')
              continue

          result = extract_comp(comp_path)
          print(result)
          proc_file(orig_path, index_path, result)


  if __name__ == '__main__':
      main()