main.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. from pydoc import HTMLDoc
  2. from fastapi import FastAPI
  3. import dataset
  4. import sys
  5. import os
  6. import time
  7. from fastapi.middleware.cors import CORSMiddleware
  8. from fastapi.staticfiles import StaticFiles
  9. from pydantic import BaseModel
  10. from fastapi import FastAPI, Form, Response
  11. import subprocess
  12. import suggests
  13. from typing import Optional
  14. import networkx as nx
  15. import pyvis
  16. import time
  17. from pyvis.network import Network
  18. import pickle
  19. import logging
  20. import threading
  21. import random
  22. import string
  23. from fastapi.responses import HTMLResponse
  24. from fastapi.responses import RedirectResponse
  25. import dataset
  26. from selenium import webdriver
  27. import traceback
  28. import time
  29. from selenium.webdriver.common.keys import Keys
  30. from selenium.webdriver.common.by import By
  31. from selenium.webdriver.chrome.service import Service
  32. # import pymysql
  33. # pymysql.install_as_MySQLdb()
  # Module-wide Selenium WebDriver handle. It starts out unset;
  # restart_browser() closes any previous instance and rebinds this name.
  driver = None
  def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
      """Return a random identifier made of *size* characters drawn from *chars*.

      Args:
          size: number of characters to generate; values <= 0 give ''.
          chars: pool of characters to pick from (uppercase letters and
              digits by default).

      Returns:
          The generated string.
      """
      picked = []
      for _ in range(size):
          # One independent draw per position, same RNG call as before.
          picked.append(random.choice(chars))
      return ''.join(picked)
  # --- Application object and cross-origin policy ---------------------------
  app = FastAPI()

  # NOTE(review): every origin, method and header is allowed while
  # credentials are enabled as well; confirm this is intended before
  # exposing the service publicly.
  origins = ["*"]
  app.add_middleware(
      CORSMiddleware,
      allow_origins=origins,
      allow_methods=["*"],
      allow_headers=["*"],
      allow_credentials=True,
  )

  # --- Shared database handle (gtrends schema) ------------------------------
  # NOTE(review): the user name and password are embedded in this URL and the
  # same literal is repeated inside the request handlers; consider loading it
  # from configuration instead of source code.
  db = dataset.connect(
      'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4'
  )

  # --- Static files ---------------------------------------------------------
  # Served under /web from an absolute directory (a relative "static"
  # directory was used previously).
  app.mount(
      "/web",
      StaticFiles(directory="/root/src/kw_tools/web/static"),
      name="static",
  )
  def thread_function(kw):
      """Build a two-level Google-suggest graph for *kw* and save it as HTML.

      For each seed keyword the Google suggestions are fetched and linked to
      the seed. Every suggestion is then expanded once more and linked to its
      own suggestions. Self-loop edges are removed, the graph is rendered
      with pyvis, a row holding the generated file name and the keyword list
      is inserted into the ``gen_graph`` table, and the page is written to
      ``static/gs/<id>.html``.

      Args:
          kw: list of seed keyword strings.
      """
      print(kw)
      graph = nx.Graph()
      for seed in kw:
          first_level = suggests.suggests.get_suggests(seed, source='google')
          for suggestion in first_level['suggests']:
              graph.add_edge(seed, suggestion, weight=1)
              print(suggestion)
              # Pause between consecutive suggest requests.
              time.sleep(1)
              # Expand the suggestion itself. The earlier code queried the
              # seed again here, which only repeated the first-level result
              # and never added a second level to the graph.
              second_level = suggests.suggests.get_suggests(
                  suggestion, source='google')
              for related in second_level['suggests']:
                  graph.add_edge(suggestion, related, weight=1)

      # A term can be returned as its own suggestion; drop those edges.
      graph.remove_edges_from(list(nx.selfloop_edges(graph)))

      net = Network(height="750px", width="100%", bgcolor="#333333",
                    font_color="white")
      net.from_nx(graph)

      graph_id = id_generator()
      db['gen_graph'].insert({'filename': graph_id, 'kw': str(kw)})
      # NOTE(review): this path is relative to the process working directory,
      # while the /web mount uses an absolute directory; confirm they match.
      net.save_graph('static/gs/' + graph_id + '.html')
  @app.get("/tree_list/",response_class=HTMLResponse)
  async def tree_list():
      """Render the most recently generated graphs as an HTML table.

      Reads ``filename`` and ``kw`` from ``gen_graph`` (newest first) and
      emits one table row per record, linking to ``/web/gs/<filename>.html``.

      Returns:
          The page as an HTML string.
      """
      # Local import so the handler's own page buffer cannot shadow the module.
      from html import escape

      # A connection is opened per request rather than reusing the
      # module-level handle, as the original code did.
      db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
      rows = db.query('select filename,kw from gen_graph order by id desc')

      parts = [
          "<html><body><h2>清單</h2></br>請一分鐘後refresh </br></br>",
          "<table border='1'>",
      ]
      for shown, row in enumerate(rows, start=1):
          # The stored keyword text originates from form input, so it must be
          # escaped before being placed into markup.
          kw_text = escape(str(row['kw']))
          filename = escape(str(row['filename']), quote=True)
          parts.append("<tr><td>" + kw_text + "</td>")
          parts.append("<td><a href='/web/gs/" + filename + ".html'>"
                       + filename + "</a></td></tr>")
          # NOTE(review): this cut-off lets 11 rows through, exactly as the
          # original counter did; confirm whether 10 was intended.
          if shown > 10:
              break
      parts.append("</table></body></html>")
      return "".join(parts)
  @app.post("/proj_kw/",response_class=HTMLResponse)
  async def proj_kw(proj: str = Form(...),kws:Optional[str] = Form(None)):
      """Queue one ``serp_jobs`` row per submitted keyword for project *proj*.

      Args:
          proj: project name stored with every job.
          kws: keywords as a single form string; may be omitted.

      Returns:
          A short acknowledgement string.
      """
      # ``kws`` is one string. Iterating it directly inserted one row per
      # character, and a missing field raised TypeError on None.
      # NOTE(review): assumes one keyword per line (e.g. textarea input);
      # confirm the delimiter against the submitting form.
      keywords = [line.strip() for line in (kws or '').splitlines()]
      keywords = [word for word in keywords if word]

      if keywords:
          db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
          table = db['serp_jobs']
          for kw in keywords:
              table.insert({'proj': proj, 'kw': kw})
      return "OK請稍後"
  95. #response_class=RedirectResponse
  @app.post("/gen_tree/",response_class=HTMLResponse)
  async def func_expand(kw: str = Form(...),kw2:Optional[str] = Form(None),kw3:Optional[str] = Form(None),kw4:Optional[str] = Form(None) ):
      """Start graph generation for the submitted keywords in a worker thread.

      The first keyword is used only when it is longer than one character;
      each optional keyword is used whenever it was supplied. The request
      does not wait for the worker and redirects to the listing page.

      Returns:
          A 302 redirect to ``/tree_list``.
      """
      seeds = [kw] if len(kw) > 1 else []
      seeds.extend(extra for extra in (kw2, kw3, kw4) if extra is not None)

      worker = threading.Thread(target=thread_function, args=(seeds,))
      worker.start()

      return RedirectResponse(url="/tree_list",status_code=302)
  def restart_browser():
      """Restart the ``tiny1`` container and attach a fresh Chrome WebDriver.

      Steps:
        1. Restart the docker container, retrying a few times on failure.
        2. Quit the previously stored module-level ``driver``, if any.
        3. Create a new Chrome driver attached to ``127.0.0.1:9923`` and
           store it in the module-level ``driver``.

      Returns:
          The new driver, or ``None`` when it could not be created.
      """
      global driver

      # os.system reports failure through its exit status rather than by
      # raising, so the retry decision has to be based on the return value.
      for _attempt in range(3):
          if os.system('docker container restart tiny1') == 0:
              time.sleep(1)
              break
          time.sleep(10)

      if driver is not None:
          print('closing')
          try:
              driver.quit()
          except Exception:
              # A stale session must not prevent starting a new one.
              traceback.print_exc()
          driver = None

      try:
          options = webdriver.ChromeOptions()
          options.add_argument("--no-sandbox")
          options.add_argument('--headless')
          options.add_experimental_option("debuggerAddress", "127.0.0.1:9923")
          options.add_argument("--incognito")
          # NOTE(review): ``executable_path`` is kept as in the original; newer
          # Selenium releases expect a Service object instead. Confirm the
          # installed version before changing it.
          driver = webdriver.Chrome(options=options,
                                    executable_path='/root/driver/chromedriver')
      except Exception:
          print('開啟失敗')
          traceback.print_exc()
          driver = None
          return None
      return driver
  @app.post("/ranking/")
  async def ranking(
      kw: str = Form(...),
      domain: str = Form(...),
      kw2: Optional[str] = Form(None),
      domain2: Optional[str] = Form(None),
      kw3: Optional[str] = Form(None),
      domain3: Optional[str] = Form(None),
      kw4: Optional[str] = Form(None),
      domain4: Optional[str] = Form(None),
      kw5: Optional[str] = Form(None),
      domain5: Optional[str] = Form(None),
  ):
      """Search Google for each keyword and report where its domain appears.

      For every (keyword, domain) pair a fresh browser is obtained from
      ``restart_browser``, the keyword is typed into the Google search box
      and the result links are scanned. Each link whose URL contains the
      domain is recorded with its title, URL and position.

      Args:
          kw, domain: the mandatory first pair.
          kw2..kw5, domain2..domain5: optional extra pairs; a pair is used
              whenever its keyword is supplied.

      Returns:
          An HTML string embedding the list of per-keyword result dicts.
          NOTE(review): the route sets no ``response_class``, so the
          framework default applies; confirm whether HTMLResponse was meant.
      """
      # NOTE(review): the time.sleep calls below block inside an async
      # handler; kept as-is because the pacing looks deliberate.
      optional_pairs = ((kw2, domain2), (kw3, domain3),
                        (kw4, domain4), (kw5, domain5))
      kwlst = [[kw, domain]]
      kwlst.extend([k, d] for k, d in optional_pairs if k is not None)

      result = []
      for term, domain_name in kwlst:
          datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}

          driver = restart_browser()
          if driver is None:
              # restart_browser returns None when Chrome could not be started;
              # record an empty result instead of failing on None.
              print('browser unavailable', term)
              result.append(datadict)
              continue

          try:
              driver.get('https://www.google.com/?num=100')
              time.sleep(6)
              send_kw_elmt = driver.find_element(
                  By.XPATH,
                  '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
              send_kw_elmt.send_keys(term)
              time.sleep(3)
              send_kw_elmt.send_keys(Keys.ENTER)
              time.sleep(6)

              # Same locator style as the find_element call above.
              elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
              cnt = 1
              for elmt in elmts:
                  try:
                      href = elmt.get_attribute('href')
                      if domain_name in href:
                          datadict['搜尋詞'].append(term)
                          datadict['結果標題'].append(elmt.text)
                          datadict['結果網址'].append(href)
                          datadict['結果名次'].append(str(cnt))
                      cnt += 1
                  except Exception:
                      print('href2 exception')
                      traceback.print_exc()

              result.append(datadict)
              print(domain_name)
              print(datadict)
          finally:
              # Previously this called an undefined name (web_driver), which
              # raised NameError after the first keyword. Quit the driver
              # actually in use, even when the scrape fails.
              driver.quit()

          print('數量', len(elmts))
          # Long pause before the next search.
          time.sleep(90)

      return f"<html><body>{result}</body></html>"
  @app.get("/ranking_result/")
  async def tree_list():
      """Placeholder for the ranking result page.

      Returns only the opening tag of a bordered table; no rows are added.
      """
      # NOTE(review): this handler has the same function name as the
      # /tree_list/ handler defined earlier in the file and rebinds that
      # module-level name; consider giving it a distinct name.
      return "<table border='1'>"