Browse Source

宇宙圖

zooeytsai 2 years ago
parent
commit
c706f95623
1 changed files with 126 additions and 8 deletions
  1. 126 8
      web/main.py

+ 126 - 8
web/main.py

@@ -7,7 +7,7 @@ import time
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
-from fastapi import FastAPI, Form, Response
+from fastapi import FastAPI, Form, Response, File, UploadFile, Request
 import subprocess
 import suggests
 from typing import Optional
@@ -20,17 +20,27 @@ import logging
 import threading
 import random
 import string
-from fastapi.responses import HTMLResponse
-from fastapi.responses import RedirectResponse
+from fastapi.responses import HTMLResponse,RedirectResponse, FileResponse
 import dataset
-from selenium import webdriver
 import traceback
 import time
+from selenium import webdriver
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.by import By
 from selenium.webdriver.chrome.service import Service
-# import pymysql
-# pymysql.install_as_MySQLdb()
+import networkx as nx
+from pyvis.network import Network
+import csv
+import sys
+import codecs
+import difflib
+import pymysql
+pymysql.install_as_MySQLdb()
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+from typing import Callable
+import shutil
+import aiofiles
 
 driver = None
 
@@ -50,8 +60,8 @@ app.add_middleware(
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
 
-# app.mount("/web", StaticFiles(directory="static"), name="static")
-app.mount("/web", StaticFiles(directory="/root/src/kw_tools/web/static"), name="static")
+# Serve the static front-end files under the /web URL prefix.
+# NOTE(review): the directory is an absolute path inside one developer's home
+# folder; the previous /root/src/... value is kept commented out below.
+# Confirm which path the deployment expects before merging.
+app.mount("/web", StaticFiles(directory="/Users/zooeytsai/kw_tools/web/static"), name="static")
+# app.mount("/web", StaticFiles(directory="/root/src/kw_tools/web/static"), name="static")
 
 def thread_function(kw):
     global db
@@ -217,3 +227,111 @@ async def tree_list():
     # html += "<tr><td>" + c['kw'] + "</td>"
     
     return html
+
+# Module-level token counter: proc_row() increments kwdict[token] for every
+# space-separated token of the string it is given.
+kwdict={}
+# Module-level undirected graph that the /kwtree handler fills with edges and
+# then hands to pyvis for rendering.
+G = nx.Graph()
+def gcm0(strings):
+    """Greedily group similar strings under the first representative seen.
+
+    Each input is stripped of surrounding whitespace and compared, via
+    ``difflib.get_close_matches`` with a 0.65 cutoff, against the
+    representatives collected so far. It joins the best-matching group, or
+    starts a new group with itself as representative when nothing matches.
+
+    Args:
+        strings: Iterable of strings to cluster.
+
+    Returns:
+        A dict mapping each representative to the list of its members; the
+        representative is always the first member of its own list.
+    """
+    groups = {}
+    for raw in strings:
+        candidate = raw.strip()
+        # Up to 8 matches are requested, but only the best one is used.
+        nearest = difflib.get_close_matches(candidate, groups.keys(), 8, 0.65)
+        if not nearest:
+            groups[candidate] = [candidate]
+            continue
+        groups[nearest[0]].append(candidate)
+    return groups
+
+
+def proc_row(row):
+    """Count the tokens of ``row`` in the module-level ``kwdict``.
+
+    ``row`` is split on single spaces, so consecutive spaces produce empty
+    tokens, which are counted like any other token.
+
+    Args:
+        row: The string whose space-separated tokens are counted.
+    """
+    for token in row.split(' '):
+        previous = kwdict.get(token)
+        kwdict[token] = 1 if previous is None else previous + 1
+
+
+def save_upload_file_tmp(file: UploadFile) -> Path:
+    """Copy an upload into a temporary file and return that file's path.
+
+    The temporary file keeps the suffix of the uploaded filename and is
+    created with ``delete=False``, so it remains on disk after this function
+    returns; removing it is left to the caller. The upload's underlying file
+    object is closed whether or not the copy succeeds.
+
+    Args:
+        file: The upload to persist.
+
+    Returns:
+        Path of the temporary file holding the uploaded bytes.
+    """
+    try:
+        extension = Path(file.filename).suffix
+        with NamedTemporaryFile(delete=False, suffix=extension) as handle:
+            shutil.copyfileobj(file.file, handle)
+            return Path(handle.name)
+    finally:
+        file.file.close()
+
+    
+@app.post("/kwtree")
+async def kwtree(file: UploadFile = File(...)):
+    """Render the uploaded phrase list as an interactive cluster graph.
+
+    The upload is decoded as UTF-16 and parsed as tab-delimited text with
+    ``|`` as the quote character. The first row is treated as a header and
+    skipped; only the first column of every other row is used. Phrases are
+    grouped with ``gcm0``, each group is drawn as a star around its
+    representative, and representatives whose 2-gram distance is at most 12
+    are linked pairwise (each representative takes part in at most one such
+    link).
+
+    Args:
+        file: The uploaded UTF-16, tab-delimited file.
+
+    Returns:
+        A ``text/html`` response whose body is the page generated by pyvis.
+
+    Raises:
+        UnicodeError: If the uploaded bytes are not valid UTF-16.
+    """
+    # Function-scope import, as before: strsimpy is only needed here.
+    from strsimpy.qgram import QGram
+
+    # Parse the bytes the client actually sent. The client-supplied filename
+    # is untrusted and must never be used to build a server-side path.
+    text = (await file.read()).decode('utf-16')
+    reader = csv.reader(text.splitlines(), delimiter='\t', quotechar='|')
+    next(reader, None)  # skip the header row
+
+    rowlst = []
+    seen = set()
+    for row in reader:
+        if not row:
+            # A blank line yields an empty row; there is no phrase to read.
+            continue
+        phrase = row[0]
+        proc_row(phrase)
+        if phrase not in seen:
+            seen.add(phrase)
+            rowlst.append(phrase)
+
+    # G is module-level: reset it so edges from earlier uploads do not leak
+    # into this response. Nothing is awaited between here and rendering, so
+    # requests sharing the event loop cannot interleave their edges.
+    G.clear()
+    clusters = gcm0(rowlst)
+    for k, v in clusters.items():
+        for x in v:
+            G.add_edge(k, x, weight=1, label='')
+
+    # Link similar representatives, at most one link per representative.
+    keys = list(clusters)
+    qgram = QGram(2)
+    linked = set()
+    for k1 in keys:
+        for k2 in keys:
+            if k1 == k2 or k1 in linked or k2 in linked:
+                continue
+            if qgram.distance(k1, k2) <= 12:
+                linked.update((k1, k2))
+                G.add_edge(k1, k2, weight=1, label='')
+
+    pyG = Network(height="100%", width="100%", bgcolor="#444444", font_color="white")
+    pyG.set_options("""
+        const options = {
+        "nodes" : {
+                "font" : {
+                    "size" : "30",
+                    "color" : "#ffffff"
+                }
+            },
+          "physics": {
+            "forceAtlas2Based": {
+              "springLength": 100
+            },
+            "maxVelocity": 150,
+            "minVelocity": 0.28,
+            "solver": "forceAtlas2Based"
+          }
+        }
+        """)
+    # Every representative is a member of its own cluster, which created a
+    # self-loop above. Materialise the edge list before removing from G.
+    G.remove_edges_from(list(nx.selfloop_edges(G)))
+    pyG.from_nx(G)
+
+    # Write to a uniquely named temporary file so concurrent requests cannot
+    # overwrite each other, then serve exactly the bytes that were written.
+    # save_graph() only writes the file; show() may also open a browser.
+    with NamedTemporaryFile(suffix='.html', delete=False) as tmp:
+        html_path = Path(tmp.name)
+    try:
+        pyG.save_graph(str(html_path))
+        html_bytes = html_path.read_bytes()
+    finally:
+        html_path.unlink()
+
+    # Same status, media type and body the file-based response delivered,
+    # without leaving generated files behind on the server.
+    return Response(content=html_bytes, media_type='text/html')