import dataset
import networkx as nx
#import pysftp
import codecs
import pickle
import os
from pyvis.network import Network
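
# Build a graph linking search queries to the hhh.com.tw landing pages they
# reach (read from the local SQLite table 'tmp'), render it with pyvis and
# patch the generated HTML for embedding.
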
# Dataset name: the script reads the SQLite file "<pname>.db", table 'tmp'.
#pname='cont'
#pname='damanwoo'
#pname='drama'
#pname='news'
#pname='www'
#pname='ipromise'
#pname='sports'
#pname='rumor'
#pname='korea'
pname='hhh.rb'

db = dataset.connect('sqlite:///'+pname+".db")
table=db['tmp']

rid=0
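
# Assumed shape of the 'tmp' table (it is not created here): one row per
# query/URL pair, e.g.
#     table.insert({'q': 'some query', 'url': 'https://hhh.com.tw/...', 'rid': 0})
# The active query below only reads q and url; rid appears only in the
# commented-out variants. The example row is illustrative, not real data.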

def get_css():
    """Read jared/data/css.txt and return its contents as a single string."""
    with codecs.open('jared/data/css.txt','r','utf-8') as fr:
        lines=fr.readlines()
    return ' '.join(lines)
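
# NOTE: get_css() is currently unused; modify_file() keeps the call commented
# out and injects an empty css string instead.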

def modify_file(fname):
    """Patch the pyvis-generated HTML and write the result to "mod_"+fname.

    A <div id="google"> is opened right after <body>, the css string is
    appended inside the <style> block, and an extra closing </div> is
    emitted right after the "mynetwork" <div> line.
    """
    with codecs.open(fname,'r','utf-8') as fr:
        lines=fr.readlines()
#    css=get_css()
    css=''
    content_output=''
    for l in lines:
        if '<body>' in l[0:10]:
            content_output+=l
            content_output+='\n<div id="google">\n'
            continue
        if '<style type="text' in l[0:22]:
            content_output+=l
            content_output+="\n"+css+"\n"
            continue
        if '<div id = "mynetwork"' in l[0:30]:
            content_output+=l
            content_output+='\n</div>\n'
            continue

        content_output+=l

    with codecs.open("mod_"+fname,'w','utf-8') as fw:
        fw.write(content_output)
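
# Example: modify_file('hhh.rb.html') leaves the pyvis output untouched and
# writes the patched copy to 'mod_hhh.rb.html'.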


def gen_pic():
    """Build the query/URL graph from the 'tmp' table, render it with pyvis
    and post-process the generated HTML."""
#    if os.path.exists(pname):
#        G = pickle.load( open( pname, "rb" ) )
#    else:
#        G = nx.Graph()
    G = nx.Graph()

    finallist=[]

    # Earlier query variants, kept for reference:
#    cursor=db.query('select q,rid,url from tmp where q in (select distinct q from  (select q,count(url) from tmp where length(q)> 2 group by q having count(url) <= 3) as tbl1 ) order by q')
#    cursor=db.query('select q,rid,url from tmp where q in (select distinct q from  (select q,count(url) from tmp where length(q)> 2 group by q having ) as tbl1 ) order by q')
#    cursor=db.query('select q,rid,url from tmp where q in (select distinct q from  (select q,count(url) from tmp where length(q)> 2 group by q   ) as tbl1 ) order by q')
#    cursor=db.query('select q,rid,url from tmp where q in (select distinct q from  (select q,count(url) from tmp where length(q)> 2 group by q   having count(url) >20) as tbl1 ) order by q')
#    cursor=db.query('select distinct q,url from tmp where url in (select distinct url from  (select url,count(q) from tmp where length(q)> 2 group by url   having count(q) >5 and count(q)<10 ) as tbl1) order by url')
    # Active query: queries longer than 2 characters, restricted to URLs that
    # are hit by more than 4 and fewer than 9 such queries.
    cursor=db.query('select distinct q,url from tmp where url in (select distinct url from  (select url,count(q) from tmp where length(q)> 2 group by url   having count(q) >4 and count(q)<9 ) as tbl1) order by url')

    # Group queries by URL, stripping the hhh.com.tw host variants so the
    # desktop / bare / mobile versions of a page map to the same node.
    urldict={}
    for c in cursor:
        url=c['url'].replace('https://www.hhh.com.tw','')
        url=url.replace('https://hhh.com.tw','')
        url=url.replace('https://m.hhh.com.tw','')

        q=c['q']
        if urldict.get(url) is None:
            urldict[url]=[q]
        else:
            urldict[url].append(q)
    print(len(urldict.keys()))

    # For every URL with at least two queries, link the URL node to each of
    # its query nodes.
    cnt=0
    for k,v in urldict.items():
        if len(v)>=2:
            for itm in v:
                G.add_edge(k,itm,weight=3,width=3,borderwidth=3)
                cnt+=1
                if cnt%1000 == 0:
                    print(cnt)

#            G.add_edge(c['q'],c['rid'],weight=3,width=3,borderwidth=3)

#    pickle.dump( G, open( pname, "wb" ) )
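    # At this point G is bipartite in practice: URL nodes on one side, query
    # nodes on the other, with an edge whenever a query reached that URL.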

#    G2 = [G.subgraph(c).copy() for c in nx.connected_components(G)]
#    remove = [node for node,degree in dict(G.degree()).items() if degree <2]
#    G.remove_nodes_from(remove)

    # Optional keyword blacklist (disabled): drop nodes whose label contains
    # '承諾' or 'promise'.
    remove=[]
#    for n in G.nodes:
#        if '承諾' in n:
#            remove.append(n)
#        if 'promise' in n:
#            remove.append(n)
#    G.remove_nodes_from(remove)

    # Drop hub nodes with more than 15 edges so a few very popular pages or
    # queries do not dominate the layout, then remove self-loops and any nodes
    # left isolated by the pruning.
    to_remove=[]
    for n in G:
        dg=G.degree(n)
        if dg > 15:
            to_remove.append(n)
    G.remove_nodes_from(to_remove)

    G.remove_edges_from(nx.selfloop_edges(G))
    G.remove_nodes_from(list(nx.isolates(G)))
#    lst= [G.subgraph(c).copy() for c in nx.connected_components(G)]
#    lst=[]
#    for c in nx.connected_components(G):
#        cc=G.subgraph(c).copy()
#        if cc.number_of_nodes()>7:
#            lst.append(cc)

#        if nx.diameter(cc, e=None, usebounds=False)>1:
#            lst.append(cc)

#    G2=nx.compose_all(lst)
    G2=G

    # Render the graph with pyvis on a dark background, write "<pname>.html",
    # then write the patched copy "mod_<pname>.html".
#    pyG = Network(height="750px", width="100%",bgcolor="#333333",font_color="white")
    pyG = Network(height="600px", width="100%",bgcolor="#444444",font_color="white")
    pyG.from_nx(G2)
    pyG.show(pname+'.html')
    modify_file(pname+'.html')
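
    # Note: depending on the pyvis version, show() may try to open the file in
    # a browser; pyG.save_graph(pname+'.html') is a quieter alternative that
    # only writes the HTML.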

#    Optional: push the patched HTML to the web server over SFTP (disabled).
#    cnopts = pysftp.CnOpts()
#    cnopts.hostkeys = None
#    s = pysftp.Connection(host='www.choozmo.com', username='jared', password='sstc5202',cnopts=cnopts)
#    local_path = "mod_"+pname+".html"
#    remote_path = "/home/nginx/farmoutput/tags/"+"mod_"+pname+".html"
#    s.put(local_path, remote_path)


    # finallist is never populated; callers use gen_pic() for its side effects.
    return finallist

if __name__ == '__main__':
    gen_pic()
#r=checkig('信用卡')
#print(r)

# Reference snippet: a vis.js click handler that copies the clicked node's
# label to the clipboard, meant to be pasted into the generated HTML.
#        network.on( 'click', function(properties) {
#    var ids = properties.nodes;
#    var clickedNodes = nodes.get(ids);
# var copyText = clickedNodes[0].label;
# var promise = navigator.clipboard.writeText(copyText);
#//    console.log('clicked nodes:', clickedNodes);
#});