Browse Source

add file_loader/gsheet2neo4j.py

ling 4 months ago
parent
commit
441ba9cf2d
1 changed file with 71 additions and 0 deletions
  1. 71 0
      file_loader/gsheet2neo4j.py

+ 71 - 0
file_loader/gsheet2neo4j.py

@@ -0,0 +1,71 @@
+import os
+
+import pandas as pd
+from py2neo import Graph
+from py2neo import NodeMatcher, RelationshipMatcher
+from py2neo import Node, Relationship
+from py2neo.matching import NodeMatcher
+
+# Neo4j connection settings. Each value can be overridden with an environment
+# variable of the same name; the literals are fallbacks that keep the previous
+# behaviour when no override is set.
+NEO4J_URL = os.environ.get("NEO4J_URL", "bolt://cmm.ai:7687")
+NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
+# SECURITY: this fallback password is committed to version control. Rotate it,
+# supply NEO4J_PASSWORD through the environment, then delete the literal.
+NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "chuz8310xsystex")
+
+# Initialize the Neo4j graph database connection
+graph = Graph(NEO4J_URL, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
+
+# Sheet tab downloaded at import time. The functions in this file do not read
+# this module-level `df`; bulk_create_query() downloads its own tab.
+sheet_id = "1ltqF8KfHYin90SFfLnaTATZyLVqjJgbEJmszyV6_2JE"
+gid = "168214340"
+df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv&gid={gid}")
+
+def get_property_text(row):
+    """Render a sheet row's property columns as a Cypher property map.
+
+    Columns from position 3 onward are treated as properties; cells for which
+    ``pd.notna`` is false are skipped. An ``id`` property holding the row's
+    ``Node`` value is always set last, replacing any sheet column named ``id``.
+
+    Args:
+        row: A pandas Series for one sheet row, indexed by column name.
+
+    Returns:
+        Text such as ``{full_name: 'x', id: 'n'}``. Keys are stripped and have
+        spaces replaced by underscores; every value is emitted as a
+        single-quoted Cypher string literal.
+    """
+    properties = {k: v for k, v in row.iloc[3:].items() if pd.notna(v)}
+    properties["id"] = row["Node"]
+
+    def quote(value):
+        # Escape backslashes before quotes so the backslash added for a quote
+        # is not doubled again; without this an apostrophe in a cell would
+        # terminate the literal and corrupt (or inject into) the statement.
+        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
+        return f"'{escaped}'"
+
+    property_list = [
+        f"{str(k).strip().replace(' ', '_')}: {quote(v)}"
+        for k, v in properties.items()
+    ]
+    return "{" + ", ".join(property_list) + "}"
+
+def get_label_text(row):
+    """Build the Cypher label suffix for one sheet row.
+
+    Every node gets the ``__Entity__`` label. If the row's ``label list`` cell
+    is present, it is read as a comma-separated list: all spaces are removed
+    and each non-empty entry is appended as a further label.
+
+    Args:
+        row: Mapping-like row (e.g. a pandas Series) with a ``label list`` key.
+
+    Returns:
+        Text such as ``:__Entity__:A:B``, or ``:__Entity__`` when the cell is
+        missing or holds no label names.
+    """
+    raw = row["label list"]
+    if pd.isna(raw):
+        return ":__Entity__"
+    # Drop empty entries (trailing or doubled commas, whitespace-only cells):
+    # an empty label would leave "::" or a dangling ":" in the statement.
+    names = [name for name in str(raw).replace(" ", "").split(",") if name]
+    return "".join(f":{name}" for name in ["__Entity__", *names])
+
+def create_query_for_each_row(row):
+    """Build the Cypher node pattern for one sheet row.
+
+    The pattern has the form ``(`<Node>`<labels> <properties>)``, for example
+    ``(`溫室氣體`:__Entity__:定義 {id: '溫室氣體'})``, and is meant to be listed
+    after a ``CREATE`` keyword.
+
+    Args:
+        row: A pandas Series for one sheet row; must provide the ``Node`` and
+            ``label list`` columns read here and by the helper functions.
+
+    Returns:
+        The node pattern as a string.
+    """
+    # The node name is used as a backtick-quoted variable; Cypher escapes a
+    # backtick inside such a name by doubling it.
+    variable = str(row["Node"]).replace("`", "``")
+    labels = get_label_text(row)
+    properties = get_property_text(row)
+    return f"(`{variable}`{labels} {properties})"
+    
+def bulk_create_query(
+    sheet_id="1ltqF8KfHYin90SFfLnaTATZyLVqjJgbEJmszyV6_2JE",
+    gid="1280923991",
+):
+    """Download one Google Sheet tab and build a single Cypher CREATE statement.
+
+    Args:
+        sheet_id: Google Sheets document id; defaults to the sheet this
+            script was written for.
+        gid: Id of the tab to export as CSV; defaults to the original tab.
+
+    Returns:
+        A statement of the form ``CREATE\\n<pattern>,\\n<pattern>...`` with one
+        node pattern per sheet row.
+
+    Raises:
+        ValueError: If the downloaded tab has no data rows, since ``CREATE``
+            followed by no pattern is not a usable statement.
+    """
+    url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv&gid={gid}"
+    df = pd.read_csv(url)
+    if df.empty:
+        raise ValueError(f"Sheet {sheet_id} (gid {gid}) has no rows to import")
+
+    patterns = df.apply(create_query_for_each_row, axis=1).to_list()
+    return "CREATE\n" + ",\n".join(patterns)
+
+def create_node():
+    """Build the bulk CREATE statement and run it on the module-level graph."""
+    graph.run(bulk_create_query())