# gsheet2neo4j.py
import os

import pandas as pd
from py2neo import Graph
from py2neo import NodeMatcher, RelationshipMatcher
from py2neo import Node, Relationship
from py2neo.matching import NodeMatcher
  6. # Neo4j connection settings
  7. NEO4J_URL = "bolt://cmm.ai:7687"
  8. NEO4J_USERNAME = "neo4j"
  9. NEO4J_PASSWORD = "chuz8310xsystex"
  10. # Initialize the Neo4j graph database connection
  11. graph = Graph(NEO4J_URL, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
  12. import pandas as pd
  13. sheet_id ="1ltqF8KfHYin90SFfLnaTATZyLVqjJgbEJmszyV6_2JE"
  14. gid = "1280923991"
  15. df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv&gid={gid}")
  16. import pandas as pd
  17. sheet_id ="1ltqF8KfHYin90SFfLnaTATZyLVqjJgbEJmszyV6_2JE"
  18. gid = "168214340"
  19. df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv&gid={gid}")
  20. def get_property_text(row):
  21. # df_property = df.iloc[:, 3:]
  22. properties = {k: v for k, v in row[3:].items() if pd.notna(v)}
  23. properties["id"] = row['Node']
  24. property_list = []
  25. for k, v in properties.items():
  26. property_list.append(f"{k.strip().replace(' ', '_')}: '{v}'")
  27. property_text = f"{{{', '.join(property_list)}}}"
  28. # records = df_property.apply(lambda row: {k: v for k, v in row.items() if pd.notna(v)}, axis=1).tolist()
  29. # df['properties'] = properties
  30. return property_text
  31. def get_label_text(row):
  32. if pd.notna(row['label list']):
  33. labels = ":__Entity__:" + row['label list'].replace(" ","").replace(",", ":")
  34. else:
  35. labels = ":__Entity__"
  36. return labels
  37. def create_query_for_each_row(row):
  38. # (`溫室氣體`:定義 {id: "溫室氣體"}),
  39. node = row['Node']
  40. labels = get_label_text(row)
  41. property = get_property_text(row)
  42. query = f'(`{node}`{labels} {property})'
  43. return query
  44. # print("The Node is %s" % row["Node"])
  45. def bulk_create_query():
  46. sheet_id ="1ltqF8KfHYin90SFfLnaTATZyLVqjJgbEJmszyV6_2JE"
  47. gid = "1280923991"
  48. df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv&gid={gid}")
  49. query = df.apply(create_query_for_each_row, axis = 1).to_list()
  50. create_query = "CREATE\n" + ',\n'.join(query)
  51. return create_query
  52. def create_node():
  53. query = bulk_create_query()
  54. graph.run(query)