# ragas_data_generation.py
  1. from dotenv import load_dotenv
  2. load_dotenv('environment.env')
  3. from ragas.testset.generator import TestsetGenerator
  4. from ragas.testset.evolutions import simple, reasoning, multi_context
  5. from langchain_openai import ChatOpenAi, OpenAIEmbeddings
  6. from langchain_community.document_loaders import DirectoryLoader
  7. from langchain_community.document_loaders import PyPDFLoader
  8. loader = DirectoryLoader("Documents")
  9. for file in
  10. documents = loader.load()
  11. for document in documents:
  12. document.metadata['filename'] = document.metadata['source']
  13. generator_llm = ChatOpenAi(model = "gpt-3.5-turbo-16k")
  14. critic_llm = ChatOpenAI(model="gpt-4")
  15. embeddings = OpenAIEmbeddings()
  16. generator = TestGenerator.from_langchain(
  17. generator_llm,
  18. critic_llm,
  19. embeddings
  20. )
  21. # Generate testset
  22. testset = generator.generate_with_langchain_docs(documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})