# docker-compose.yml
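#
# Usage (a sketch, assuming Docker Compose v2 and the NVIDIA Container
# Toolkit on the host; HUGGING_FACE_HUB_TOKEN must be exported or set in
# a .env file so TGI can pull the gated meta-llama weights):
#
#   HUGGING_FACE_HUB_TOKEN=hf_... docker compose up --build
#
# Chat UI:       http://localhost:5173
# TGI REST API:  http://localhost:3000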

version: "3"

services:
  huggingface_inference:
    image: ghcr.io/huggingface/text-generation-inference:1.1.0
    volumes:
      - ./models/:/data
    ports:
      - "3000:80"
    shm_size: '1gb'
    # TGI's launcher flag is --num-shard (singular); shard across two GPUs.
    command: --model-id meta-llama/Llama-2-7b-chat-hf --sharded true --num-shard 2
    # --json-output
    environment:
      # - num_shard=1
      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}
      - DISABLE_CUSTOM_KERNELS=${DISABLE_CUSTOM_KERNELS}
      - HF_HUB_ENABLE_HF_TRANSFER=${HF_HUB_ENABLE_HF_TRANSFER}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    working_dir: /app
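
    # The device reservation above requires the NVIDIA Container Toolkit
    # on the host. Smoke test once the shards report ready, against TGI's
    # /generate endpoint:
    #   curl http://localhost:3000/generate \
    #     -H 'Content-Type: application/json' \
    #     -d '{"inputs": "Hello", "parameters": {"max_new_tokens": 32}}'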

  mongo_chatui:
    image: mongo:latest
    ports:
      - "27017:27017"

  chat_ui:
    build:
      context: ./chat-ui
      dockerfile: Dockerfile
    command: >
      sh -c "npm run dev -- --host"
    volumes:
      - ./chat-ui/.env.local:/app/chat-ui/.env.local
    ports:
      - "5173:5173"
    depends_on:
      - mongo_chatui
      - huggingface_inference
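
    # The mounted .env.local must point chat-ui at the sibling services.
    # A minimal sketch (the exact MODELS schema is defined by the chat-ui
    # repo; treat the endpoint shape below as an assumption):
    #   MONGODB_URL=mongodb://mongo_chatui:27017
    #   MODELS=`[{"name": "meta-llama/Llama-2-7b-chat-hf",
    #             "endpoints": [{"url": "http://huggingface_inference:80"}]}]`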

  # llamacpp:
  #   image: ghcr.io/ggerganov/llama.cpp:full-cuda

  # debug:
  #   image: ubuntu:latest
  #   entrypoint: /bin/sh
  #   stdin_open: true  # docker run -i
  #   tty: true         # docker run -t
  #   volumes:
  #     - ./models/:/data

# Candidate values for --model-id:
# 70b:    meta-llama/Llama-2-70b-chat-hf
# 70b q:  TheBloke/Llama-2-70B-chat-AWQ
# 7b:     meta-llama/Llama-2-7b-chat-hf
#         mistralai/Mistral-7B-Instruct-v0.1
# 7b q:   TheBloke/Mistral-7B-Instruct-v0.1-AWQ
#         TheBloke/Llama-2-7b-Chat-AWQ
#         daryl149/llama-2-7b-chat-hf
#         georgesung/llama2_7b_chat_uncensored
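#
# To switch models, swap the --model-id value in the TGI command above.
# The AWQ-quantized checkpoints typically also need `--quantize awq` on
# that command line (supported by TGI as of 1.1.0). The unquantized 70b
# model weighs roughly 140 GB in fp16 (70B params x 2 bytes), so it must
# be sharded across several large GPUs.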