version: "3"
services:
  huggingface_inference:
    image: ghcr.io/huggingface/text-generation-inference:1.1.0
    volumes:
      - ./models/:/data
    ports:
      - "3000:80"
    shm_size: '1gb'
    command: --model-id meta-llama/Llama-2-7b-chat-hf --sharded true --num-shard 2
    # --json-output
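    # NOTE: --sharded true --num-shard 2 assumes two GPUs are visible inside
    # the container; on a single GPU, a sketch of the unsharded equivalent:
    #   command: --model-id meta-llama/Llama-2-7b-chat-hf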
    environment:
      # - num_shard=1
      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}
      - DISABLE_CUSTOM_KERNELS=${DISABLE_CUSTOM_KERNELS}
      - HF_HUB_ENABLE_HF_TRANSFER=${HF_HUB_ENABLE_HF_TRANSFER}
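    # The ${...} references expand from the shell or a .env file beside this
    # compose file. A minimal sketch with hypothetical values:
    #   HUGGING_FACE_HUB_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
    #   DISABLE_CUSTOM_KERNELS=false
    #   HF_HUB_ENABLE_HF_TRANSFER=1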
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    working_dir: /app
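    # NOTE: the GPU reservation above assumes the NVIDIA Container Toolkit is
    # installed on the host; without it Docker cannot satisfy `driver: nvidia`
    # and the service fails to start.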
  mongo_chatui:
    image: mongo:latest
    ports:
      - "27017:27017"
  chat_ui:
    build:
      context: ./chat-ui
      dockerfile: Dockerfile
    command: >
      sh -c "npm run dev -- --host"
    volumes:
      - ./chat-ui/.env.local:/app/chat-ui/.env.local
    ports:
      - "5173:5173"
    depends_on:
      - mongo_chatui
      - huggingface_inference
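    # chat-ui reads its settings from the mounted .env.local. A minimal sketch
    # for this network (variable names follow chat-ui's .env template; the
    # MODELS JSON is abbreviated and assumed, not verified):
    #   MONGODB_URL=mongodb://mongo_chatui:27017
    #   MODELS=`[{"name": "meta-llama/Llama-2-7b-chat-hf",
    #             "endpoints": [{"url": "http://huggingface_inference:80"}]}]`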
  # llamacpp:
  #   image: ghcr.io/ggerganov/llama.cpp:full-cuda
  # debug:
  #   image: ubuntu:latest
  #   entrypoint: /bin/sh
  #   stdin_open: true # docker run -i
  #   tty: true        # docker run -t
  #   volumes:
  #     - ./models/:/data
# Alternative model IDs for --model-id:
#   70b:           meta-llama/Llama-2-70b-chat-hf
#   70b quantized: TheBloke/Llama-2-70B-chat-AWQ
#   7b:            meta-llama/Llama-2-7b-chat-hf
#                  mistralai/Mistral-7B-Instruct-v0.1
#   7b quantized:  TheBloke/Mistral-7B-Instruct-v0.1-AWQ
#                  TheBloke/Llama-2-7b-Chat-AWQ
#                  daryl149/llama-2-7b-chat-hf
#                  georgesung/llama2_7b_chat_uncensored
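# Usage sketch (assumes a .env file providing the variables above):
#   docker compose up --build
# chat-ui:   http://localhost:5173
# inference: http://localhost:3000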