---
# Docker Compose stack: Hugging Face text-generation-inference backend,
# MongoDB for chat history, and the chat-ui frontend (dev mode).
version: "3"
services:
  # GPU-backed LLM inference server (TGI) serving Llama-2-7b-chat sharded
  # across 2 GPUs; model weights are cached on the host under ./models/.
  huggingface_inference:
    image: ghcr.io/huggingface/text-generation-inference:1.1.0
    volumes:
      - ./models/:/data
    ports:
      - "3000:80"  # host 3000 -> TGI HTTP API on container port 80
    shm_size: "1gb"  # TGI sharding uses shared memory for inter-shard NCCL comms
    command: --model-id meta-llama/Llama-2-7b-chat-hf --sharded true --num-shards 2
    environment:
      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}
      - DISABLE_CUSTOM_KERNELS=${DISABLE_CUSTOM_KERNELS}
      # Fixed: was `{HF_HUB_ENABLE_HF_TRANSFER}` (missing `$`), which passed
      # the literal brace text into the container instead of the host value.
      - HF_HUB_ENABLE_HF_TRANSFER=${HF_HUB_ENABLE_HF_TRANSFER}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all  # reserve every available NVIDIA GPU
              capabilities: [gpu]
    working_dir: /app

  # MongoDB instance used by chat-ui to persist conversations.
  mongo_chatui:
    image: mongo:latest
    ports:
      - "27017:27017"

  # Chat frontend, built from the local ./chat-ui checkout and run with the
  # Vite dev server (--host exposes it outside the container).
  chat_ui:
    build:
      context: ./chat-ui
      dockerfile: Dockerfile
    command: >
      sh -c "npm run dev -- --host"
    volumes:
      # Mount local env file so MONGODB_URL etc. can be edited without rebuilding.
      - ./chat-ui/.env.local:/app/chat-ui/.env.local
    ports:
      - "5173:5173"  # Vite dev server default port
    depends_on:
      - mongo_chatui
      - huggingface_inference