loftyoutcome · murbans1 · Oct 16, 2024 · Oct 18, 2024 · Oct 24, 2024 · Oct 24, 2024
diff --git a/.github/actions/setup-docker/action.yaml b/.github/actions/setup-docker/action.yaml
@@ -0,0 +1,22 @@
+name: 'Set Up Docker'  # composite action: Buildx setup followed by a DockerHub login
+description: 'Set up Docker'
+
+inputs:
+  docker-username:  # passed to docker/login-action as `username`
+    description: 'DockerHub username'
+    required: true
+  docker-password:  # passed to docker/login-action as `password`
+    description: 'DockerHub password'
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Log into DockerHub
+      uses: docker/login-action@v3
+      with:
+        username: ${{ inputs.docker-username }}
+        password: ${{ inputs.docker-password }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -0,0 +1,77 @@
+name: Build and Release Images  # release workflow; its steps call makeDocker.sh for three images
+
+on:
+  push:
+    tags:
+      - v*  # any pushed tag whose name starts with "v"
+  workflow_dispatch:  # can also be started manually
+    inputs:
+      tags:  # NOTE(review): no visible step reads this input — confirm it is still needed
+        description: 'Tags'
+env:
+  AWS_REGION: "us-east-1"  # workflow-level environment variable, visible to every step
+
+permissions:  # scopes granted to the workflow token; all read-only
+  contents: read
+  pull-requests: read
+  repository-projects: read
+
+jobs:
+  release-images:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check permissions using GitHub CLI
+        env:
+          GH_TOKEN: ${{ github.token }}  # token the gh CLI authenticates with
+        run: |
+          # Gate the release: the triggering actor must hold the "admin" permission on this repository.
+          # Runner-provided GITHUB_* variables are used instead of interpolating workflow expressions into the script.
+          permission=$(gh api "repos/${GITHUB_REPOSITORY}/collaborators/${GITHUB_ACTOR}/permission" --jq '.permission')
+          if [ "$permission" != "admin" ]; then
+            echo "Permission denied"
+            exit 1
+          fi
+          echo "Has admin access"
+
+      - name: Remove software and language runtimes we're not using
+        run: |
+          # Delete the software and runtimes this job does not use,
+          # all in one rm invocation (presumably to free disk space for the image builds).
+          sudo rm -rf /usr/share/swift /usr/share/dotnet \
+            /usr/local/share/powershell /usr/local/share/chromium \
+            /usr/local/lib/android /usr/local/lib/node_modules \
+            /usr/local/julia* /opt/google/chrome
+
+          # Report disk usage of the filesystem holding the working directory.
+          df . -h
+
+      - name: Check out repository
+        uses: actions/checkout@v4  # v2 targeted the retired Node 12 action runtime
+        with:
+          fetch-depth: 0  # full history, so `git describe` can reach the release tags
+
+      - name: Fetch all tags  # force-refresh every local tag ref from origin
+        run: git fetch origin +refs/tags/*:refs/tags/*
+
+      - name: Set up Docker  # runs the composite action stored in this repository
+        uses: ./.github/actions/setup-docker
+        with:
+          docker-username: ${{ secrets.DOCKER_USERNAME }}  # DockerHub credentials are read from the secrets context
+          docker-password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Set tag  # exports TAG (closest reachable v* tag) to the following steps
+        run: |
+          TAG=$(git describe --tags --match "v*" --abbrev=0)
+          echo "TAG=$TAG" >> "$GITHUB_ENV"
+
+      - name: Build and push image - createvectordb  # TAG is read from the environment and quoted, not pasted into the script
+        run: |
+          ./dockers/llm.vdb.service/makeDocker.sh elotl/createvectordb "$TAG"
+
+      - name: Build and push image - llm-chat  # arguments: image repository, then tag
+        run: |
+          ./dockers/llm.chatui.service/makeDocker.sh elotl/llm-chat "$TAG"
+
+      - name: Build and push image - serveragllm  # arguments: image repository, then tag
+        run: |
+          ./dockers/llm.rag.service/makeDocker.sh elotl/serveragllm "$TAG"
diff --git a/README.md b/README.md
@@ -1,2 +1,40 @@
-# k8s-rag-llm
-Deployment of RAG + LLM model serving on multiple K8s cloud clusters
+# Question-Answer Chatbot with Self-hosted LLMs & RAG
+
+- Set up the complete infrastructure stack for a Question-Answer chatbot for your private data in just a few minutes!
+- Your stack will be powered by Self-hosted Open-Source Large Language Models and Retrieval Augmented Generation running on Kubernetes Cloud clusters.
+
+## Overview
+
+The Question-Answer Chatbot is powered by these technologies:
+
+1. Open-Source [Large Language Models](https://en.wikipedia.org/wiki/Large_language_model)
+2. [Retrieval Augmented Generation (RAG)](https://en.wikipedia.org/wiki/Retrieval-augmented_generation)
+3. [Vector Stores](https://en.wikipedia.org/wiki/Vector_database)
+4. [Ray AI/ML compute framework](https://www.ray.io/)
+5. [Elotl Luna](https://www.elotl.co/luna.html)
+
+<img src="./diagrams/elotl_genai_infrastack.png" alt="elotl_genai_infrastack" width="400"/>
+
+## Retrieval Augmented Generation
+
+The graphic below shows how RAG is used to determine an answer to the end-user's question about a specific knowledge base.
+
+<center>
+<img src="./diagrams/elotl_genai_stack_enduser.png" alt="elotl_genai_stack_enduser" width="600"/>
+</center>
+
+## Installation
+
+* [Cluster Setup Summary](docs/install.md#cluster-setup-summary)
+* [Install Infrastructure Tools](docs/install.md#install-infrastructure-tools)
+* [Install Model Serve Stack](docs/install.md#install-model-serve-stack)
+* [Model Serving](docs/install.md#model-serve)
+* [Retrieval Augmented Generation using FAISS](docs/install.md#retrieval-augmented-generation-rag-using-faiss)
+* [Creation of the Vector Store](docs/install.md#creation-of-the-vector-store)
+* [Install the RAG & LLM querying service](docs/install.md#setup-rag--llm-service)
+* [Send a question to your LLM with RAG](docs/install.md#query-the-llm-with-rag)
+* [Query your LLM with RAG using a Chat UI](docs/install.md#query-the-llm-with-rag-using-a-chat-ui)
+* [Uninstall](docs/install.md#uninstall)
+
+The complete installation guide is available [here](docs/install.md).
+
diff --git a/demo/llm.chatui.service/auth-proxy.yml b/demo/llm.chatui.service/auth-proxy.yml
@@ -0,0 +1,93 @@
+# nginx-auth-proxy-config.yaml: nginx configuration that puts HTTP basic auth in front of the chat service
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nginx-auth-proxy-config  # referenced by the auth-proxy Deployment's nginx-config volume
+data:
+  nginx.conf: |  # full nginx.conf: listens on 80, requires basic auth, proxies to simple-chat-service:7860
+    events {
+      worker_connections 1024;
+    }
+    http {
+      server {
+        listen 80;
+
+        location / {
+          auth_basic "Restricted Access";
+          auth_basic_user_file /etc/nginx/auth/.htpasswd;
+
+          proxy_pass http://simple-chat-service.default.svc.cluster.local:7860;  # Points to our simple chat service
+          proxy_set_header Host $host;
+          proxy_set_header X-Real-IP $remote_addr;
+          proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+          proxy_set_header X-Forwarded-Proto $scheme;
+        }
+      }
+    }
+
+---
+# auth-secret.yaml: htpasswd file consumed by nginx's auth_basic_user_file
+apiVersion: v1
+kind: Secret
+metadata:
+  name: auth-proxy-credentials
+type: Opaque
+data:
+  # Generated using: htpasswd -c .htpasswd <your_chosen_username>
+  # then base64-encode the file content: cat .htpasswd | base64
+  # The value below decodes to an apr1-hashed entry for user "elotl"
+  # (the earlier note "myuser:elotl" did not match that user name).
+  # NOTE(review): demo credential committed with the manifest; replace it before real use.
+
+  .htpasswd: ZWxvdGw6JGFwcjEkRmtKeUFMWjMkYjd5WXdBdmhHbmtTSjN2QTdCOXlGMAo=
+
+---
+# auth-proxy-deployment.yaml: nginx pods that enforce basic auth and proxy to the chat service
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: auth-proxy
+spec:
+  replicas: 2  # For high availability
+  selector:
+    matchLabels:
+      app: auth-proxy
+  template:
+    metadata:
+      labels:
+        app: auth-proxy  # must match spec.selector and the auth-proxy-service selector
+    spec:
+      volumes:
+      - name: nginx-config  # nginx.conf from the nginx-auth-proxy-config ConfigMap
+        configMap:
+          name: nginx-auth-proxy-config
+      - name: auth-volume  # .htpasswd from the auth-proxy-credentials Secret
+        secret:
+          secretName: auth-proxy-credentials
+      containers:
+      - name: nginx
+        image: nginx:alpine  # NOTE(review): floating tag; consider pinning a specific version
+        ports:
+        - containerPort: 80  # matches `listen 80` in nginx.conf
+        volumeMounts:
+        - name: nginx-config
+          mountPath: /etc/nginx/nginx.conf
+          subPath: nginx.conf  # mounts only the nginx.conf key at that file path
+        - name: auth-volume
+          mountPath: /etc/nginx/auth  # directory named by auth_basic_user_file in nginx.conf
+          readOnly: true
+
+---
+# auth-proxy-service.yaml: entry point in front of the auth-proxy pods
+apiVersion: v1
+kind: Service
+metadata:
+  name: auth-proxy-service
+spec:
+  type: LoadBalancer  # the only Service in this demo that is not ClusterIP
+  ports:
+  - port: 80
+    targetPort: 80  # nginx container port
+    protocol: TCP
+  selector:
+    app: auth-proxy  # routes to pods created by the auth-proxy Deployment
diff --git a/demo/llm.chatui.service/pv-and-pvc.yaml b/demo/llm.chatui.service/pv-and-pvc.yaml
@@ -0,0 +1,23 @@
+apiVersion: v1
+kind: PersistentVolume  # node-local (hostPath) volume for the chat service's logs
+metadata:
+  name: simple-chat-pv
+spec:
+  capacity:
+    storage: 20Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain  # data is kept after the claim is released
+  hostPath:
+    path: /mnt/data/simple-chat-logs  # directory on the node's own filesystem
+---
+apiVersion: v1
+kind: PersistentVolumeClaim  # claim mounted by the simple-chat Deployment as log-storage
+metadata:
+  name: simple-chat-pvc
+spec:  # NOTE(review): no storageClassName/volumeName — with a default StorageClass this may not bind to simple-chat-pv; confirm
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 20Gi  # same size as simple-chat-pv's capacity
diff --git a/demo/llm.chatui.service/simple-chat.yaml b/demo/llm.chatui.service/simple-chat.yaml
@@ -0,0 +1,57 @@
+apiVersion: apps/v1
+kind: Deployment  # chat UI container that queries the RAG/LLM service
+metadata:
+  name: simple-chat
+  labels:
+    app: simple-chat
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: simple-chat
+  template:
+    metadata:
+      labels:
+        app: simple-chat  # matched by spec.selector and by simple-chat-service
+        elotl-luna: "true"  # presumably opts the pod into Elotl Luna node provisioning — confirm
+      annotations:
+        node.elotl.co/instance-type-regexp: "^(t3.xlarge|n2-standard-4)$"  # NOTE(review): the "." in t3.xlarge is an unescaped regex wildcard
+    spec:
+      containers:
+      - name: chat
+        image: elotl/llm-chat:v1.3.12
+        imagePullPolicy: Always
+        ports:
+        - containerPort: 7860  # port that simple-chat-service targets
+        env:
+        - name: RAG_LLM_QUERY_URL  # in-cluster URL of serveragllm-service (default namespace, port 8000)
+          value: "http://serveragllm-service.default.svc.cluster.local:8000"
+        - name: USE_CHATBOT_HISTORY  # passed as the string "True"; interpretation is up to the image
+          value: "True"
+        resources:
+          requests:
+            cpu: "200m"
+            memory: "256Mi"
+          limits:
+            cpu: "500m"
+            memory: "512Mi"
+        volumeMounts:
+        - name: log-storage
+          mountPath: /app/logs  # backed by the simple-chat-pvc claim
+      volumes:
+      - name: log-storage
+        persistentVolumeClaim:
+          claimName: simple-chat-pvc
+---
+apiVersion: v1
+kind: Service  # in-cluster endpoint for the simple-chat pods
+metadata:
+  name: simple-chat-service  # host name used by proxy_pass in the auth proxy's nginx.conf
+spec:
+  selector:
+    app: simple-chat
+  ports:
+    - protocol: TCP
+      port: 7860
+      targetPort: 7860  # the chat container's containerPort
+  type: ClusterIP  # not exposed outside the cluster by this Service
diff --git a/demo/llm.gpu.service/block_device_mapping.json b/demo/llm.gpu.service/block_device_mapping.json
@@ -1,6 +1,6 @@
 [
   {
-    "DeviceName": "/dev/xvda", 
+    "DeviceName": "/dev/xvda",
     "Ebs": {
       "DeleteOnTermination": true,
       "VolumeSize": 80,

diff --git a/demo/llm.gpu.service/block_device_mapping_bottlerocket.json b/demo/llm.gpu.service/block_device_mapping_bottlerocket.json
@@ -0,0 +1,21 @@
+[
+  {
+    "DeviceName": "/dev/xvda",
+    "Ebs": {
+      "DeleteOnTermination": true,
+      "VolumeSize": 80,
+      "VolumeType": "gp3",
+      "Encrypted": false
+    }
+  },
+  {
+    "DeviceName": "/dev/xvdb",
+    "Ebs": {
+      "DeleteOnTermination": true,
+      "VolumeSize": 80,
+      "VolumeType": "gp3",
+      "Encrypted": false,
+      "SnapshotId": "snap-09946d545033d96f7"
+    }
+  }
+]
diff --git a/demo/llm.gpu.service/get-user-data.sh b/demo/llm.gpu.service/get-user-data.sh
@@ -0,0 +1,4 @@
+clustername=${1:?"usage: get-user-data.sh CLUSTER_NAME REGION"}  # EKS cluster name; abort with usage if missing
+region=${2:?"usage: get-user-data.sh CLUSTER_NAME REGION"}  # region passed to eksctl; abort with usage if missing
+eksctl get cluster --region "$region" --name "$clustername" -o json \
+   | jq --raw-output '.[] | "settings.kubernetes.api-server = \"" + .Endpoint + "\"\nsettings.kubernetes.cluster-certificate =\"" + .CertificateAuthority.Data + "\"\n"' > user-data.toml