diff --git a/.github/workflows/benchmark_cpu_onnxruntime.yaml b/.github/workflows/benchmark_cpu_onnxruntime.yaml index 6d40a5c..999e975 100644 --- a/.github/workflows/benchmark_cpu_onnxruntime.yaml +++ b/.github/workflows/benchmark_cpu_onnxruntime.yaml @@ -2,8 +2,14 @@ name: Benchmark CPU Onnxruntime on: workflow_dispatch: + inputs: + rerun_already_conducted_benchmarks: + description: 'Rerun benchmarks that were already conducted' + required: true + type: boolean + default: false schedule: - - cron: "0 12 * * 3" + - cron: "0 12 * * *" pull_request: concurrency: @@ -16,15 +22,18 @@ env: jobs: run_benchmarks: if: >- - (github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_onnxruntime')) || - (github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) || + (github.event_name == 'push' && ( + contains(github.event.head_commit.message, 'cpu_onnxruntime') || + contains(github.event.head_commit.message, 'all_benchmarks') + )) || (github.event_name == 'push' && github.ref == 'refs/heads/main') || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && ( contains(github.event.pull_request.labels.*.name, 'leaderboard') || contains(github.event.pull_request.labels.*.name, 'cpu') || contains(github.event.pull_request.labels.*.name, 'onnxruntime') || - contains(github.event.pull_request.labels.*.name, 'cpu_onnxruntime') + contains(github.event.pull_request.labels.*.name, 'cpu_onnxruntime') || + contains(github.event.pull_request.labels.*.name, 'all_benchmarks') )) strategy: @@ -48,6 +57,7 @@ jobs: HF_TOKEN: ${{ secrets.HF_TOKEN }} DISABLE_WARNINGS: 1 BENCHMARK_TOP_N: 3 + RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }} with: image: ${{ env.IMAGE }} options: | @@ -60,6 +70,7 @@ jobs: --env HF_HUB_ENABLE_HF_TRANSFER=1 --env DISABLE_WARNINGS --env BENCHMARK_TOP_N + --env RERUN_ALREADY_CONDUCTED_BENCHMARKS --volume ${{ github.workspace }}:/workspace --workdir /workspace run: | diff --git a/.github/workflows/benchmark_cpu_openvino.yaml b/.github/workflows/benchmark_cpu_openvino.yaml index 9e4c70f..1dab78b 100644 --- a/.github/workflows/benchmark_cpu_openvino.yaml +++ b/.github/workflows/benchmark_cpu_openvino.yaml @@ -2,8 +2,14 @@ name: Benchmark CPU OpenVINO on: workflow_dispatch: + inputs: + rerun_already_conducted_benchmarks: + description: 'Rerun benchmarks that were already conducted' + required: true + type: boolean + default: false schedule: - - cron: "0 0 * * *" + - cron: "0 6 * * *" pull_request: concurrency: @@ -16,15 +22,18 @@ env: jobs: run_benchmarks: if: >- - (github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_openvino')) || - (github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) || + (github.event_name == 'push' && ( + contains(github.event.head_commit.message, 'cpu_openvino') || + contains(github.event.head_commit.message, 'all_benchmarks') + )) || (github.event_name == 'push' && github.ref == 'refs/heads/main') || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && ( contains(github.event.pull_request.labels.*.name, 'leaderboard') || contains(github.event.pull_request.labels.*.name, 'cpu') || contains(github.event.pull_request.labels.*.name, 'openvino') || - contains(github.event.pull_request.labels.*.name, 'cpu_openvino') + contains(github.event.pull_request.labels.*.name, 'cpu_openvino') || + contains(github.event.pull_request.labels.*.name, 
'all_benchmarks') )) strategy: @@ -48,6 +57,7 @@ jobs: HF_TOKEN: ${{ secrets.HF_TOKEN }} DISABLE_WARNINGS: 1 BENCHMARK_TOP_N: 50 + RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }} with: image: ${{ env.IMAGE }} options: | @@ -60,6 +70,7 @@ jobs: --env HF_HUB_ENABLE_HF_TRANSFER=1 --env DISABLE_WARNINGS --env BENCHMARK_TOP_N + --env RERUN_ALREADY_CONDUCTED_BENCHMARKS --volume ${{ github.workspace }}:/workspace --workdir /workspace run: | diff --git a/.github/workflows/benchmark_cpu_pytorch.yaml b/.github/workflows/benchmark_cpu_pytorch.yaml index d0ca50a..a287606 100644 --- a/.github/workflows/benchmark_cpu_pytorch.yaml +++ b/.github/workflows/benchmark_cpu_pytorch.yaml @@ -2,6 +2,12 @@ name: Benchmark CPU PyTorch on: workflow_dispatch: + inputs: + rerun_already_conducted_benchmarks: + description: 'Rerun benchmarks that were already conducted' + required: true + type: boolean + default: false schedule: - cron: "0 0 * * *" pull_request: @@ -16,15 +22,18 @@ env: jobs: run_benchmarks: if: >- - (github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_pytorch')) || - (github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) || + (github.event_name == 'push' && ( + contains(github.event.head_commit.message, 'cpu_pytorch') || + contains(github.event.head_commit.message, 'all_benchmarks') + )) || (github.event_name == 'push' && github.ref == 'refs/heads/main') || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && ( contains(github.event.pull_request.labels.*.name, 'leaderboard') || contains(github.event.pull_request.labels.*.name, 'cpu') || contains(github.event.pull_request.labels.*.name, 'pytorch') || - contains(github.event.pull_request.labels.*.name, 'cpu_pytorch') + contains(github.event.pull_request.labels.*.name, 'cpu_pytorch') || + contains(github.event.pull_request.labels.*.name, 'all_benchmarks') )) strategy: @@ -47,6 +56,7 @@ jobs: MACHINE: ${{ matrix.machine.name }} HF_TOKEN: ${{ secrets.HF_TOKEN }} BENCHMARK_TOP_N: 50 + RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }} with: image: ${{ env.IMAGE }} options: | @@ -58,6 +68,7 @@ jobs: --env MKL_THREADING_LAYER=GNU --env HF_HUB_ENABLE_HF_TRANSFER=1 --env BENCHMARK_TOP_N + --env RERUN_ALREADY_CONDUCTED_BENCHMARKS --volume ${{ github.workspace }}:/workspace --workdir /workspace run: | diff --git a/.github/workflows/benchmark_cuda_pytorch.yaml b/.github/workflows/benchmark_cuda_pytorch.yaml index d87204b..d04f8d0 100644 --- a/.github/workflows/benchmark_cuda_pytorch.yaml +++ b/.github/workflows/benchmark_cuda_pytorch.yaml @@ -2,8 +2,14 @@ name: Benchmark CUDA PyTorch on: workflow_dispatch: + inputs: + rerun_already_conducted_benchmarks: + description: 'Rerun benchmarks that were already conducted' + required: true + type: boolean + default: false schedule: - - cron: "0 3 * * 0" + - cron: "0 0 * * *" pull_request: concurrency: @@ -16,15 +22,18 @@ env: jobs: run_benchmarks: if: >- - (github.event_name == 'push' && contains(github.event.head_commit.message, 'cuda_pytorch')) || - (github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) || + (github.event_name == 'push' && ( + contains(github.event.head_commit.message, 'cuda_pytorch') || + contains(github.event.head_commit.message, 'all_benchmarks') + )) || (github.event_name == 'push' && github.ref == 'refs/heads/main') || github.event_name == 'workflow_dispatch' || 
(github.event_name == 'pull_request' && ( contains(github.event.pull_request.labels.*.name, 'leaderboard') || contains(github.event.pull_request.labels.*.name, 'cuda') || contains(github.event.pull_request.labels.*.name, 'pytorch') || - contains(github.event.pull_request.labels.*.name, 'cuda_pytorch') + contains(github.event.pull_request.labels.*.name, 'cuda_pytorch') || + contains(github.event.pull_request.labels.*.name, 'all_benchmarks') )) strategy: @@ -51,6 +60,7 @@ jobs: MACHINE: ${{ matrix.machine.name }} HF_TOKEN: ${{ secrets.HF_TOKEN }} BENCHMARK_TOP_N: 50 + RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }} with: image: ${{ env.IMAGE }} options: | @@ -63,6 +73,7 @@ jobs: --env MKL_THREADING_LAYER=GNU --env HF_HUB_ENABLE_HF_TRANSFER=1 --env BENCHMARK_TOP_N + --env RERUN_ALREADY_CONDUCTED_BENCHMARKS --volume ${{ github.workspace }}:/workspace --workdir /workspace run: | diff --git a/.gitignore b/.gitignore index 3fcc481..6800897 100644 --- a/.gitignore +++ b/.gitignore @@ -191,3 +191,4 @@ optimum-benchmark/ *.egg-info/ data/ +load_model_codecarbon.json \ No newline at end of file diff --git a/Makefile b/Makefile index dc0ad56..f9ed5f1 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,11 @@ +# Load environment variables +ifneq (,$(wildcard .env)) + include .env + export +endif + # Style and Quality checks -.PHONY: style quality install install-dev run_cpu_container run_cuda_container run_rocm_container cpu-pytorch-container cpu-openvino-container collector-container +.PHONY: style quality run_cpu_container run_cuda_container run_rocm_container cpu-pytorch-container cpu-openvino-container collector-container help quality: ruff check . @@ -9,31 +15,46 @@ style: ruff format . ruff check --fix . -install: - pip install . - -install-dev: - DEBUG=1 uv pip install -e . - # Running optimum-benchmark containers -run_cpu_container: +run-optimum-benchmark-cpu-container: docker run -it --rm --pid host --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-cpu -run_cuda_container: +run-optimum-benchmark-cuda-container: docker run -it --rm --pid host --gpus all --shm-size 64G --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-cuda -run_rocm_container: +run-optimum-benchmark-rocm-container: docker run -it --rm --shm-size 64G --device /dev/kfd --device /dev/dri --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-rocm -# Running llm-perf backend containers -cpu-pytorch-container: - docker build -t cpu-pytorch -f docker/cpu-pytorch/Dockerfile . - docker run -it --rm --pid host cpu-pytorch - -cpu-openvino-container: - docker build -t cpu-openvino -f docker/cpu-openvino/Dockerfile . - docker run -it --rm --pid host cpu-openvino +# Running llm-perf-leaderboard benchmarks +run-llm-perf-benchmark-cpu-pytorch: + docker build -t llm-perf-backend-cpu-pytorch -f docker/cpu-pytorch/Dockerfile . + docker run -it --rm --pid host llm-perf-backend-cpu-pytorch + +run-llm-perf-benchmark-cpu-openvino: + docker build -t llm-perf-backend-cpu-openvino -f docker/cpu-openvino/Dockerfile . + docker run -it --rm --pid host llm-perf-backend-cpu-openvino + +run-llm-perf-benchmark-cuda-pytorch: + docker build -t llm-perf-backend-cuda-pytorch -f docker/gpu-cuda/Dockerfile . 
+ docker run -it --rm --pid host --gpus all --shm-size 64G --volume .:/llm-perf-backend --workdir /llm-perf-backend llm-perf-backend-cuda-pytorch + +run-llm-perf-benchmark-collector: + docker build -t llm-perf-backend-collector -f docker/collector/Dockerfile . + docker run -it --rm --pid host llm-perf-backend-collector + +help: + @echo "Commands:" + @echo " style - Format code and fix style issues" + @echo " quality - Run style checks without fixing" + @echo "" + @echo "Optimum Benchmark Containers:" + @echo " run-optimum-benchmark-cpu-container - Run CPU container" + @echo " run-optimum-benchmark-cuda-container - Run CUDA container" + @echo " run-optimum-benchmark-rocm-container - Run ROCm container" + @echo "" + @echo "LLM Performance Backend Containers:" + @echo " run-llm-perf-benchmark-cpu-pytorch - Run the llm-perf-leaderboard Benchmark CPU PyTorch" + @echo " run-llm-perf-benchmark-cpu-openvino - Run the llm-perf-leaderboard Benchmark CPU OpenVINO" + @echo " run-llm-perf-benchmark-cuda-pytorch - Run the llm-perf-leaderboard Benchmark CUDA PyTorch" + @echo " run-llm-perf-benchmark-collector - Run the llm-perf-leaderboard Collector container" -collector-container: - docker build -t collector -f docker/collector/Dockerfile . - docker run -it --rm --pid host collector diff --git a/README.md b/README.md index 7c69305..d25bd97 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,7 @@ LLM-perf Backend is designed to: ## Installation 🛠️ -1. Clone the repository: -```bash +1. Clone the repository: +```bash git clone https://github.com/huggingface/llm-perf-backend cd llm-perf-backend ``` @@ -53,7 +52,6 @@ llm-perf run-benchmark --hardware cpu --backend pytorch ``` ### Configuration Options - View all the options with ```bash llm-perf run-benchmark --help @@ -62,6 +60,18 @@ llm-perf run-benchmark --help - `--hardware`: Target hardware platform (cpu, cuda) - `--backend`: Backend framework to use (pytorch, onnxruntime, etc.) +### (Optional) Running Benchmarks via Docker + +You can run the benchmarks using the following make commands: + +```bash +# CPU Benchmarks +make run-llm-perf-benchmark-cpu-pytorch # Run PyTorch CPU benchmark +make run-llm-perf-benchmark-cpu-openvino # Run OpenVINO CPU benchmark + +# GPU Benchmarks +make run-llm-perf-benchmark-cuda-pytorch # Run PyTorch CUDA benchmark +``` + ## Benchmark Dataset 📊 Results are published to the official dataset: @@ -75,4 +85,5 @@ All benchmarks follow these standardized settings: - Memory tracking: - Maximum allocated memory - Maximum reserved memory - - Maximum used memory (via PyNVML for GPU) \ No newline at end of file + - Maximum used memory (via PyNVML for GPU) + diff --git a/dashboard/main.py b/dashboard/main.py new file mode 100644 index 0000000..df3fd88 --- /dev/null +++ b/dashboard/main.py @@ -0,0 +1,4 @@ +# -> need to view on the individual runs to get details +# -> get stats about the latest runs for all the hardware.yml +# -> get stats on the latest github actions +# -> get the stats on the top 50 models diff --git a/dashboard/requirements.txt b/dashboard/requirements.txt new file mode 100644 index 0000000..e69de29 diff --git a/docker/cpu-onnxruntime/Dockerfile b/docker/cpu-onnxruntime/Dockerfile new file mode 100644 index 0000000..03ec92d --- /dev/null +++ b/docker/cpu-onnxruntime/Dockerfile @@ -0,0 +1,11 @@ +FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu + +WORKDIR /workspace + +COPY setup.py . + +RUN pip install -e .[onnxruntime] + +COPY . .
+ +ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "onnxruntime"] diff --git a/docker/cpu-openvino/Dockerfile b/docker/cpu-openvino/Dockerfile index 2f88e1e..4446353 100644 --- a/docker/cpu-openvino/Dockerfile +++ b/docker/cpu-openvino/Dockerfile @@ -3,10 +3,9 @@ FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu WORKDIR /workspace COPY setup.py . -# COPY pyproject.toml . RUN pip install -e .[openvino] COPY . . -CMD ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "openvino"] +ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "openvino"] diff --git a/docker/cpu-pytorch/Dockerfile b/docker/cpu-pytorch/Dockerfile index f6e3cc7..76052d0 100644 --- a/docker/cpu-pytorch/Dockerfile +++ b/docker/cpu-pytorch/Dockerfile @@ -2,8 +2,10 @@ FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu WORKDIR /workspace -COPY . . +COPY setup.py . RUN pip install -e . -CMD ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "pytorch"] +COPY . . + +ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "pytorch"] diff --git a/docker/gpu-cuda/Dockerfile b/docker/gpu-cuda/Dockerfile new file mode 100644 index 0000000..3e503b4 --- /dev/null +++ b/docker/gpu-cuda/Dockerfile @@ -0,0 +1,12 @@ +FROM ghcr.io/huggingface/optimum-benchmark:latest-cuda + +WORKDIR /workspace + +COPY setup.py . + +RUN pip install -e .[cuda] \ + && pip install flash-attn --no-build-isolation + +COPY . . + +ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cuda", "--backend", "pytorch"] diff --git a/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py b/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py index 2d8861e..eac63b5 100644 --- a/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py +++ b/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py @@ -19,9 +19,9 @@ def __init__(self): super().__init__(backend="onnxruntime", device="cpu") self.attention_configs = self._get_attention_configs() - assert ( - self.subset is not None - ), "SUBSET environment variable must be set for benchmarking" + assert self.subset is not None, ( + "SUBSET environment variable must be set for benchmarking" + ) self.weights_configs = self._get_weights_configs(self.subset) def get_list_of_benchmarks_to_run(self) -> List[Dict[str, Any]]: @@ -47,9 +47,9 @@ def get_benchmark_config(self, model: str, **kwargs) -> BenchmarkConfig: weights_config = kwargs["weights_config"] attn_implementation = kwargs["attn_implementation"] - assert ( - weights_config in self.weights_configs - ), f"your config does not contain {weights_config}, adjust your _get_weights_configs to fix this issue" + assert weights_config in self.weights_configs, ( + f"your config does not contain {weights_config}, adjust your _get_weights_configs to fix this issue" + ) torch_dtype = self.weights_configs[weights_config]["torch_dtype"] quant_config = self.weights_configs[weights_config]["quant_config"] diff --git a/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_openvino.py b/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_openvino.py index 0a1f771..0d7e3f8 100644 --- a/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_openvino.py +++ b/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_openvino.py @@ -19,9 +19,9 @@ def __init__(self): super().__init__(backend="openvino", device="cpu") self.attention_configs = self._get_attention_configs() - assert ( - self.subset is not None - ), "SUBSET environment variable must be set for benchmarking" + 
assert self.subset is not None, ( + "SUBSET environment variable must be set for benchmarking" + ) self.weights_configs = self._get_weights_configs(self.subset) def get_list_of_benchmarks_to_run(self) -> List[Dict[str, Any]]: @@ -47,9 +47,9 @@ def get_benchmark_config(self, model: str, **kwargs) -> BenchmarkConfig: weights_config = kwargs["weights_config"] attn_implementation = kwargs["attn_implementation"] - assert ( - weights_config in self.weights_configs - ), f"your config does not contain {weights_config}, adjust your _get_weights_configs to fix this issue" + assert weights_config in self.weights_configs, ( + f"your config does not contain {weights_config}, adjust your _get_weights_configs to fix this issue" + ) quant_config = self.weights_configs[weights_config]["quant_config"] diff --git a/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_pytorch.py b/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_pytorch.py index c14cf6b..1c4440f 100644 --- a/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_pytorch.py +++ b/llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_pytorch.py @@ -19,9 +19,9 @@ def __init__(self): super().__init__(backend="pytorch", device="cpu") self.attention_configs = self._get_attention_configs() - assert ( - self.subset is not None - ), "SUBSET environment variable must be set for benchmarking" + assert self.subset is not None, ( + "SUBSET environment variable must be set for benchmarking" + ) self.weights_configs = self._get_weights_configs(self.subset) def get_list_of_benchmarks_to_run(self) -> List[Dict[str, Any]]: @@ -47,9 +47,9 @@ def get_benchmark_config(self, model: str, **kwargs) -> BenchmarkConfig: weights_config = kwargs["weights_config"] attn_implementation = kwargs["attn_implementation"] - assert ( - weights_config in self.weights_configs - ), f"your config does not contain {weights_config}, adjust your _get_weights_configs to fix this issue" + assert weights_config in self.weights_configs, ( + f"your config does not contain {weights_config}, adjust your _get_weights_configs to fix this issue" + ) torch_dtype = self.weights_configs[weights_config]["torch_dtype"] quant_scheme = self.weights_configs[weights_config]["quant_scheme"] diff --git a/llm_perf/benchmark_runners/cuda/update_llm_perf_cuda_pytorch.py b/llm_perf/benchmark_runners/cuda/update_llm_perf_cuda_pytorch.py index fef89c9..b2f717b 100644 --- a/llm_perf/benchmark_runners/cuda/update_llm_perf_cuda_pytorch.py +++ b/llm_perf/benchmark_runners/cuda/update_llm_perf_cuda_pytorch.py @@ -19,9 +19,9 @@ def __init__(self): super().__init__(backend="pytorch", device="cuda") self.attention_configs = self._get_attention_configs() - assert ( - self.subset is not None - ), "SUBSET environment variable must be set for benchmarking" + assert self.subset is not None, ( + "SUBSET environment variable must be set for benchmarking" + ) self.weights_configs = self._get_weights_configs(self.subset) def get_list_of_benchmarks_to_run(self) -> List[Dict[str, Any]]: @@ -55,9 +55,9 @@ def get_benchmark_config(self, model: str, **kwargs) -> BenchmarkConfig: weights_config = kwargs["weights_config"] attn_implementation = kwargs["attn_implementation"] - assert ( - weights_config in self.weights_configs - ), f"your config does contains the {weights_config}, adjust your _get_weights_configs to fix this issue" + assert weights_config in self.weights_configs, ( + f"your config does not contain {weights_config}, adjust your _get_weights_configs to fix this issue" + ) torch_dtype =
self.weights_configs[weights_config]["torch_dtype"] quant_scheme = self.weights_configs[weights_config]["quant_scheme"] @@ -206,7 +206,7 @@ def _get_weights_configs(self, subset) -> Dict[str, Dict[str, Any]]: raise ValueError(f"Unknown subset: {subset}") def _get_attention_configs(self) -> List[str]: - return ["eager", "sdpa", "flash_attention_2"] + return ["eager", "sdpa"] if __name__ == "__main__": diff --git a/llm_perf/cli.py b/llm_perf/cli.py index 6993744..ec42640 100644 --- a/llm_perf/cli.py +++ b/llm_perf/cli.py @@ -20,9 +20,13 @@ from llm_perf.update_llm_perf_leaderboard import update_llm_perf_leaderboard +from loguru import logger + if os.environ.get("DISABLE_WARNINGS", "0") == "1": warnings.filterwarnings("ignore") +os.environ["CI"] = "GITHUB_ACTIONS" + app = typer.Typer() @@ -46,9 +50,9 @@ def run_benchmark( ): env_vars = load_dotenv() if env_vars: - print("Environment variables loaded successfully") + logger.info("Environment variables loaded successfully") else: - print("No environment variables loaded") + logger.info("No environment variables loaded") if hardware == Hardware.CPU: if backend == Backend.ONNXRUNTIME: @@ -61,7 +65,7 @@ if backend == Backend.PYTORCH: runner = CUDAPyTorchBenchmarkRunner() else: - typer.echo(f"CUDA is not supported for {backend} backend") + logger.error(f"CUDA is not supported for {backend} backend") raise typer.Exit(code=1) runner.run_benchmarks() @@ -72,5 +76,19 @@ def update_leaderboard(): update_llm_perf_leaderboard() +@app.command() +def launch_dashboard( + port: int = typer.Option(7860, help="Port to run the dashboard on"), + share: bool = typer.Option(False, help="Whether to create a public URL"), +): + """Launch the LLM Performance Dashboard.""" + from llm_perf.dashboard_app import DashboardApp + + logger.info(f"Starting dashboard on port {port}") + + dashboard = DashboardApp() + dashboard.launch(port=port, share=share) + + if __name__ == "__main__": app() diff --git a/llm_perf/common/benchmark_runner.py b/llm_perf/common/benchmark_runner.py index e6ecac8..33de8ca 100644 --- a/llm_perf/common/benchmark_runner.py +++ b/llm_perf/common/benchmark_runner.py @@ -1,17 +1,22 @@ import os +import sys import traceback from abc import ABC, abstractmethod -from logging import getLogger from typing import Any, Dict, List, Optional +import subprocess +import time +import uuid +from datetime import datetime +from loguru import logger from optimum_benchmark import Benchmark, BenchmarkConfig, BenchmarkReport -from optimum_benchmark.logging_utils import setup_logging from llm_perf.common.utils import ( CANONICAL_PRETRAINED_OPEN_LLM_LIST, - OPEN_LLM_LIST, - PRETRAINED_OPEN_LLM_LIST, ) +from llm_perf.common.memory_utils import log_memory_usage +from llm_perf.common.dashboard import BenchmarkRunDetails +from llm_perf.common.dashboard_manager import DashboardManager class LLMPerfBenchmarkManager(ABC): @@ -26,7 +31,7 @@ def __init__( self.device = device self.subset = subset or os.getenv("SUBSET", None) self.machine = machine or os.getenv("MACHINE", None) - self.logger = getLogger("llm-perf-backend") + self.dashboard_manager = DashboardManager() if self.machine is None and self.subset is None: self.push_repo_id = ( ) self.canonical_pretrained_open_llm_list = ["gpt2"] self.subset = "unquantized" + self.machine = "debug" # Set a default machine name for debug mode elif self.machine is not None and self.subset is not None: self.push_repo_id = f"optimum-benchmark/llm-perf-{self.backend}-{self.device}-{self.subset}-{self.machine}" else: @@ -41,12
+47,8 @@ def __init__( "Either both MACHINE and SUBSET should be set for benchmarking or neither for debugging" ) - self.logger.info(f"len(OPEN_LLM_LIST): {len(OPEN_LLM_LIST)}") - self.logger.info( - f"len(PRETRAINED_OPEN_LLM_LIST): {len(PRETRAINED_OPEN_LLM_LIST)}" - ) - self.logger.info( - f"len(CANONICAL_PRETRAINED_OPEN_LLM_LIST): {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)}" + logger.info( + f"Starting benchmark runner with backend: {self.backend}, device: {self.device}, subset: {self.subset}, machine: {self.machine}" ) @abstractmethod @@ -73,22 +75,188 @@ def get_list_of_benchmarks_to_run(self) -> List[Dict[str, Any]]: "This method should be implemented in the child class" ) - def run_benchmarks(self): - os.environ["LOG_TO_FILE"] = "0" - os.environ["LOG_LEVEL"] = "INFO" - setup_logging(level="INFO", prefix="MAIN-PROCESS") + def run_single_benchmark_in_subprocess( + self, model: str, run_id: str, run_start_time: str, **kwargs + ) -> bool: + """Run a single benchmark in a separate process""" + try: + # Create the Python script to run in subprocess + script = f""" +import sys +import os +from {self.__class__.__module__} import {self.__class__.__name__} +from loguru import logger +import traceback + +try: + runner = {self.__class__.__name__}() + + runner.run_benchmark(model="{model}", **{kwargs}) + sys.exit(0) +except Exception: + logger.error("Error in subprocess:" + "\\n" + traceback.format_exc()) + sys.exit(1) +""" + + # Run the subprocess with timeout + result = subprocess.run( + [sys.executable, "-c", script], + text=True, + env={ + **os.environ, + "PYTHONUNBUFFERED": "1", + "LOG_TO_FILE": "0", # Disable file logging for optimum-benchmark + "BENCHMARK_RUN_ID": run_id, + "BENCHMARK_START_TIME": run_start_time, + }, + timeout=3600, # 1 hour timeout + ) + + return result.returncode == 0 + + except subprocess.TimeoutExpired: + logger.error(f"Benchmark timed out for model {model}") + return False + except Exception: + logger.error( + "Failed to run benchmark process:" + "\n" + traceback.format_exc() + ) + return False + def run_benchmarks(self): + """Run all benchmarks sequentially with process isolation""" benchmarks_to_run = self.get_list_of_benchmarks_to_run() - self.logger.info( + logger.info( f"Running a total of {len(benchmarks_to_run)} benchmarks, " f"with {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)} models" ) - for benchmark_name in benchmarks_to_run: - assert "model" in benchmark_name, "each benchmark should have a model" + logger.info( + f"Models that are being benchmarked: {CANONICAL_PRETRAINED_OPEN_LLM_LIST}" + ) + + rerun_already_conducted_benchmarks = ( + os.getenv("RERUN_ALREADY_CONDUCTED_BENCHMARKS", "false") == "true" + ) + + total_benchmarks = len(benchmarks_to_run) + completed_benchmarks = 0 + failed_benchmarks = 0 + skipped_benchmarks = 0 + failed_models = [] + start_time = time.time() + + # Generate run ID and start time for this benchmark session + run_id = str(uuid.uuid4()) + run_start_time = datetime.now().isoformat() + + for benchmark_config in benchmarks_to_run: + try: + # Log memory before benchmark + logger.info("Memory usage before benchmark:") + log_memory_usage("before") + + model = benchmark_config.pop("model") # Remove model from kwargs + benchmark_name = self.get_benchmark_name(model, **benchmark_config) + subfolder = f"{benchmark_name}/{model.replace('/', '--')}" + + if not rerun_already_conducted_benchmarks: + if self.is_benchmark_conducted(self.push_repo_id, subfolder): + logger.info( + f"Skipping already conducted benchmark: {benchmark_name}" + ) + 
benchmark_config["model"] = model # Restore model key + completed_benchmarks += 1 + skipped_benchmarks += 1 + success_rate = ( + ( + (completed_benchmarks - failed_benchmarks) + / completed_benchmarks + ) + * 100 + if completed_benchmarks > 0 + else 100 + ) + logger.info( + f"\nProgress: {completed_benchmarks}/{total_benchmarks} benchmarks completed ({(completed_benchmarks / total_benchmarks) * 100:.1f}%) - Current success rate: {success_rate:.1f}%\n" + ) + continue + + logger.info( + f"Starting benchmark for model {model} with config: {benchmark_config}" + ) + + # Run the benchmark in a separate process + success = self.run_single_benchmark_in_subprocess( + model=model, + run_id=run_id, + run_start_time=run_start_time, + **benchmark_config, + ) + + if not success: + logger.error(f"Benchmark failed for model {model}") + failed_benchmarks += 1 + failed_models.append(model) - self.run_benchmark(**benchmark_name) + completed_benchmarks += 1 + success_rate = ( + ((completed_benchmarks - failed_benchmarks) / completed_benchmarks) + * 100 + if completed_benchmarks > 0 + else 100 + ) + logger.info( + f"\nProgress: {completed_benchmarks}/{total_benchmarks} benchmarks completed ({(completed_benchmarks / total_benchmarks) * 100:.1f}%) - Current success rate: {success_rate:.1f}%\n" + ) + + # Log memory after benchmark + logger.info("Memory usage after benchmark:") + log_memory_usage("after") + + except Exception as e: + logger.error(f"Failed to run benchmark for {model}: {str(e)}") + logger.error(traceback.format_exc()) + failed_benchmarks += 1 + failed_models.append(model) + finally: + # Restore model key in case the config is reused + benchmark_config["model"] = model + + # Calculate execution time + total_time = time.time() - start_time + hours = int(total_time // 3600) + minutes = int((total_time % 3600) // 60) + seconds = int(total_time % 60) + + # Print summary + logger.info("\n" + "=" * 50) + logger.info("BENCHMARK EXECUTION SUMMARY") + logger.info("=" * 50) + logger.info(f"Total execution time: {hours}h {minutes}m {seconds}s") + logger.info(f"Total benchmarks: {total_benchmarks}") + logger.info( + f"Successfully completed: {completed_benchmarks - failed_benchmarks}" + ) + logger.info(f"Failed: {failed_benchmarks}") + logger.info(f"Skipped (already conducted): {skipped_benchmarks}") + logger.info( + f"Success rate: {((completed_benchmarks - failed_benchmarks) / total_benchmarks) * 100:.1f}%" + ) + + if failed_models: + logger.info("\nFailed models:") + for model in failed_models: + logger.info(f" - {model}") + + logger.info("\nConfiguration:") + logger.info(f" Backend: {self.backend}") + logger.info(f" Device: {self.device}") + logger.info(f" Subset: {self.subset}") + logger.info(f" Machine: {self.machine}") + logger.info(f" Rerun already conducted: {rerun_already_conducted_benchmarks}") + logger.info("=" * 50 + "\n") def is_benchmark_conducted(self, push_repo_id, subfolder): try: @@ -114,18 +282,6 @@ def run_benchmark(self, **kwargs): benchmark_name = self.get_benchmark_name(model, **kwargs) subfolder = f"{benchmark_name}/{model.replace('/', '--')}" - if not self.is_benchmark_supported(**kwargs): - self.logger.info( - f"Skipping benchmark {benchmark_name} with model {model} since it is not supported" - ) - return - - if self.is_benchmark_conducted(self.push_repo_id, subfolder): - self.logger.info( - f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted" - ) - return - benchmark_config = self.get_benchmark_config(model, **kwargs) benchmark_config.push_to_hub( 
repo_id=self.push_repo_id, subfolder=subfolder, private=True @@ -141,8 +297,23 @@ def get_benchmark_config(self, model: str, **kwargs) -> BenchmarkConfig: def execute_and_log_benchmark( self, benchmark_config: BenchmarkConfig, subfolder: str ): + # Get run_id and run_start_time from environment variables + run_id = os.environ.get("BENCHMARK_RUN_ID") + run_start_time = os.environ.get("BENCHMARK_START_TIME") + + if not run_id or not run_start_time: + # Fallback to generating new ones if not provided + run_id = str(uuid.uuid4()) + run_start_time = datetime.now().isoformat() + + success = False + error_traceback = "" + try: - self.logger.info( + logger.info("Memory usage before execution:") + log_memory_usage("before") + + logger.info( f"Running benchmark {benchmark_config.name} with model {benchmark_config.backend.model}" ) benchmark_report = Benchmark.launch(benchmark_config) @@ -153,13 +324,17 @@ def execute_and_log_benchmark( benchmark.push_to_hub( repo_id=self.push_repo_id, subfolder=subfolder, private=True ) + + logger.info("Memory usage after execution:") + log_memory_usage("after") + + success = True + except Exception as e: - self.logger.error( - f"Benchmark {benchmark_config.name} failed with model {benchmark_config.backend.model}, error:\n{e}" - ) - benchmark_report = BenchmarkReport.from_dict( - {"traceback": traceback.format_exc()} - ) + error_msg = f"Benchmark {benchmark_config.name} failed with model {benchmark_config.backend.model}, error:\n{e}" + logger.error(error_msg) + error_traceback = traceback.format_exc() + benchmark_report = BenchmarkReport.from_dict({"traceback": error_traceback}) benchmark_report.push_to_hub( repo_id=self.push_repo_id, subfolder=subfolder, private=True ) @@ -167,3 +342,26 @@ def execute_and_log_benchmark( benchmark.push_to_hub( repo_id=self.push_repo_id, subfolder=subfolder, private=True ) + + finally: + # At this point self.machine and self.subset should be strings + # If they're not, use default values + machine = self.machine if self.machine is not None else "unknown" + subset = self.subset if self.subset is not None else "unknown" + + # Create and upload run details + run_details = BenchmarkRunDetails( + machine=machine, + hardware=self.device, + subsets=subset, + backends=self.backend, + model=benchmark_config.backend.model, + success=success, + traceback=error_traceback, + last_updated=datetime.now().isoformat(), + run_id=run_id, + run_start_time=run_start_time, + ) + + # Upload to dashboard + self.dashboard_manager.upload_run_details(run_details) diff --git a/llm_perf/common/dashboard.py b/llm_perf/common/dashboard.py new file mode 100644 index 0000000..cba4345 --- /dev/null +++ b/llm_perf/common/dashboard.py @@ -0,0 +1,15 @@ +from dataclasses import dataclass + + +@dataclass +class BenchmarkRunDetails: + machine: str + hardware: str + subsets: str + backends: str + model: str + success: bool + traceback: str + last_updated: str + run_id: str + run_start_time: str diff --git a/llm_perf/common/dashboard_manager.py b/llm_perf/common/dashboard_manager.py new file mode 100644 index 0000000..88d2469 --- /dev/null +++ b/llm_perf/common/dashboard_manager.py @@ -0,0 +1,232 @@ +import pandas as pd +from datasets import Dataset, load_dataset +from huggingface_hub import create_repo, HfApi +from loguru import logger +from typing import List, Optional +import time + +from llm_perf.common.dashboard import BenchmarkRunDetails + +DASHBOARD_REPO_ID = "optimum-benchmark/llm-perf-dashboard" +MAX_RETRIES = 3 +RETRY_DELAY = 2 # seconds + + +class 
DashboardManager: + def __init__(self): + # Ensure the dataset repository exists + create_repo(repo_id=DASHBOARD_REPO_ID, repo_type="dataset", exist_ok=True) + self._current_commit = None + self._api = HfApi() + self._is_first_upload = False + + def _get_current_commit(self) -> Optional[str]: + """Get the current commit hash of the main branch.""" + try: + repo_info = self._api.repo_info( + repo_id=DASHBOARD_REPO_ID, repo_type="dataset" + ) + return repo_info.sha + except Exception as e: + logger.error(f"Failed to get current commit: {str(e)}") + return None + + def _load_existing_dataset(self) -> Optional[Dataset]: + """Load the existing dataset from the hub.""" + try: + dataset = load_dataset(DASHBOARD_REPO_ID, split="train") + if isinstance(dataset, Dataset): + self._current_commit = self._get_current_commit() + return dataset + else: + logger.error("Loaded dataset is not of type Dataset") + return None + except Exception as e: + if "doesn't contain any data files" in str(e): + logger.info("No existing dataset found, this will be the first upload") + self._is_first_upload = True + self._current_commit = self._get_current_commit() + return None + logger.error(f"Failed to load existing dataset: {str(e)}") + return None + + def _verify_commit(self) -> bool: + """Verify that the current commit hasn't changed.""" + if self._is_first_upload: + # For first upload, we don't need to verify commit + return True + + current = self._get_current_commit() + if current != self._current_commit: + logger.error("Dataset has been updated since last read. Aborting upload.") + return False + return True + + def _convert_to_dict(self, run_details: BenchmarkRunDetails) -> dict: + """Convert BenchmarkRunDetails to a dictionary format suitable for the dataset.""" + return { + "machine": run_details.machine, + "hardware": run_details.hardware, + "subsets": run_details.subsets, + "backends": run_details.backends, + "model": run_details.model, + "success": run_details.success, + "traceback": run_details.traceback, + "last_updated": run_details.last_updated, + "run_id": run_details.run_id, + "run_start_time": run_details.run_start_time, + } + + def upload_run_details(self, run_details: BenchmarkRunDetails): + """Upload a single benchmark run details to the dashboard dataset.""" + for attempt in range(MAX_RETRIES): + try: + # Reset first upload flag on each attempt + self._is_first_upload = False + + # Load existing dataset + existing_dataset = self._load_existing_dataset() + if existing_dataset is None and not self._is_first_upload: + # Failed to load for reasons other than being first upload + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_DELAY) + continue + else: + logger.error( + "Max retries reached. Failed to upload run details." + ) + return + + # Get existing data or empty list for first upload + existing_data = existing_dataset.to_list() if existing_dataset else [] + + # Convert the new run details to a dictionary + new_run = self._convert_to_dict(run_details) + + # Combine existing data with new run + combined_data = existing_data + [new_run] + + # Create new dataset + dataset = Dataset.from_list(combined_data) + + # Verify commit hasn't changed (skipped for first upload) + if not self._verify_commit(): + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_DELAY) + continue + else: + logger.error( + "Max retries reached. Failed to upload run details." 
+ ) + return + + # Push to hub + dataset.push_to_hub(repo_id=DASHBOARD_REPO_ID, split="train") + logger.info( + f"Successfully uploaded run details for {run_details.run_id} to dashboard" + ) + break + + except Exception as e: + logger.error( + f"Failed to upload run details (attempt {attempt + 1}/{MAX_RETRIES}): {str(e)}" + ) + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_DELAY) + continue + break + + def upload_multiple_run_details(self, run_details_list: List[BenchmarkRunDetails]): + """Upload multiple benchmark run details to the dashboard dataset.""" + for attempt in range(MAX_RETRIES): + try: + # Load existing dataset + existing_dataset = self._load_existing_dataset() + if existing_dataset is None: + existing_data = [] + else: + existing_data = existing_dataset.to_list() + + # Convert all new run details to dictionaries + new_runs = [self._convert_to_dict(rd) for rd in run_details_list] + + # Combine existing data with new runs + combined_data = existing_data + new_runs + + # Create new dataset + dataset = Dataset.from_list(combined_data) + + # Verify commit hasn't changed + if not self._verify_commit(): + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_DELAY) + continue + else: + logger.error( + "Max retries reached. Failed to upload run details." + ) + return + + # Push to hub + dataset.push_to_hub(repo_id=DASHBOARD_REPO_ID, split="train") + logger.info( + f"Successfully uploaded {len(run_details_list)} run details to dashboard" + ) + break + + except Exception as e: + logger.error( + f"Failed to upload run details (attempt {attempt + 1}/{MAX_RETRIES}): {str(e)}" + ) + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_DELAY) + continue + break + + def get_latest_runs( + self, + machine: Optional[str] = None, + hardware: Optional[str] = None, + model: Optional[str] = None, + limit: int = 100, + ) -> pd.DataFrame: + """ + Retrieve the latest benchmark runs from the dashboard dataset. 
+ + Args: + machine: Filter by machine name + hardware: Filter by hardware type + model: Filter by model name + limit: Maximum number of runs to return + + Returns: + DataFrame containing the latest runs + """ + try: + # Load the dataset + dataset = load_dataset(DASHBOARD_REPO_ID, split="train") + if not isinstance(dataset, Dataset): + logger.error("Failed to load dataset: not a Dataset instance") + return pd.DataFrame() + + # Convert to pandas DataFrame using dictionary + data_dict = {col: dataset[col] for col in dataset.column_names} + df = pd.DataFrame(data_dict) + + # Apply filters + if machine: + df = df[df["machine"] == machine] + if hardware: + df = df[df["hardware"] == hardware] + if model: + df = df[df["model"] == model] + + # Sort by last_updated and take the most recent runs + df["last_updated"] = pd.to_datetime(df["last_updated"]) + df = df.sort_values("last_updated", ascending=False).head(limit) + + return df + + except Exception as e: + logger.error(f"Failed to retrieve latest runs: {str(e)}") + return pd.DataFrame() diff --git a/llm_perf/common/get_top_model_from_hub.py b/llm_perf/common/get_top_model_from_hub.py index 79feb64..ba337f0 100644 --- a/llm_perf/common/get_top_model_from_hub.py +++ b/llm_perf/common/get_top_model_from_hub.py @@ -5,6 +5,7 @@ import requests from datasets import Dataset +from loguru import logger def get_top_text_generation_models( @@ -42,7 +43,7 @@ def get_top_text_generation_models( def save_to_json(data: List[Dict], filename: str): with open(filename, "w", encoding="utf-8") as f: json.dump(data, f, indent=2, ensure_ascii=False) - print(f"Data saved to {filename}") + logger.info(f"Data saved to {filename}") def compute_org_downloads(models: List[Dict]) -> Dict[str, int]: @@ -55,7 +56,7 @@ def compute_org_downloads(models: List[Dict]) -> Dict[str, int]: def upload_to_hf_dataset(data: List[Dict], dataset_name: str): dataset = Dataset.from_list(data) dataset.push_to_hub(dataset_name) - print(f"Data uploaded to Hugging Face dataset: {dataset_name}") + logger.info(f"Data uploaded to Hugging Face dataset: {dataset_name}") def main(): @@ -64,16 +65,16 @@ def main(): if huggingface_token: os.environ["HUGGINGFACE_HUB_TOKEN"] = huggingface_token else: - print( + logger.warning( "Warning: HUGGINGFACE_TOKEN not found in environment variables. Running without authentication." ) n = 100 top_models = get_top_text_generation_models(n) - print(f"\nTop {n} text generation models on Hugging Face Hub:") + logger.info(f"\nTop {n} text generation models on Hugging Face Hub:") for i, model in enumerate(top_models, 1): - print( + logger.info( f"{i}. {model['organization']}/{model['model_name']}: {model['downloads']:,} downloads" ) @@ -82,11 +83,11 @@ def main(): upload_to_hf_dataset(top_models, dataset_name) # Display top 10 organizations by downloads - print("\nTop 10 organizations by total downloads:") + logger.info("\nTop 10 organizations by total downloads:") org_downloads = compute_org_downloads(top_models) sorted_orgs = sorted(org_downloads.items(), key=lambda x: x[1], reverse=True)[:10] for i, (org, downloads) in enumerate(sorted_orgs, 1): - print(f"{i}. {org}: {downloads:,} downloads") + logger.info(f"{i}. 
{org}: {downloads:,} downloads") if __name__ == "__main__": diff --git a/llm_perf/common/memory_utils.py b/llm_perf/common/memory_utils.py new file mode 100644 index 0000000..a9f9f90 --- /dev/null +++ b/llm_perf/common/memory_utils.py @@ -0,0 +1,195 @@ +import os +import psutil +import gc +from typing import Dict, Optional +from loguru import logger + +try: + import torch + + TORCH_AVAILABLE = True +except ImportError: + TORCH_AVAILABLE = False + +# Memory thresholds in MB +MEMORY_THRESHOLDS = { + "cpu_rss": 8192, # 8GB + "cpu_percent": 90, # 90% + "gpu_allocated": 8192, # 8GB +} + + +class MemoryTracker: + def __init__(self): + self.initial_memory: Dict = {} + self.peak_memory: Dict = {"cpu_rss": 0, "cpu_percent": 0, "gpu_allocated": 0} + self.consecutive_increases = 0 + self.last_memory: Optional[Dict] = None + self.before_memory: Optional[Dict] = None # Store memory state before benchmark + + def get_gpu_memory_info(self): + """Get GPU memory usage if CUDA is available""" + if not TORCH_AVAILABLE or not torch.cuda.is_available(): + return None + + try: + gpu_memory = [] + for i in range(torch.cuda.device_count()): + allocated = torch.cuda.memory_allocated(i) / (1024 * 1024) # MB + reserved = torch.cuda.memory_reserved(i) / (1024 * 1024) # MB + gpu_memory.append( + {"device": i, "allocated": allocated, "reserved": reserved} + ) + return gpu_memory + except Exception as e: + logger.warning(f"Failed to get GPU memory info: {e}") + return None + + def get_cpu_memory_info(self): + """Get CPU memory usage""" + try: + process = psutil.Process(os.getpid()) + memory_info = process.memory_info() + return { + "rss": memory_info.rss / (1024 * 1024), # MB + "vms": memory_info.vms / (1024 * 1024), # MB + "percent": process.memory_percent(), + } + except Exception as e: + logger.warning(f"Failed to get CPU memory info: {e}") + return None + + def check_thresholds(self, cpu_info: Optional[Dict], gpu_info: Optional[list]): + """Check if memory usage exceeds thresholds""" + if cpu_info: + if cpu_info["rss"] > MEMORY_THRESHOLDS["cpu_rss"]: + logger.warning( + f"CPU RSS memory ({cpu_info['rss']:.0f}MB) exceeds threshold ({MEMORY_THRESHOLDS['cpu_rss']}MB)" + ) + if cpu_info["percent"] > MEMORY_THRESHOLDS["cpu_percent"]: + logger.warning( + f"CPU usage ({cpu_info['percent']:.1f}%) exceeds threshold ({MEMORY_THRESHOLDS['cpu_percent']}%)" + ) + + if gpu_info: + for device in gpu_info: + if device["allocated"] > MEMORY_THRESHOLDS["gpu_allocated"]: + logger.warning( + f"GPU {device['device']} allocated memory ({device['allocated']:.0f}MB) " + f"exceeds threshold ({MEMORY_THRESHOLDS['gpu_allocated']}MB)" + ) + + def check_persistent_growth( + self, cpu_info: Optional[Dict], gpu_info: Optional[list] + ): + """Monitor for persistent memory growth""" + if not cpu_info: + return + + current_memory = { + "cpu_rss": cpu_info["rss"], + "cpu_percent": cpu_info["percent"], + "gpu_allocated": gpu_info[0]["allocated"] if gpu_info else 0, + } + + # Update peak memory + for key in self.peak_memory: + self.peak_memory[key] = max(self.peak_memory[key], current_memory[key]) + + # Check for persistent growth + if self.last_memory: + is_increasing = all( + current_memory[key] + > self.last_memory[key] * 1.05 # 5% increase threshold + for key in current_memory + ) + + if is_increasing: + self.consecutive_increases += 1 + if ( + self.consecutive_increases >= 3 + ): # Alert after 3 consecutive increases + logger.warning( + "Detected persistent memory growth over last 3 benchmarks:\n" + f"Initial: CPU 
RSS={self.initial_memory.get('cpu_rss', 0):.0f}MB\n" + f"Current: CPU RSS={current_memory['cpu_rss']:.0f}MB\n" + f"Peak: CPU RSS={self.peak_memory['cpu_rss']:.0f}MB" + ) + else: + self.consecutive_increases = 0 + + # Store current memory for next comparison + self.last_memory = current_memory + + # Store initial memory on first run + if not self.initial_memory: + self.initial_memory = current_memory + + def log_memory_usage(self, phase: str = "current"): + """Log current memory usage for both CPU and GPU""" + # Force garbage collection + gc.collect() + if TORCH_AVAILABLE and torch.cuda.is_available(): + torch.cuda.empty_cache() + + # Get memory info + cpu_info = self.get_cpu_memory_info() + gpu_info = self.get_gpu_memory_info() + + # Check thresholds and persistent growth + self.check_thresholds(cpu_info, gpu_info) + self.check_persistent_growth(cpu_info, gpu_info) + + # Store before memory state + if phase == "before": + self.before_memory = {"cpu": cpu_info, "gpu": gpu_info} + prefix = "Before benchmark -" + elif phase == "after" and self.before_memory: + prefix = "After benchmark -" + else: + prefix = "Current -" + + # Log CPU memory + if cpu_info: + cpu_msg = f"{prefix} CPU Memory - RSS: {cpu_info['rss']:.2f}MB, VMS: {cpu_info['vms']:.2f}MB, Percent: {cpu_info['percent']:.1f}%" + + # Add delta if we're in after phase + if phase == "after" and self.before_memory and self.before_memory["cpu"]: + before_cpu = self.before_memory["cpu"] + cpu_msg += f" (Δ RSS: {cpu_info['rss'] - before_cpu['rss']:+.2f}MB, Δ VMS: {cpu_info['vms'] - before_cpu['vms']:+.2f}MB, Δ %: {cpu_info['percent'] - before_cpu['percent']:+.1f})" + + logger.info(cpu_msg) + + # Log GPU memory if available + if gpu_info: + for device in gpu_info: + gpu_msg = f"{prefix} GPU {device['device']} Memory - Allocated: {device['allocated']:.2f}MB, Reserved: {device['reserved']:.2f}MB" + + # Add delta if we're in after phase + if ( + phase == "after" + and self.before_memory + and self.before_memory["gpu"] + ): + before_gpu = next( + ( + g + for g in self.before_memory["gpu"] + if g["device"] == device["device"] + ), + None, + ) + if before_gpu: + gpu_msg += f" (Δ Allocated: {device['allocated'] - before_gpu['allocated']:+.2f}MB, Δ Reserved: {device['reserved'] - before_gpu['reserved']:+.2f}MB)" + + logger.info(gpu_msg) + + +# Create a global memory tracker instance +memory_tracker = MemoryTracker() + + +# Function to use in other modules +def log_memory_usage(phase: str = "current"): + """Global function to log memory usage""" + memory_tracker.log_memory_usage(phase) diff --git a/llm_perf/common/utils.py b/llm_perf/common/utils.py index bf67fe9..4f90947 100644 --- a/llm_perf/common/utils.py +++ b/llm_perf/common/utils.py @@ -1,6 +1,7 @@ import pandas as pd from llm_perf.common.dependency import get_benchmark_top_n, is_debug_mode +from loguru import logger INPUT_SHAPES = {"batch_size": 1, "sequence_length": 256} GENERATE_KWARGS = {"max_new_tokens": 64, "min_new_tokens": 64} @@ -47,7 +48,7 @@ def get_top_llm_list(n: int = 10) -> list[str]: return top_models except Exception as e: - print(f"Error fetching top LLM list: {e}") + logger.error(f"Error fetching top LLM list: {e}") return [] @@ -55,6 +56,3 @@ def get_top_llm_list(n: int = 10) -> list[str]: CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["bigscience/bloomz-560m"] else: CANONICAL_PRETRAINED_OPEN_LLM_LIST = get_top_llm_list(n=get_benchmark_top_n()) - print( - f"Benchamrking the following {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)} models: {CANONICAL_PRETRAINED_OPEN_LLM_LIST}" - ) diff 
--git a/llm_perf/dashboard_app.py b/llm_perf/dashboard_app.py new file mode 100644 index 0000000..eba6808 --- /dev/null +++ b/llm_perf/dashboard_app.py @@ -0,0 +1,216 @@ +import gradio as gr +import pandas as pd +from datetime import datetime, timedelta +import plotly.express as px +from plotly.graph_objs._figure import Figure +from loguru import logger +from typing import Optional, Tuple, List + +from llm_perf.common.dashboard_manager import DashboardManager + + +def create_status_plot(df: pd.DataFrame) -> Optional[Figure]: + """Create a status plot showing success/failure over time.""" + if df.empty: + return None + + # Ensure last_updated is datetime + df["last_updated"] = pd.to_datetime(df["last_updated"]) + df["success_str"] = df["success"].map({True: "Success", False: "Failure"}) + + # Create hover text with more details + df["hover_text"] = df.apply( + lambda row: f"Model: {row['model']}<br>" + f"Hardware: {row['hardware']}<br>" + f"Machine: {row['machine']}<br>" + f"Status: {row['success_str']}<br>
" + + f"Time: {row['last_updated'].strftime('%Y-%m-%d %H:%M:%S')}", + axis=1, + ) + + fig = px.scatter( + df, + x="last_updated", + y="model", + color="success_str", + title="Benchmark Status Over Time", + labels={"last_updated": "Time", "model": "Model", "success_str": "Status"}, + hover_data=["hover_text"], + height=600, + ) # Make plot taller to accommodate more models + + # Update layout for better readability + fig.update_layout( + xaxis_title="Time", + yaxis_title="Model", + showlegend=True, + legend_title="Status", + hovermode="closest", + ) + + return fig + + +def create_hardware_stats(df: pd.DataFrame) -> Optional[Figure]: + """Create statistics about hardware usage.""" + if df.empty: + return None + + stats = ( + df.groupby(["hardware", "machine"])["success"] + .agg(["count", "mean"]) + .reset_index() + ) + # Calculate success rate as percentage + stats["success_rate"] = (stats["mean"] * 100).round(2) + # Drop the mean column since we've converted it to success_rate + stats = stats.drop("mean", axis=1) + stats = stats.rename(columns={"count": "total_runs"}) + + fig = px.bar( + stats, + x="hardware", + y="total_runs", + color="success_rate", + title="Hardware Usage and Success Rate", + labels={ + "hardware": "Hardware Type", + "total_runs": "Total Runs", + "success_rate": "Success Rate (%)", + }, + ) + return fig + + +class DashboardApp: + def __init__(self): + self.dashboard_manager = DashboardManager() + + def refresh_data( + self, + time_range: str, + machine: str = "All", + hardware: str = "All", + model: str = "All", + ) -> Tuple[Optional[Figure], Optional[Figure], Optional[List[List[str]]]]: + """ + Refresh dashboard data based on filters. + + Args: + time_range: Time range to filter (e.g., '1d', '7d', '30d', 'all') + machine: Machine name filter + hardware: Hardware type filter + model: Model name filter + + Returns: + Tuple of (status plot, hardware stats plot, data table) + """ + try: + # Get the data + df = self.dashboard_manager.get_latest_runs( + machine=machine if machine != "All" else None, + hardware=hardware if hardware != "All" else None, + model=model if model != "All" else None, + ) + + if df.empty: + return None, None, None + + # Apply time range filter + if time_range != "all": + days = int(time_range[:-1]) + cutoff = datetime.now() - timedelta(days=days) + df = df[df["last_updated"] >= cutoff] + + # Create visualizations + status_plot = create_status_plot(df) + hardware_plot = create_hardware_stats(df) + + # Prepare table data + table_df = df[ + ["model", "hardware", "machine", "success", "last_updated"] + ].copy() + table_df["last_updated"] = table_df["last_updated"].dt.strftime( + "%Y-%m-%d %H:%M:%S" + ) + table_data: List[List[str]] = [ + [str(val) for val in row] for row in table_df.values.tolist() + ] + + return status_plot, hardware_plot, table_data + + except Exception as e: + logger.error(f"Error refreshing dashboard data: {str(e)}") + return None, None, None + + def launch(self, port: int = 7860, share: bool = False): + """Launch the Gradio interface. + + Args: + port: Port to run the dashboard on + share: Whether to create a public URL + """ + with gr.Blocks(title="LLM Performance Dashboard") as interface: + gr.Markdown("# 🚀 LLM Performance Dashboard") + gr.Markdown( + "Monitor the status and performance of LLM benchmarks across different hardware configurations." 
+ ) + + with gr.Row(): + time_range = gr.Dropdown( + choices=["1d", "7d", "30d", "all"], value="7d", label="Time Range" + ) + machine = gr.Dropdown( + choices=["All"], # Will be populated dynamically + value="All", + label="Machine", + ) + hardware = gr.Dropdown( + choices=["All"], # Will be populated dynamically + value="All", + label="Hardware", + ) + model = gr.Dropdown( + choices=["All"], # Will be populated dynamically + value="All", + label="Model", + ) + refresh_btn = gr.Button("🔄 Refresh") + + with gr.Row(): + status_plot = gr.Plot(label="Benchmark Status") + hardware_plot = gr.Plot(label="Hardware Statistics") + + with gr.Row(): + results_table = gr.Dataframe( + headers=["Model", "Hardware", "Machine", "Success", "Last Updated"], + label="Recent Benchmark Results", + ) + + # Update function + def update_dashboard( + time_range: str, machine: str, hardware: str, model: str + ): + return self.refresh_data(time_range, machine, hardware, model) + + # Register update function + refresh_btn.click( + fn=update_dashboard, + inputs=[time_range, machine, hardware, model], + outputs=[status_plot, hardware_plot, results_table], + ) + + # Auto-refresh on load + interface.load( + fn=update_dashboard, + inputs=[time_range, machine, hardware, model], + outputs=[status_plot, hardware_plot, results_table], + ) + + # Launch the interface with specified parameters + interface.launch(server_port=port, share=share) + + +if __name__ == "__main__": + app = DashboardApp() + app.launch() diff --git a/llm_perf/update_llm_perf_leaderboard.py b/llm_perf/update_llm_perf_leaderboard.py index 0dd368b..90e8552 100644 --- a/llm_perf/update_llm_perf_leaderboard.py +++ b/llm_perf/update_llm_perf_leaderboard.py @@ -8,9 +8,9 @@ import json from llm_perf.common.hardware_config import load_hardware_configs -from huggingface_hub.utils import disable_progress_bars +from loguru import logger -disable_progress_bars() +os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1" REPO_TYPE = "dataset" MAIN_REPO_ID = "optimum-benchmark/llm-perf-leaderboard" @@ -93,7 +93,7 @@ def gather_benchmarks(subset: str, machine: str, backend: str, hardware: str): path_in_repo=perf_df, path_or_fileobj=perf_df, ) - print(f"Uploaded {perf_df} to {MAIN_REPO_ID}") + logger.info(f"Uploaded {perf_df} to {MAIN_REPO_ID}") # def check_if_url_exists(url: str): @@ -101,7 +101,7 @@ def gather_benchmarks(subset: str, machine: str, backend: str, hardware: str): # Check if a URL exists # """ # repo_exists -# print(f"response: {response}") +# logger.info(f"response: {response}") # return response.status_code == 200 @@ -123,31 +123,33 @@ def update_perf_dfs(): hardware_config.hardware, ) except Exception: - print("Dataset not found for:") - print(f" • Backend: {backend}") - print(f" • Subset: {subset}") - print(f" • Machine: {hardware_config.machine}") - print(f" • Hardware Type: {hardware_config.hardware}") + logger.error("Dataset not found for:") + logger.error(f" • Backend: {backend}") + logger.error(f" • Subset: {subset}") + logger.error(f" • Machine: {hardware_config.machine}") + logger.error(f" • Hardware Type: {hardware_config.hardware}") url = f"{PERF_REPO_ID.format(subset=subset, machine=hardware_config.machine, backend=backend, hardware=hardware_config.hardware)}" does_exist = repo_exists(url, repo_type="dataset") if does_exist: - print(f"Dataset exists: {url} but could not be processed") + logger.error( + f"Dataset exists: {url} but could not be processed" + ) def update_llm_df(): """ Scrape the open-llm-leaderboard and update the leaderboard dataframe """ 
- + scrapping_script = """ git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git pip install -r scrape-open-llm-leaderboard/requirements.txt -q python scrape-open-llm-leaderboard/main.py rm -rf scrape-open-llm-leaderboard """ - + subprocess.run(scrapping_script, shell=True) create_repo(repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, exist_ok=True, private=False) upload_file( diff --git a/setup.py b/setup.py index 7f6e115..ec61ad5 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,10 @@ "huggingface_hub[hf_transfer]", "datasets>=2.14.6", "beautifulsoup4", + "loguru", "optimum-benchmark @ git+https://github.com/huggingface/optimum-benchmark.git", + "psutil", + "torch<2.6.0", ] # Optional dependencies @@ -28,11 +31,12 @@ "optimum-benchmark[openvino] @ git+https://github.com/huggingface/optimum-benchmark.git" ], "cuda": [ - "flash-attn", - "auto-gptq", - "bitsandbytes", - "autoawq", - "torchao", + "optimum-benchmark[bitsandbytes,autoawq,auto-gptq] @ git+https://github.com/huggingface/optimum-benchmark.git", + ], + "dashboard": [ + "gradio", + "pandas", + "plotly", ], }