From b7e2a3722da5074e8290b64bbb21bb7b6b039849 Mon Sep 17 00:00:00 2001 From: Robert Tinn Date: Tue, 27 May 2025 17:30:29 +0100 Subject: [PATCH] Add updated disclaimer to rft healthbench --- .../fine-tuned_qa/reinforcement_finetuning_healthbench.ipynb | 4 ++++ registry.yaml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/fine-tuned_qa/reinforcement_finetuning_healthbench.ipynb b/examples/fine-tuned_qa/reinforcement_finetuning_healthbench.ipynb index a1b16c02af..00dd23ed1c 100644 --- a/examples/fine-tuned_qa/reinforcement_finetuning_healthbench.ipynb +++ b/examples/fine-tuned_qa/reinforcement_finetuning_healthbench.ipynb @@ -7,8 +7,12 @@ "source": [ "# Reinforcement Fine-Tuning with the OpenAI API for Conversational Reasoning\n", "\n", + "*This guide is for developers and ML practitioners who have some experience with OpenAI’s APIs and wish to use their fine-tuned models for research or other appropriate uses. OpenAI’s services are not intended for the personalized treatment or diagnosis of any medical condition and are subject to our [applicable terms](https://openai.com/policies/).*\n", + "\n", "This notebook demonstrates how to use OpenAI's reinforcement fine-tuning (RFT) to improve a model's conversational reasoning capabilities (specifically asking questions to gain additional context and reduce uncertainty). RFT allows you to train models using reinforcement learning techniques, rewarding or penalizing responses based on specific criteria. 
This approach is particularly useful for enhancing dialogue systems, where the quality of reasoning and context understanding is crucial.\n", "\n", + "For a deep dive into the Reinforcement Fine-Tuning API and how to write effective graders, see [Exploring Model Graders for Reinforcement Fine-Tuning](https://cookbook.openai.com/examples/reinforcement_fine_tuning).\n", + "\n", "### HealthBench\n", "\n", "This cookbook evaluates and improves model performance on a focused subset of [HealthBench](https://openai.com/index/healthbench/), a benchmark suite for medical QA. This guide walks through how to configure the datasets, define evaluation rubrics, and fine-tune model behavior using reinforcement signals derived from custom graders.\n", diff --git a/registry.yaml b/registry.yaml index 3026254ae6..9502f0ef02 100644 --- a/registry.yaml +++ b/registry.yaml @@ -14,7 +14,7 @@ - fine-tuning - reinforcement-learning-graders -- title: Reinforcement Fine-tuning with the OpenAI API +- title: Reinforcement Fine-Tuning for Conversational Reasoning with the OpenAI API path: examples/fine-tuned_qa/reinforcement_finetuning_healthbench.ipynb date: 2025-05-21 authors: