From b6740c9e3a177a03bf2540621bc7a7748090f811 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Sun, 21 Jul 2024 22:31:56 -0400 Subject: [PATCH 1/8] Initial Commits DataFrame AI Extensions --- .../services/openai/DataFrameAIExtensions.py | 36 +++++++++++++ .../synapse/ml/services/openai/__init__.py | 1 + .../openai/test_DataFrameAIExtentions.py | 50 +++++++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py create mode 100644 cognitive/src/main/python/synapse/ml/services/openai/__init__.py create mode 100644 cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py diff --git a/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py b/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py new file mode 100644 index 0000000000..98ea9f3d3a --- /dev/null +++ b/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py @@ -0,0 +1,36 @@ +# Copyright (C) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE in project root for information. + +import sys +import os, json, subprocess, unittest + +if sys.version >= "3": + basestring = str + +import pyspark +from pyspark import SparkContext +from pyspark import sql +from pyspark.ml.param.shared import * +from pyspark.sql import DataFrame + + +def prompt(df, template, **options): + jvm = SparkContext.getOrCreate()._jvm + + secretJson = subprocess.check_output( + "az keyvault secret show --vault-name mmlspark-build-keys --name openai-api-key-2", + shell=True, + ) + openai_api_key = json.loads(secretJson)["value"] + + prompt = jvm.com.microsoft.azure.synapse.ml.services.openai.OpenAIPrompt + prompt = prompt().setSubscriptionKey(openai_api_key) + prompt = prompt.setDeploymentName("gpt-35-turbo") + prompt = prompt.setOutputCol("outParsed") + prompt = prompt.setTemperature(0) + prompt = prompt.setPromptTemplate(template) + print(prompt.transform(df._jdf)) + return prompt.transform(df._jdf) + + +setattr(pyspark.sql.DataFrame, "prompt", prompt) diff --git a/cognitive/src/main/python/synapse/ml/services/openai/__init__.py b/cognitive/src/main/python/synapse/ml/services/openai/__init__.py new file mode 100644 index 0000000000..b281654873 --- /dev/null +++ b/cognitive/src/main/python/synapse/ml/services/openai/__init__.py @@ -0,0 +1 @@ +from DataFrameAIExtensions import * \ No newline at end of file diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py new file mode 100644 index 0000000000..f925a90c1d --- /dev/null +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py @@ -0,0 +1,50 @@ +# Copyright (C) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE in project root for information. + +# Prepare training and test data. +import unittest + +from synapse.ml.io.http import * +from pyspark.sql.functions import struct +from pyspark.sql.types import * +from synapse.ml.services.openai import * + +from pyspark.sql import SparkSession, SQLContext +from synapse.ml.core import __spark_package_version__ +spark = (SparkSession.builder + .master("local[*]") + .appName("PysparkTests") + .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:" + __spark_package_version__ + ",org.apache.spark:spark-avro_2.12:3.4.1") + .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") + .config("spark.executor.heartbeatInterval", "60s") + .config("spark.sql.shuffle.partitions", 10) + .config("spark.sql.crossJoin.enabled", "true") + .getOrCreate()) + +sc = SQLContext(spark.sparkContext) + +class DataFrameAIExtentionsTest(unittest.TestCase): + def test_prompt(self): + # Define schema + schema = StructType([ + StructField("text", StringType(), True), + StructField("category", StringType(), True) + ]) + + # Define data + data = [ + ("apple", "fruits"), + ("mercedes", "cars"), + ("cake", "dishes"), + (None, "none") + ] + + # Create DataFrame + df = spark.createDataFrame(data, schema) + + results = df.prompt("here is a comma separated list of 5 {category}: {text}, ") + results.select("outParsed").show(truncate=False) + + +if __name__ == "__main__": + result = unittest.main() From fbee075d6180f5be8c55ad422b9517bdba63f8e8 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Fri, 26 Jul 2024 09:39:17 -0700 Subject: [PATCH 2/8] Add AI to dataframe extension --- .../services/openai/DataFrameAIExtensions.py | 47 ++++++++++--------- .../openai/test_DataFrameAIExtentions.py | 24 +++++++--- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py b/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py index 98ea9f3d3a..f88ae9e4e2 100644 --- a/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py +++ b/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py @@ -13,24 +13,29 @@ from pyspark.ml.param.shared import * from pyspark.sql import DataFrame - -def prompt(df, template, **options): - jvm = SparkContext.getOrCreate()._jvm - - secretJson = subprocess.check_output( - "az keyvault secret show --vault-name mmlspark-build-keys --name openai-api-key-2", - shell=True, - ) - openai_api_key = json.loads(secretJson)["value"] - - prompt = jvm.com.microsoft.azure.synapse.ml.services.openai.OpenAIPrompt - prompt = prompt().setSubscriptionKey(openai_api_key) - prompt = prompt.setDeploymentName("gpt-35-turbo") - prompt = prompt.setOutputCol("outParsed") - prompt = prompt.setTemperature(0) - prompt = prompt.setPromptTemplate(template) - print(prompt.transform(df._jdf)) - return prompt.transform(df._jdf) - - -setattr(pyspark.sql.DataFrame, "prompt", prompt) +class AIFunctions: + def __init__(self, df): + self.df = df + + def prompt(self, template, **options): + jvm = SparkContext.getOrCreate()._jvm + + secretJson = subprocess.check_output( + "az keyvault secret show --vault-name mmlspark-build-keys --name openai-api-key-2", + shell=True, + ) + openai_api_key = json.loads(secretJson)["value"] + + prompt = jvm.com.microsoft.azure.synapse.ml.services.openai.OpenAIPrompt + prompt = prompt().setSubscriptionKey(openai_api_key) + prompt = prompt.setDeploymentName("gpt-35-turbo") + prompt = prompt.setOutputCol("outParsed") + prompt = prompt.setPromptTemplate(template) + results = prompt.transform(self.df._jdf) + print(results) + return results + +def get_AI_functions(df): + return AIFunctions(df) + +setattr(pyspark.sql.DataFrame, "ai", property(get_AI_functions)) diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py index f925a90c1d..2e4fa5f223 100644 --- a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py @@ -25,25 +25,37 @@ class DataFrameAIExtentionsTest(unittest.TestCase): def test_prompt(self): - # Define schema schema = StructType([ StructField("text", StringType(), True), StructField("category", StringType(), True) ]) - # Define data data = [ ("apple", "fruits"), ("mercedes", "cars"), ("cake", "dishes"), - (None, "none") ] - # Create DataFrame df = spark.createDataFrame(data, schema) - results = df.prompt("here is a comma separated list of 5 {category}: {text}, ") - results.select("outParsed").show(truncate=False) + results = df.ai.prompt("here is a comma separated list of 5 {category}: {text}, ") + results.show() + + def test_prompt_2(self): + schema = StructType([ + StructField("name", StringType(), True), + StructField("address", StringType(), True) + ]) + + data = [ + ("Anne F.", "123 First Street, 98053"), + ("George K.", "345 Washington Avenue, London"), + ] + + df = spark.createDataFrame(data, schema) + + results = df.ai.prompt("Generate the likely country of {name}, given that they are from {address}. It is imperitive that your response contains the country only, no elaborations.") + results.show() if __name__ == "__main__": From 316df3c5d5fb2281a3023fae17370350e94ee081 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Tue, 30 Jul 2024 10:12:10 -0700 Subject: [PATCH 3/8] changes for testing - setting URL --- .../synapse/ml/services/CognitiveServiceBase.scala | 4 ++-- .../ml/services/openai/OpenAIChatCompletion.scala | 2 +- .../services/openai/test_DataFrameAIExtentions.py | 13 ++----------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala index 31c56dc80c..5fef697cae 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala @@ -216,7 +216,7 @@ trait HasCustomCogServiceDomain extends Wrappable with HasURL with HasUrlPath { setUrl(v + urlPath.stripPrefix("/")) } - override def getUrl: String = this.getOrDefault(url) + override def getUrl: String = "https://synapseml-openai-2.openai.azure.com/openai/deployments/gpt-4/chat/completions" def setDefaultInternalEndpoint(v: String): this.type = setDefault( url, v + s"/cognitive/${this.internalServiceType}/" + urlPath.stripPrefix("/")) @@ -290,7 +290,7 @@ trait HasCognitiveServiceInput extends HasURL with HasSubscriptionKey with HasAA case _ => p.name } - override def getUrl: String = this.getOrDefault(url) + override def getUrl: String = "https://synapseml-openai-2.openai.azure.com/openai/deployments/gpt-4/chat/completions" protected def prepareUrlRoot: Row => String = { _ => getUrl diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletion.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletion.scala index 57837ad276..c37d33b45b 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletion.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletion.scala @@ -35,7 +35,7 @@ class OpenAIChatCompletion(override val uid: String) extends OpenAIServicesBase( } override protected def prepareUrlRoot: Row => String = { row => - s"${getUrl}openai/deployments/${getValue(row, deploymentName)}/chat/completions" + s"https://synapseml-openai-2.openai.azure.com/openai/deployments/gpt-4/chat/completions/openai/deployments/${getValue(row, deploymentName)}/chat/completions" } override protected def prepareEntity: Row => Option[AbstractHttpEntity] = { diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py index 2e4fa5f223..4532f83335 100644 --- a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py @@ -5,22 +5,13 @@ import unittest from synapse.ml.io.http import * -from pyspark.sql.functions import struct from pyspark.sql.types import * from synapse.ml.services.openai import * from pyspark.sql import SparkSession, SQLContext -from synapse.ml.core import __spark_package_version__ -spark = (SparkSession.builder - .master("local[*]") - .appName("PysparkTests") - .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:" + __spark_package_version__ + ",org.apache.spark:spark-avro_2.12:3.4.1") - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") - .config("spark.executor.heartbeatInterval", "60s") - .config("spark.sql.shuffle.partitions", 10) - .config("spark.sql.crossJoin.enabled", "true") - .getOrCreate()) +from synapse.ml.core.init_spark import * +spark = init_spark() sc = SQLContext(spark.sparkContext) class DataFrameAIExtentionsTest(unittest.TestCase): From eebd5c229bb03e74a3a53fc89c063450d4ca2d62 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Fri, 2 Aug 2024 08:57:30 -0400 Subject: [PATCH 4/8] Undo URL Changes --- .../azure/synapse/ml/services/CognitiveServiceBase.scala | 2 +- .../azure/synapse/ml/services/openai/OpenAIChatCompletion.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala index 5fef697cae..e85a12de5b 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala @@ -290,7 +290,7 @@ trait HasCognitiveServiceInput extends HasURL with HasSubscriptionKey with HasAA case _ => p.name } - override def getUrl: String = "https://synapseml-openai-2.openai.azure.com/openai/deployments/gpt-4/chat/completions" + override def getUrl: String = this.getOrDefault(url) protected def prepareUrlRoot: Row => String = { _ => getUrl diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletion.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletion.scala index c37d33b45b..57837ad276 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletion.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIChatCompletion.scala @@ -35,7 +35,7 @@ class OpenAIChatCompletion(override val uid: String) extends OpenAIServicesBase( } override protected def prepareUrlRoot: Row => String = { row => - s"https://synapseml-openai-2.openai.azure.com/openai/deployments/gpt-4/chat/completions/openai/deployments/${getValue(row, deploymentName)}/chat/completions" + s"${getUrl}openai/deployments/${getValue(row, deploymentName)}/chat/completions" } override protected def prepareEntity: Row => Option[AbstractHttpEntity] = { From 54bf17eeea0f3116536f1e49d69a4dad637ebe28 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Fri, 16 Aug 2024 01:56:24 -0400 Subject: [PATCH 5/8] .gen initial commit - tested and works --- .../ml/services/openai/DataFrameAIExtensions.py | 10 +++++----- .../services/openai/test_DataFrameAIExtentions.py | 15 ++++++++++----- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py b/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py index f88ae9e4e2..768bc86d92 100644 --- a/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py +++ b/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py @@ -17,7 +17,7 @@ class AIFunctions: def __init__(self, df): self.df = df - def prompt(self, template, **options): + def gen(self, template, **options): jvm = SparkContext.getOrCreate()._jvm secretJson = subprocess.check_output( @@ -26,14 +26,14 @@ def prompt(self, template, **options): ) openai_api_key = json.loads(secretJson)["value"] - prompt = jvm.com.microsoft.azure.synapse.ml.services.openai.OpenAIPrompt - prompt = prompt().setSubscriptionKey(openai_api_key) + prompt = jvm.com.microsoft.azure.synapse.ml.services.openai.OpenAIPrompt() + prompt = prompt.setSubscriptionKey(openai_api_key) prompt = prompt.setDeploymentName("gpt-35-turbo") + prompt = prompt.setCustomServiceName("synapseml-openai-2") prompt = prompt.setOutputCol("outParsed") prompt = prompt.setPromptTemplate(template) results = prompt.transform(self.df._jdf) - print(results) - return results + return DataFrame(results, self.df.sql_ctx) def get_AI_functions(df): return AIFunctions(df) diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py index 4532f83335..6cf8641c6e 100644 --- a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py @@ -15,7 +15,7 @@ sc = SQLContext(spark.sparkContext) class DataFrameAIExtentionsTest(unittest.TestCase): - def test_prompt(self): + def test_gen(self): schema = StructType([ StructField("text", StringType(), True), StructField("category", StringType(), True) @@ -29,10 +29,13 @@ def test_prompt(self): df = spark.createDataFrame(data, schema) - results = df.ai.prompt("here is a comma separated list of 5 {category}: {text}, ") + results = df.ai.gen("Complete this comma separated list of 5 {category}: {text}, ") results.show() + results.select("outParsed").show(truncate = False) + nonNullCount = results.filter(col("outParsed").isNotNull()).count() + assert (nonNullCount == 3) - def test_prompt_2(self): + def test_gen_2(self): schema = StructType([ StructField("name", StringType(), True), StructField("address", StringType(), True) @@ -45,9 +48,11 @@ def test_prompt_2(self): df = spark.createDataFrame(data, schema) - results = df.ai.prompt("Generate the likely country of {name}, given that they are from {address}. It is imperitive that your response contains the country only, no elaborations.") + results = df.ai.gen("Generate the likely country of {name}, given that they are from {address}. It is imperitive that your response contains the country only, no elaborations.") results.show() - + results.select("outParsed").show(truncate = False) + nonNullCount = results.filter(col("outParsed").isNotNull()).count() + assert (nonNullCount == 2) if __name__ == "__main__": result = unittest.main() From 2112875d466bb249ffec4bf4a9b47a9d0124b5e1 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Mon, 19 Aug 2024 21:58:12 -0400 Subject: [PATCH 6/8] Prompt Sugar around .gen with hacky authentication --- .../services/openai/DataFrameAIExtensions.py | 29 ++++++++++--------- .../openai/test_DataFrameAIExtentions.py | 22 +++++++++++--- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py b/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py index 768bc86d92..eef7ac71b7 100644 --- a/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py +++ b/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py @@ -16,26 +16,29 @@ class AIFunctions: def __init__(self, df): self.df = df + self.subscriptionKey = None + self.deploymentName = None + self.customServiceName = None - def gen(self, template, **options): - jvm = SparkContext.getOrCreate()._jvm - - secretJson = subprocess.check_output( - "az keyvault secret show --vault-name mmlspark-build-keys --name openai-api-key-2", - shell=True, - ) - openai_api_key = json.loads(secretJson)["value"] + def setup(self, subscriptionKey = None, deploymentName = None, customServiceName = None): + self.subscriptionKey = subscriptionKey + self.deploymentName = deploymentName + self.customServiceName = customServiceName + def gen(self, template, outputCol = None, **options): + jvm = SparkContext.getOrCreate()._jvm prompt = jvm.com.microsoft.azure.synapse.ml.services.openai.OpenAIPrompt() - prompt = prompt.setSubscriptionKey(openai_api_key) - prompt = prompt.setDeploymentName("gpt-35-turbo") - prompt = prompt.setCustomServiceName("synapseml-openai-2") - prompt = prompt.setOutputCol("outParsed") + prompt = prompt.setSubscriptionKey(self.subscriptionKey) + prompt = prompt.setDeploymentName(self.deploymentName) + prompt = prompt.setCustomServiceName(self.customServiceName) + prompt = prompt.setOutputCol(outputCol) prompt = prompt.setPromptTemplate(template) results = prompt.transform(self.df._jdf) return DataFrame(results, self.df.sql_ctx) def get_AI_functions(df): - return AIFunctions(df) + if not hasattr(df, "_ai_instance"): + df._ai_instance = AIFunctions(df) + return df._ai_instance setattr(pyspark.sql.DataFrame, "ai", property(get_AI_functions)) diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py index 6cf8641c6e..be65bf2c73 100644 --- a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py @@ -3,6 +3,7 @@ # Prepare training and test data. import unittest +import os, json, subprocess, unittest from synapse.ml.io.http import * from pyspark.sql.types import * @@ -29,8 +30,15 @@ def test_gen(self): df = spark.createDataFrame(data, schema) - results = df.ai.gen("Complete this comma separated list of 5 {category}: {text}, ") - results.show() + secretJson = subprocess.check_output( + "az keyvault secret show --vault-name mmlspark-build-keys --name openai-api-key-2", + shell=True, + ) + openai_api_key = json.loads(secretJson)["value"] + + df.ai.setup(subscriptionKey=openai_api_key, deploymentName="gpt-35-turbo", customServiceName="synapseml-openai-2") + + results = df.ai.gen("Complete this comma separated list of 5 {category}: {text}, ", outputCol="outParsed") results.select("outParsed").show(truncate = False) nonNullCount = results.filter(col("outParsed").isNotNull()).count() assert (nonNullCount == 3) @@ -48,8 +56,14 @@ def test_gen_2(self): df = spark.createDataFrame(data, schema) - results = df.ai.gen("Generate the likely country of {name}, given that they are from {address}. It is imperitive that your response contains the country only, no elaborations.") - results.show() + secretJson = subprocess.check_output( + "az keyvault secret show --vault-name mmlspark-build-keys --name openai-api-key-2", + shell=True, + ) + openai_api_key = json.loads(secretJson)["value"] + + df.ai.setup(subscriptionKey=openai_api_key, deploymentName="gpt-35-turbo", customServiceName="synapseml-openai-2") + results = df.ai.gen("Generate the likely country of {name}, given that they are from {address}. It is imperitive that your response contains the country only, no elaborations.", outputCol="outParsed") results.select("outParsed").show(truncate = False) nonNullCount = results.filter(col("outParsed").isNotNull()).count() assert (nonNullCount == 2) From c8494dd019f27c0f7748afd5bda867990f4c6451 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Wed, 21 Aug 2024 01:43:21 -0400 Subject: [PATCH 7/8] getting rid of deprecated and internal function usage --- .../ml/services/openai/DataFrameAIExtensions.py | 12 ++++++++++-- .../services/openai/test_DataFrameAIExtentions.py | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py b/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py index eef7ac71b7..0c4583594f 100644 --- a/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py +++ b/cognitive/src/main/python/synapse/ml/services/openai/DataFrameAIExtensions.py @@ -11,7 +11,13 @@ from pyspark import SparkContext from pyspark import sql from pyspark.ml.param.shared import * -from pyspark.sql import DataFrame +from pyspark.rdd import RDD + +from pyspark.sql import SparkSession, SQLContext + +from synapse.ml.core.init_spark import * +spark = init_spark() +sc = SQLContext(spark.sparkContext) class AIFunctions: def __init__(self, df): @@ -34,7 +40,9 @@ def gen(self, template, outputCol = None, **options): prompt = prompt.setOutputCol(outputCol) prompt = prompt.setPromptTemplate(template) results = prompt.transform(self.df._jdf) - return DataFrame(results, self.df.sql_ctx) + results.createOrReplaceTempView("my_temp_view") + results = spark.sql("SELECT * FROM my_temp_view") + return results def get_AI_functions(df): if not hasattr(df, "_ai_instance"): diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py index be65bf2c73..fdf34b2884 100644 --- a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py @@ -35,6 +35,7 @@ def test_gen(self): shell=True, ) openai_api_key = json.loads(secretJson)["value"] + print(openai_api_key) df.ai.setup(subscriptionKey=openai_api_key, deploymentName="gpt-35-turbo", customServiceName="synapseml-openai-2") From 0f635a76842844ec9ba7a11233c64a4f1d1150f6 Mon Sep 17 00:00:00 2001 From: Shyam Sai Date: Wed, 21 Aug 2024 01:47:23 -0400 Subject: [PATCH 8/8] Remove print statement --- .../synapsemltest/services/openai/test_DataFrameAIExtentions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py index fdf34b2884..be65bf2c73 100644 --- a/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py +++ b/cognitive/src/test/python/synapsemltest/services/openai/test_DataFrameAIExtentions.py @@ -35,7 +35,6 @@ def test_gen(self): shell=True, ) openai_api_key = json.loads(secretJson)["value"] - print(openai_api_key) df.ai.setup(subscriptionKey=openai_api_key, deploymentName="gpt-35-turbo", customServiceName="synapseml-openai-2")