Skip to content

Commit fb202f0

Browse files
fixed code repository for solution one in SharePoint solution (openai#1264)
1 parent 774c524 commit fb202f0

File tree

2 files changed

+19
-102
lines changed

2 files changed

+19
-102
lines changed

examples/chatgpt/sharepoint_azure_function/Using_Azure_Functions_and_Microsoft_Graph_to_Query_SharePoint.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ See the documentation [here](https://learn.microsoft.com/en-us/azure/azure-funct
145145

146146
5. Leave all the other settings on this page as the default, but feel free to change based on your internal guidelines.
147147

148-
6. On the **permissions** tab, click **Add Permission** and add **Files.Read.All**, then **Add.** This allows this application to read files which is important in order to use the Microsoft Graph Search API.
148+
6. On the **permissions** tab, click **Add Permission** and add **Files.Read.All** and **Sites.Read.All**, then **Add.** This allows this application to read files, which is important in order to use the Microsoft Graph Search API.
149149

150150
4. Once it is created, **click on the enterprise application you just created** (so, leave the Function App page and land on the Enterprise Application that you just spun up)**.** We are now going to give it one more permission, to execute the Azure Function by impersonating the user logging into the application. See [here](https://learn.microsoft.com/en-us/azure/app-service/configure-authentication-provider-aad?tabs=workforce-tenant) for more details.
151151

examples/chatgpt/sharepoint_azure_function/solution_one_file_retrieval.js

Lines changed: 18 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
const { Client } = require('@microsoft/microsoft-graph-client');
2-
const pdfParse = require('pdf-parse');
32
const { Buffer } = require('buffer');
43
const path = require('path');
54
const axios = require('axios');
65
const qs = require('querystring');
7-
const { OpenAI } = require("openai");
6+
87

98
//// --------- ENVIRONMENT CONFIGURATION AND INITIALIZATION ---------
109
// Function to initialize Microsoft Graph client
@@ -46,79 +45,36 @@ const getOboToken = async (userAccessToken) => {
4645
};
4746
//// --------- DOCUMENT PROCESSING ---------
4847
// Function to fetch drive item content and convert to text
48+
4949
const getDriveItemContent = async (client, driveId, itemId, name) => {
5050
try {
51-
const fileType = path.extname(name).toLowerCase();
51+
// const fileType = path.extname(name).toLowerCase();
5252
// the below files types are the ones that are able to be converted to PDF to extract the text. See https://learn.microsoft.com/en-us/graph/api/driveitem-get-content-format?view=graph-rest-1.0&tabs=http
53-
const allowedFileTypes = ['.pdf', '.doc', '.docx', '.odp', '.ods', '.odt', '.pot', '.potm', '.potx', '.pps', '.ppsx', '.ppsxm', '.ppt', '.pptm', '.pptx', '.rtf'];
53+
// const allowedFileTypes = ['.pdf', '.doc', '.docx', '.odp', '.ods', '.odt', '.pot', '.potm', '.potx', '.pps', '.ppsx', '.ppsxm', '.ppt', '.pptm', '.pptx', '.rtf'];
5454
// filePath changes based on file type, adding ?format=pdf to convert non-pdf types to pdf for text extraction, so all files in allowedFileTypes above are converted to pdf
55-
const filePath = `/drives/${driveId}/items/${itemId}/content` + ((fileType === '.pdf' || fileType === '.txt' || fileType === '.csv') ? '' : '?format=pdf');
56-
if (allowedFileTypes.includes(fileType)) {
57-
response = await client.api(filePath).getStream();
58-
// The below takes the chunks in response and combines
59-
let chunks = [];
60-
for await (let chunk of response) {
55+
const filePath = `/drives/${driveId}/items/${itemId}`;
56+
const downloadPath = filePath + `/content`
57+
const fileStream = await client.api(downloadPath).getStream();
58+
let chunks = [];
59+
for await (let chunk of fileStream) {
6160
chunks.push(chunk);
6261
}
63-
let buffer = Buffer.concat(chunks);
64-
// the below extracts the text from the PDF.
65-
const pdfContents = await pdfParse(buffer);
66-
return pdfContents.text;
67-
} else if (fileType === '.txt') {
68-
// If the type is txt, it does not need to create a stream and instead just grabs the content
69-
response = await client.api(filePath).get();
70-
return response;
71-
} else if (fileType === '.csv') {
72-
response = await client.api(filePath).getStream();
73-
let chunks = [];
74-
for await (let chunk of response) {
75-
chunks.push(chunk);
76-
}
77-
let buffer = Buffer.concat(chunks);
78-
let dataString = buffer.toString('utf-8');
79-
return dataString
80-
81-
} else {
82-
return 'Unsupported File Type';
83-
}
84-
62+
const base64String = Buffer.concat(chunks).toString('base64');
63+
const file = await client.api(filePath).get();
64+
const mime_type = file.file.mimeType;
65+
const name = file.name;
66+
return {"name":name, "mime_type":mime_type, "content":base64String}
8567
} catch (error) {
8668
console.error('Error fetching drive content:', error);
8769
throw new Error(`Failed to fetch content for ${name}: ${error.message}`);
8870
}
8971
};
9072

91-
// Function to get relevant parts of text using gpt-3.5-turbo.
92-
const getRelevantParts = async (text, query) => {
93-
try {
94-
// We use your OpenAI key to initialize the OpenAI client
95-
const openAIKey = process.env["OPENAI_API_KEY"];
96-
const openai = new OpenAI({
97-
apiKey: openAIKey,
98-
});
99-
const response = await openai.chat.completions.create({
100-
// Using gpt-3.5-turbo due to speed to prevent timeouts. You can tweak this prompt as needed
101-
model: "gpt-3.5-turbo-0125",
102-
messages: [
103-
{"role": "system", "content": "You are a helpful assistant that finds relevant content in text based on a query. You only return the relevant sentences, and you return a maximum of 10 sentences"},
104-
{"role": "user", "content": `Based on this question: **"${query}"**, get the relevant parts from the following text:*****\n\n${text}*****. If you cannot answer the question based on the text, respond with 'No information provided'`}
105-
],
106-
// using temperature of 0 since we want to just extract the relevant content
107-
temperature: 0,
108-
// using max_tokens of 1000, but you can customize this based on the number of documents you are searching.
109-
max_tokens: 1000
110-
});
111-
return response.choices[0].message.content;
112-
} catch (error) {
113-
console.error('Error with OpenAI:', error);
114-
return 'Error processing text with OpenAI' + error;
115-
}
116-
};
11773

11874
//// --------- AZURE FUNCTION LOGIC ---------
11975
// Below is what the Azure Function executes
12076
module.exports = async function (context, req) {
121-
const query = req.query.query || (req.body && req.body.query);
77+
// const query = req.query.query || (req.body && req.body.query);
12278
const searchTerm = req.query.searchTerm || (req.body && req.body.searchTerm);
12379
if (!req.headers.authorization) {
12480
context.res = {
@@ -157,25 +113,6 @@ module.exports = async function (context, req) {
157113
};
158114

159115
try {
160-
// Function to tokenize content (e.g., based on words).
161-
const tokenizeContent = (content) => {
162-
return content.split(/\s+/);
163-
};
164-
165-
// Function to break tokens into 10k token windows for gpt-3.5-turbo
166-
const breakIntoTokenWindows = (tokens) => {
167-
const tokenWindows = []
168-
const maxWindowTokens = 10000; // 10k tokens
169-
let startIndex = 0;
170-
171-
while (startIndex < tokens.length) {
172-
const window = tokens.slice(startIndex, startIndex + maxWindowTokens);
173-
tokenWindows.push(window);
174-
startIndex += maxWindowTokens;
175-
}
176-
177-
return tokenWindows;
178-
};
179116
// This is where we are doing the search
180117
const list = await client.api('/search/query').post(requestBody);
181118

@@ -187,30 +124,9 @@ module.exports = async function (context, req) {
187124
for (const hit of container.hits) {
188125
if (hit.resource["@odata.type"] === "#microsoft.graph.driveItem") {
189126
const { name, id } = hit.resource;
190-
// We use the below to grab the URL of the file to include in the response
191-
const webUrl = hit.resource.webUrl.replace(/\s/g, "%20");
192-
// The Microsoft Graph API ranks the reponses, so we use this to order it
193-
const rank = hit.rank;
194-
// The below is where the file lives
195127
const driveId = hit.resource.parentReference.driveId;
196128
const contents = await getDriveItemContent(client, driveId, id, name);
197-
if (contents !== 'Unsupported File Type') {
198-
// Tokenize content using function defined previously
199-
const tokens = tokenizeContent(contents);
200-
201-
// Break tokens into 10k token windows
202-
const tokenWindows = breakIntoTokenWindows(tokens);
203-
204-
// Process each token window and combine results
205-
const relevantPartsPromises = tokenWindows.map(window => getRelevantParts(window.join(' '), query));
206-
const relevantParts = await Promise.all(relevantPartsPromises);
207-
const combinedResults = relevantParts.join('\n'); // Combine results
208-
209-
results.push({ name, webUrl, rank, contents: combinedResults });
210-
}
211-
else {
212-
results.push({ name, webUrl, rank, contents: 'Unsupported File Type' });
213-
}
129+
results.push(contents)
214130
}
215131
}
216132
}));
@@ -224,7 +140,8 @@ module.exports = async function (context, req) {
224140
} else {
225141
// If the Microsoft Graph API does return results, then run processList to iterate through.
226142
results = await processList();
227-
results.sort((a, b) => a.rank - b.rank);
143+
results = {'openaiFileResponse': results}
144+
// results.sort((a, b) => a.rank - b.rank);
228145
}
229146
context.res = {
230147
status: 200,

0 commit comments

Comments
 (0)