Skip to content

Commit 6951040

Browse files
authored
Merge pull request #308 from Azure-Samples/v-durgeshs/fix/js-transcription-code-fix
Added recording pause on start. Active transcription on Call Connected
2 parents c42f974 + f12b78c commit 6951040

File tree

2 files changed

+96
-92
lines changed

2 files changed

+96
-92
lines changed

callautomation-live-transcription/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ devtunnel host
4343
Open the `.env` file to configure the following settings:
4444

4545
1. `CALLBACK_HOST_URI`: Base URL of the app. (For local development, use the dev tunnel URL.)
46-
2. `COGNITIVE_SERVICE_ENDPOINT`: Azure Multi Service endpoint.
46+
2. `COGNITIVE_SERVICES_ENDPOINT`: Azure Multi Service endpoint.
4747
3. `ACS_CONNECTION_STRING`: Azure Communication Service resource's connection string.
4848
4. `ACS_PHONE_NUMBER`: Phone number associated with the Azure Communication Service resource, e.g. "+1425XXXAAAA".
4949
5. `LOCALE`: Transcription locale

callautomation-live-transcription/src/app.ts

Lines changed: 95 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ import {
88
CallIntelligenceOptions, PlayOptions,
99
CallMediaRecognizeDtmfOptions,
1010
TranscriptionOptions,
11-
CallLocator, StartRecordingOptions, CallInvite,streamingData
11+
CallLocator, StartRecordingOptions, CallInvite, streamingData,
12+
StopTranscriptionOptions,
13+
CallConnectionProperties,
14+
RecordingStateResult
1215
}
1316
from "@azure/communication-call-automation";
1417
import { v4 as uuidv4 } from 'uuid';
@@ -40,7 +43,6 @@ const addAgentContext = "AddAgent";
4043
const incorrectDobContext = "IncorrectDob";
4144
const addParticipantFailureContext = "FailedToAddParticipant";
4245
const DobRegex = "^(0[1-9]|[12][0-9]|3[01])(0[1-9]|1[012])[12][0-9]{3}$";
43-
let isTrasncriptionActive = false;
4446
var maxTimeout = 2;
4547
const wordToNumberMapping = {
4648
zero: '0',
@@ -57,6 +59,7 @@ const wordToNumberMapping = {
5759

5860
let recordingId: string;
5961
let recordingLocation: string;
62+
let recordingCallBackUri: string;
6063
const agentPhonenumber = process.env.AGENT_PHONE_NUMBER;
6164
const acsPhoneNumber = process.env.ACS_PHONE_NUMBER;
6265
const transportType = process.env.TRANSPORT_TYPE;
@@ -84,13 +87,14 @@ app.post("/api/incomingCall", async (req: any, res: any) => {
8487
callerId = eventData.from.rawId;
8588
const uuid = uuidv4();
8689
const callbackUri = `${process.env.CALLBACK_HOST_URI}/api/callbacks/${uuid}?callerId=${callerId}`;
90+
recordingCallBackUri = callbackUri;
8791
const incomingCallContext = eventData.incomingCallContext;
8892
const websocketUrl = process.env.CALLBACK_HOST_URI.replace(/^https:\/\//, 'wss://');
89-
console.log(`Cognitive service endpoint: ${process.env.COGNITIVE_SERVICE_ENDPOINT.trim()}`);
90-
const callIntelligenceOptions: CallIntelligenceOptions = { cognitiveServicesEndpoint: process.env.COGNITIVE_SERVICE_ENDPOINT.trim() };
91-
const transcriptionOptions: TranscriptionOptions = { transportUrl: websocketUrl, transportType: transportType, locale: locale, startTranscription: false }
92-
const answerCallOptions: AnswerCallOptions = { callIntelligenceOptions: callIntelligenceOptions, transcriptionOptions: transcriptionOptions};
93-
console.log(`TranscriptionOption:" ${JSON.stringify(transcriptionOptions)}`);
93+
console.log(`Websocket url:- ${websocketUrl}`);
94+
console.log(`Cognitive service endpoint: ${process.env.COGNITIVE_SERVICES_ENDPOINT.trim()}`);
95+
const callIntelligenceOptions: CallIntelligenceOptions = { cognitiveServicesEndpoint: process.env.COGNITIVE_SERVICES_ENDPOINT.trim() };
96+
const transcriptionOptions: TranscriptionOptions = { transportUrl: websocketUrl, transportType: transportType, locale: locale, startTranscription: true }
97+
const answerCallOptions: AnswerCallOptions = { callIntelligenceOptions: callIntelligenceOptions, transcriptionOptions: transcriptionOptions };
9498
answerCallResult = await acsClient.answerCall(incomingCallContext, callbackUri, answerCallOptions);
9599
callConnection = answerCallResult.callConnection;
96100
}
@@ -111,15 +115,7 @@ app.post('/api/callbacks/:contextId', async (req: any, res: any) => {
111115
console.log("TranscriptionSubscription:-->" + JSON.stringify(transcriptionSubscription));
112116

113117
await startRecording(eventData.serverCallId);
114-
console.log(`Recording started. RecordingId: ${recordingId}`);
115-
callMedia = acsClient.getCallConnection(eventData.callConnectionId).getCallMedia();
116-
await initiateTranscription(callMedia);
117-
console.log("Transcription initiated.");
118-
await delayWithSetTimeout();
119-
await pauseOrStopTranscriptionAndRecording(callMedia, false, recordingId);
120-
await delayWithSetTimeout();
121-
/* Play hello prompt to user */
122-
await handleDtmfRecognizeAsync(callMedia, callerId, helpIVRPrompt, "hellocontext");
118+
console.log(`Recording started with pause on start. RecordingId: ${recordingId}`);
123119
}
124120
else if (event.type === "Microsoft.Communication.PlayCompleted") {
125121
if (eventData.operationContext === addAgentContext) {
@@ -134,12 +130,12 @@ app.post('/api/callbacks/:contextId', async (req: any, res: any) => {
134130
}
135131
else if (eventData.operationContext === goodbyeContext ||
136132
eventData.operationContext === addParticipantFailureContext) {
137-
await pauseOrStopTranscriptionAndRecording(callMedia, true, recordingId);
133+
await stopTranscriptionAndRecording(callMedia, eventData.callConnectionId, recordingId);
138134
await acsClient.getCallConnection(eventData.callConnectionId).hangUp(true);
139135
}
140136
}
141137
else if (event.type === "Microsoft.Communication.playFailed") {
142-
await pauseOrStopTranscriptionAndRecording(callMedia, true, recordingId);
138+
await stopTranscriptionAndRecording(callMedia, eventData.callConnectionId, recordingId);
143139
await acsClient.getCallConnection(eventData.callConnectionId).hangUp(true);
144140
}
145141
else if (event.type === "Microsoft.Communication.RecognizeCompleted") {
@@ -151,7 +147,6 @@ app.post('/api/callbacks/:contextId', async (req: any, res: any) => {
151147
const match = regex.exec(dobValueNumbers);
152148
if (match && match[0]) {
153149
await resumeTranscriptionAndRecording(callMedia, recordingId);
154-
await handlePlayAsync(callMedia, addAgentPrompt, addAgentContext);
155150
}
156151
else {
157152
await handleDtmfRecognizeAsync(callMedia, callerId, incorrectDobPrompt, incorrectDobContext);
@@ -180,18 +175,27 @@ app.post('/api/callbacks/:contextId', async (req: any, res: any) => {
180175
console.log(eventData.operationContext);
181176
console.log(`Transcription status:--> ${eventData.transcriptionUpdate.transcriptionStatus}`);
182177
console.log(`Transcription status details:--> ${eventData.transcriptionUpdate.transcriptionStatusDetails}`);
178+
callMedia = acsClient.getCallConnection(eventData.callConnectionId).getCallMedia();
179+
if (eventData.operationContext === undefined) {
180+
const stopTranscriptionOptions: StopTranscriptionOptions = {
181+
operationContext: "nextRecognizeContext"
182+
}
183+
await callMedia.stopTranscription(stopTranscriptionOptions);
184+
}
185+
else if (eventData.operationContext != undefined && eventData.operationContext === "startTranscriptionContext") {
186+
await handlePlayAsync(callMedia, addAgentPrompt, addAgentContext);
187+
}
183188
}
184189
else if (event.type === "Microsoft.Communication.TranscriptionStopped") {
185-
isTrasncriptionActive = false;
186-
console.log(`Received transcription event: ${event.type}`);
187-
console.log(`Transcription status:--> ${eventData.transcriptionUpdate.transcriptionStatus}`);
188-
console.log(`Transcription status details:--> ${eventData.transcriptionUpdate.transcriptionStatusDetails}`);
189-
}
190-
else if (event.type === "Microsoft.Communication.TranscriptionUpdated") {
191-
isTrasncriptionActive = false;
192190
console.log(`Received transcription event: ${event.type}`);
193191
console.log(`Transcription status:--> ${eventData.transcriptionUpdate.transcriptionStatus}`);
194192
console.log(`Transcription status details:--> ${eventData.transcriptionUpdate.transcriptionStatusDetails}`);
193+
callMedia = acsClient.getCallConnection(eventData.callConnectionId).getCallMedia();
194+
195+
if (eventData.operationContext != undefined && eventData.operationContext === "nextRecognizeContext") {
196+
/* Play hello prompt to user */
197+
await handleDtmfRecognizeAsync(callMedia, callerId, helpIVRPrompt, "hellocontext");
198+
}
195199
}
196200
else if (event.type === "Microsoft.Communication.TranscriptionFailed") {
197201
console.log("Received transcription event=%s, CorrelationId=%s, SubCode=%s, Message=%s",
@@ -200,6 +204,10 @@ app.post('/api/callbacks/:contextId', async (req: any, res: any) => {
200204
eventData?.ResultInformation?.SubCode,
201205
eventData?.ResultInformation?.Message);
202206
}
207+
else if (event.type === "Microsoft.Communication.RecordingStateChanged") {
208+
console.log("Received RecordingStateChanged event");
209+
console.log(`Recording state:--> ${eventData.state}`)
210+
}
203211
else if (event.type === "Microsoft.Communication.CallDisconnected") {
204212
console.log("Received CallDisconnected event");
205213
}
@@ -233,7 +241,7 @@ app.get('/download', async (req, res) => {
233241
}
234242
else {
235243
// Set the appropriate response headers for the file download
236-
res.setHeader('Content-Disposition', 'attachment; filename="recording.wav"');
244+
res.setHeader('Content-Disposition', 'attachment; filename="recording.mp4"');
237245
res.setHeader('Content-Type', 'audio/wav');
238246
const recordingStream = await acsClient.getCallRecording().downloadStreaming(recordingLocation);
239247

@@ -250,18 +258,16 @@ async function resumeTranscriptionAndRecording(callMedia: CallMedia, recordingId
250258
console.log(`Recording resumed. RecordingId: ${recordingId}`);
251259
}
252260

253-
async function pauseOrStopTranscriptionAndRecording(callMedia: CallMedia, stopRecording: boolean, recordingId: string) {
254-
console.log("Is trancription active-->" + isTrasncriptionActive)
255-
if (isTrasncriptionActive) {
261+
async function stopTranscriptionAndRecording(callMedia: CallMedia, callConnectionId: string, recordingId: string) {
262+
const callConnectionProperties: CallConnectionProperties = await acsClient.getCallConnection(callConnectionId).getCallConnectionProperties();
263+
const recordingStateResult: RecordingStateResult = await acsClient.getCallRecording().getState(recordingId);
264+
265+
if (callConnectionProperties.transcriptionSubscription.state === "active") {
256266
await callMedia.stopTranscription();
257267
}
258-
console.log(`stopRecording = ${stopRecording}`);
259-
if (stopRecording) {
268+
269+
if (recordingStateResult.recordingState === "active") {
260270
await acsClient.getCallRecording().stop(recordingId);
261-
console.log(`Recording stopped. RecordingId: ${recordingId}`);
262-
} else {
263-
await acsClient.getCallRecording().pause(recordingId);
264-
console.log(`Recording paused. RecordingId: ${recordingId}`);
265271
}
266272
}
267273

@@ -290,19 +296,26 @@ async function handlePlayAsync(callConnectionMedia: CallMedia, textToPlay: strin
290296
async function initiateTranscription(callConnectionMedia: CallMedia) {
291297
const startTranscriptionOptions = {
292298
locale: locale,
293-
operationContext: "StartTranscript"
299+
operationContext: "startTranscriptionContext"
294300
};
295301

296302
await callConnectionMedia.startTranscription(startTranscriptionOptions);
297-
isTrasncriptionActive = true;
298303
}
299304
async function startRecording(serverCallId: string) {
300305
const callLocator: CallLocator = {
301306
id: serverCallId,
302307
kind: "serverCallLocator",
303308
};
304309

305-
const recordingOptions: StartRecordingOptions = { callLocator: callLocator };
310+
const recordingOptions: StartRecordingOptions =
311+
{
312+
callLocator: callLocator,
313+
recordingChannel: "mixed",
314+
recordingContent: "audioVideo",
315+
recordingFormat: "mp4",
316+
recordingStateCallbackEndpointUrl: recordingCallBackUri,
317+
pauseOnStart: true
318+
};
306319
const response = await acsClient.getCallRecording().start(recordingOptions);
307320
recordingId = response.recordingId;
308321
}
@@ -314,66 +327,57 @@ function convertWordsArrayToNumberString(wordArray) {
314327
return result;
315328
}
316329

317-
async function delayWithSetTimeout(): Promise<void> {
318-
return new Promise((resolve) => {
319-
setTimeout(() => {
320-
resolve();
321-
}, 5000); // 5000 milliseconds = 5 seconds
322-
});
323-
}
324-
325330
// Start the server
326331
server.listen(PORT, async () => {
327332
console.log(`Server is listening on port ${PORT}`);
328333
await createAcsClient();
329334
});
330335

331-
332-
const wss = new WebSocket.Server({ server});
336+
const wss = new WebSocket.Server({ server });
333337

334338
wss.on('connection', (ws: WebSocket) => {
335-
console.log('Client connected');
336-
ws.on('message', (packetData: ArrayBuffer) => {
337-
const decoder = new TextDecoder();
338-
const stringJson = decoder.decode(packetData);
339-
console.log("STRING JSON=>--" + stringJson)
340-
var response = streamingData(packetData);
341-
if ('locale' in response) {
342-
console.log("--------------------------------------------")
343-
console.log("Transcription Metadata")
344-
console.log("CALL CONNECTION ID:-->" + response.callConnectionId);
345-
console.log("CORRELATION ID:-->" + response.correlationId);
346-
console.log("LOCALE:-->" + response.locale);
347-
console.log("SUBSCRIPTION ID:-->" + response.subscriptionId);
348-
console.log("--------------------------------------------")
349-
}
350-
if ('text' in response) {
351-
console.log("--------------------------------------------")
352-
console.log("Transcription Data")
353-
console.log("TEXT:-->" + response.text);
354-
console.log("FORMAT:-->" + response.format);
355-
console.log("CONFIDENCE:-->" + response.confidence);
356-
console.log("OFFSET IN TICKS:-->" + response.offsetInTicks);
357-
console.log("DURATION IN TICKS:-->" + response.durationInTicks);
358-
console.log("RESULT STATE:-->" + response.resultState);
359-
if ('phoneNumber' in response.participant) {
360-
console.log("PARTICIPANT:-->" + response.participant.phoneNumber);
361-
}
362-
if ('communicationUserId' in response.participant) {
363-
console.log("PARTICIPANT:-->" + response.participant.communicationUserId);
364-
}
365-
response.words.forEach(element => {
366-
console.log("TEXT:-->" + element.text)
367-
console.log("DURATION IN TICKS:-->" + element.durationInTicks)
368-
console.log("OFFSET IN TICKS:-->" + element.offsetInTicks)
369-
});
370-
console.log("--------------------------------------------")
371-
}
372-
});
373-
374-
ws.on('close', () => {
375-
console.log('Client disconnected');
376-
});
339+
console.log('Client connected');
340+
ws.on('message', (packetData: ArrayBuffer) => {
341+
const decoder = new TextDecoder();
342+
const stringJson = decoder.decode(packetData);
343+
console.log("STRING JSON=>--" + stringJson)
344+
var response = streamingData(packetData);
345+
if ('locale' in response) {
346+
console.log("--------------------------------------------")
347+
console.log("Transcription Metadata")
348+
console.log("CALL CONNECTION ID:-->" + response.callConnectionId);
349+
console.log("CORRELATION ID:-->" + response.correlationId);
350+
console.log("LOCALE:-->" + response.locale);
351+
console.log("SUBSCRIPTION ID:-->" + response.subscriptionId);
352+
console.log("--------------------------------------------")
353+
}
354+
if ('text' in response) {
355+
console.log("--------------------------------------------")
356+
console.log("Transcription Data")
357+
console.log("TEXT:-->" + response.text);
358+
console.log("FORMAT:-->" + response.format);
359+
console.log("CONFIDENCE:-->" + response.confidence);
360+
console.log("OFFSET IN TICKS:-->" + response.offsetInTicks);
361+
console.log("DURATION IN TICKS:-->" + response.durationInTicks);
362+
console.log("RESULT STATE:-->" + response.resultState);
363+
if ('phoneNumber' in response.participant) {
364+
console.log("PARTICIPANT:-->" + response.participant.phoneNumber);
365+
}
366+
if ('communicationUserId' in response.participant) {
367+
console.log("PARTICIPANT:-->" + response.participant.communicationUserId);
368+
}
369+
response.words.forEach(element => {
370+
console.log("TEXT:-->" + element.text)
371+
console.log("DURATION IN TICKS:-->" + element.durationInTicks)
372+
console.log("OFFSET IN TICKS:-->" + element.offsetInTicks)
373+
});
374+
console.log("--------------------------------------------")
375+
}
376+
});
377+
378+
ws.on('close', () => {
379+
console.log('Client disconnected');
380+
});
377381
});
378382

379383
console.log(`WebSocket server running on port ${PORT}`);

0 commit comments

Comments
 (0)