diff --git a/augmentos_cloud/packages/cloud/src/services/processing/transcription.service.ts b/augmentos_cloud/packages/cloud/src/services/processing/transcription.service.ts index 831cc492b..0f23e1027 100644 --- a/augmentos_cloud/packages/cloud/src/services/processing/transcription.service.ts +++ b/augmentos_cloud/packages/cloud/src/services/processing/transcription.service.ts @@ -190,16 +190,21 @@ export class TranscriptionService { const translateLanguage = languageInfo.translateLanguage == "zh-CN" ? "zh-Hans" : languageInfo.translateLanguage?.split('-')[0]; const translatedText = languageInfo.transcribeLanguage === languageInfo.translateLanguage ? event.result.text : event.result.translations.get(translateLanguage); - console.log(`🎤 TRANSLATION [Interim][${userSession.userId}][${subscription}]: ${translatedText}`); + const didTranslate = translatedText.toLowerCase().replace(/[^\p{L}\p{N}_]/gu, '').trim() !== event.result.text.toLowerCase().replace(/[^\p{L}\p{N}_]/gu, '').trim(); + const detectedSourceLang = didTranslate ? languageInfo.transcribeLanguage : languageInfo.translateLanguage; + + console.log(`🎤 TRANSLATION from ${detectedSourceLang} to ${languageInfo.translateLanguage} [Interim][${userSession.userId}][${subscription}]: ${translatedText}`); const translationData: TranslationData = { type: StreamType.TRANSLATION, text: translatedText, + originalText: event.result.text, startTime: this.calculateRelativeTime(event.result.offset), endTime: this.calculateRelativeTime(event.result.offset + event.result.duration), isFinal: false, speakerId: event.result.speakerId, transcribeLanguage: languageInfo.transcribeLanguage, - translateLanguage: languageInfo.translateLanguage + translateLanguage: languageInfo.translateLanguage, + didTranslate: didTranslate }; this.broadcastTranscriptionResult(userSession, translationData); this.updateTranscriptHistory(userSession, event, false); @@ -209,17 +214,22 @@ export class TranscriptionService { if (!event.result.translations) return; const translateLanguage = languageInfo.translateLanguage == "zh-CN" ? "zh-Hans" : languageInfo.translateLanguage?.split('-')[0]; const translatedText = languageInfo.transcribeLanguage === languageInfo.translateLanguage ? event.result.text : event.result.translations.get(translateLanguage); + // Compare normalized text to determine if translation occurred + const didTranslate = translatedText.toLowerCase().replace(/[^\p{L}\p{N}_]/gu, '').trim() !== event.result.text.toLowerCase().replace(/[^\p{L}\p{N}_]/gu, '').trim(); + const detectedSourceLang = didTranslate ? languageInfo.transcribeLanguage : languageInfo.translateLanguage; const translationData: TranslationData = { type: StreamType.TRANSLATION, isFinal: true, text: translatedText, + originalText: event.result.text, startTime: this.calculateRelativeTime(event.result.offset), endTime: this.calculateRelativeTime(event.result.offset + event.result.duration), speakerId: event.result.speakerId, duration: event.result.duration, transcribeLanguage: languageInfo.transcribeLanguage, - translateLanguage: languageInfo.translateLanguage + translateLanguage: languageInfo.translateLanguage, + didTranslate: didTranslate }; this.broadcastTranscriptionResult(userSession, translationData); this.updateTranscriptHistory(userSession, event, true); @@ -239,8 +249,6 @@ export class TranscriptionService { transcribeLanguage: languageInfo.transcribeLanguage }; - console.log('\n\n\n#### transcriptionData:', event.result.language, "\n\n\n"); - if (languageInfo.transcribeLanguage === 'en-US') { this.updateTranscriptHistory(userSession, event, false); } @@ -260,8 +268,7 @@ export class TranscriptionService { duration: event.result.duration, transcribeLanguage: languageInfo.transcribeLanguage }; - // console.log('\n\n\n#### result:', true, "\n\n\n"); - // console.log('\n\n\n#### languageInfo.transcribeLanguage:', event.result.language, "\n\n\n"); + if (languageInfo.transcribeLanguage === 'en-US') { this.updateTranscriptHistory(userSession, event, true); } diff --git a/augmentos_cloud/packages/sdk/old-README.md b/augmentos_cloud/packages/sdk/old-README.md index 31b5c8a1a..4456a4fbc 100644 --- a/augmentos_cloud/packages/sdk/old-README.md +++ b/augmentos_cloud/packages/sdk/old-README.md @@ -469,13 +469,13 @@ Represent data streams and entities in AugmentOS. ```typescript export interface TranscriptionData extends BaseMessage { type: StreamType.TRANSCRIPTION; - text: string; - isFinal: boolean; - language?: string; - startTime: number; - endTime: number; - speakerId?: string; - duration?: number; + text: string; // The transcribed text + isFinal: boolean; // Whether this is a final transcription + transcribeLanguage?: string; // The requested language for transcription + startTime: number; // Start time in milliseconds relative to session start + endTime: number; // End time in milliseconds relative to session start + speakerId?: string; // ID of the speaker if available + duration?: number; // Audio duration in milliseconds (usually for final) } ``` diff --git a/augmentos_cloud/packages/sdk/src/types/index.ts b/augmentos_cloud/packages/sdk/src/types/index.ts index ea8d6a788..c28b1a4ec 100644 --- a/augmentos_cloud/packages/sdk/src/types/index.ts +++ b/augmentos_cloud/packages/sdk/src/types/index.ts @@ -83,6 +83,7 @@ export { SettingsUpdate, DataStream, CloudToTpaMessage, + TranslationData, ToolCall } from './messages/cloud-to-tpa'; diff --git a/augmentos_cloud/packages/sdk/src/types/messages/cloud-to-tpa.ts b/augmentos_cloud/packages/sdk/src/types/messages/cloud-to-tpa.ts index e884d2da6..81c216cc4 100644 --- a/augmentos_cloud/packages/sdk/src/types/messages/cloud-to-tpa.ts +++ b/augmentos_cloud/packages/sdk/src/types/messages/cloud-to-tpa.ts @@ -73,6 +73,7 @@ export interface TranscriptionData extends BaseMessage { export interface TranslationData extends BaseMessage { type: StreamType.TRANSLATION; text: string; // The transcribed text + originalText?: string; // The original transcribed text before translation isFinal: boolean; // Whether this is a final transcription startTime: number; // Start time in milliseconds endTime: number; // End time in milliseconds @@ -80,6 +81,7 @@ export interface TranslationData extends BaseMessage { duration?: number; // Audio duration in milliseconds transcribeLanguage?: string; // The language code of the transcribed text translateLanguage?: string; // The language code of the translated text + didTranslate?: boolean; // Whether the text was translated } /** diff --git a/augmentos_docs/docs/events.md b/augmentos_docs/docs/events.md index 7490777e9..5e9494b14 100644 --- a/augmentos_docs/docs/events.md +++ b/augmentos_docs/docs/events.md @@ -145,6 +145,7 @@ interface TranscriptionData { interface TranslationData { type: StreamType.TRANSLATION; text: string; + originalText?: string; isFinal: boolean; startTime: number; endTime: number; @@ -152,6 +153,7 @@ interface TranslationData { duration?: number; transcribeLanguage?: string; translateLanguage?: string; + didTranslate?: boolean; } ``` diff --git a/augmentos_docs/docs/reference/interfaces/event-types.md b/augmentos_docs/docs/reference/interfaces/event-types.md index 6c2a35c0e..7f37ed5ac 100644 --- a/augmentos_docs/docs/reference/interfaces/event-types.md +++ b/augmentos_docs/docs/reference/interfaces/event-types.md @@ -64,6 +64,9 @@ interface TranslationData extends BaseMessage { /** The translated text segment. */ text: string; + /** The original transcribed text before translation. */ + originalText?: string; + /** Indicates if this is the final translation result for this utterance. */ isFinal: boolean; @@ -84,6 +87,9 @@ interface TranslationData extends BaseMessage { /** Language code of the translated text (e.g., 'es-ES'). Optional. */ translateLanguage?: string; + + /** Indicates whether the text was actually translated (true) or not (false). */ + didTranslate?: boolean; } ```