Skip to content

Enrich Transcription/Translation Data with Detected Language and Original Text #364

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -190,16 +190,21 @@ export class TranscriptionService {

const translateLanguage = languageInfo.translateLanguage == "zh-CN" ? "zh-Hans" : languageInfo.translateLanguage?.split('-')[0];
const translatedText = languageInfo.transcribeLanguage === languageInfo.translateLanguage ? event.result.text : event.result.translations.get(translateLanguage);
console.log(`🎤 TRANSLATION [Interim][${userSession.userId}][${subscription}]: ${translatedText}`);
const didTranslate = translatedText.toLowerCase().replace(/[^\p{L}\p{N}_]/gu, '').trim() !== event.result.text.toLowerCase().replace(/[^\p{L}\p{N}_]/gu, '').trim();
const detectedSourceLang = didTranslate ? languageInfo.transcribeLanguage : languageInfo.translateLanguage;

console.log(`🎤 TRANSLATION from ${detectedSourceLang} to ${languageInfo.translateLanguage} [Interim][${userSession.userId}][${subscription}]: ${translatedText}`);
const translationData: TranslationData = {
type: StreamType.TRANSLATION,
text: translatedText,
originalText: event.result.text,
startTime: this.calculateRelativeTime(event.result.offset),
endTime: this.calculateRelativeTime(event.result.offset + event.result.duration),
isFinal: false,
speakerId: event.result.speakerId,
transcribeLanguage: languageInfo.transcribeLanguage,
translateLanguage: languageInfo.translateLanguage
translateLanguage: languageInfo.translateLanguage,
didTranslate: didTranslate
};
this.broadcastTranscriptionResult(userSession, translationData);
this.updateTranscriptHistory(userSession, event, false);
Expand All @@ -209,17 +214,22 @@ export class TranscriptionService {
if (!event.result.translations) return;
const translateLanguage = languageInfo.translateLanguage == "zh-CN" ? "zh-Hans" : languageInfo.translateLanguage?.split('-')[0];
const translatedText = languageInfo.transcribeLanguage === languageInfo.translateLanguage ? event.result.text : event.result.translations.get(translateLanguage);
// Compare normalized text to determine if translation occurred
const didTranslate = translatedText.toLowerCase().replace(/[^\p{L}\p{N}_]/gu, '').trim() !== event.result.text.toLowerCase().replace(/[^\p{L}\p{N}_]/gu, '').trim();
const detectedSourceLang = didTranslate ? languageInfo.transcribeLanguage : languageInfo.translateLanguage;

const translationData: TranslationData = {
type: StreamType.TRANSLATION,
isFinal: true,
text: translatedText,
originalText: event.result.text,
startTime: this.calculateRelativeTime(event.result.offset),
endTime: this.calculateRelativeTime(event.result.offset + event.result.duration),
speakerId: event.result.speakerId,
duration: event.result.duration,
transcribeLanguage: languageInfo.transcribeLanguage,
translateLanguage: languageInfo.translateLanguage
translateLanguage: languageInfo.translateLanguage,
didTranslate: didTranslate
};
this.broadcastTranscriptionResult(userSession, translationData);
this.updateTranscriptHistory(userSession, event, true);
Expand All @@ -239,8 +249,6 @@ export class TranscriptionService {
transcribeLanguage: languageInfo.transcribeLanguage
};

console.log('\n\n\n#### transcriptionData:', event.result.language, "\n\n\n");

if (languageInfo.transcribeLanguage === 'en-US') {
this.updateTranscriptHistory(userSession, event, false);
}
Expand All @@ -260,8 +268,7 @@ export class TranscriptionService {
duration: event.result.duration,
transcribeLanguage: languageInfo.transcribeLanguage
};
// console.log('\n\n\n#### result:', true, "\n\n\n");
// console.log('\n\n\n#### languageInfo.transcribeLanguage:', event.result.language, "\n\n\n");

if (languageInfo.transcribeLanguage === 'en-US') {
this.updateTranscriptHistory(userSession, event, true);
}
Expand Down
14 changes: 7 additions & 7 deletions augmentos_cloud/packages/sdk/old-README.md
Original file line number Diff line number Diff line change
Expand Up @@ -469,13 +469,13 @@ Represent data streams and entities in AugmentOS.
```typescript
export interface TranscriptionData extends BaseMessage {
type: StreamType.TRANSCRIPTION;
text: string;
isFinal: boolean;
language?: string;
startTime: number;
endTime: number;
speakerId?: string;
duration?: number;
text: string; // The transcribed text
isFinal: boolean; // Whether this is a final transcription
transcribeLanguage?: string; // The requested language for transcription
startTime: number; // Start time in milliseconds relative to session start
endTime: number; // End time in milliseconds relative to session start
speakerId?: string; // ID of the speaker if available
duration?: number; // Audio duration in milliseconds (usually for final)
}
```

Expand Down
1 change: 1 addition & 0 deletions augmentos_cloud/packages/sdk/src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ export {
SettingsUpdate,
DataStream,
CloudToTpaMessage,
TranslationData,
ToolCall
} from './messages/cloud-to-tpa';

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,15 @@ export interface TranscriptionData extends BaseMessage {
/**
 * Message carrying one translated speech segment (interim or final).
 *
 * `text` holds the translated output, while `originalText` preserves the
 * transcription it was produced from so consumers can show both or detect
 * pass-through segments via `didTranslate`.
 */
export interface TranslationData extends BaseMessage {
type: StreamType.TRANSLATION;
text: string; // The translated text (may equal the original when no translation was needed)
originalText?: string; // The original transcribed text before translation
isFinal: boolean; // Whether this is a final translation result (false for interim results)
startTime: number; // Start time in milliseconds
endTime: number; // End time in milliseconds
speakerId?: string; // ID of the speaker if available
duration?: number; // Audio duration in milliseconds
transcribeLanguage?: string; // The language code of the transcribed (source) text
translateLanguage?: string; // The language code of the translated (target) text
didTranslate?: boolean; // Whether the text was actually translated, i.e. `text` differs from `originalText`
}

/**
Expand Down
2 changes: 2 additions & 0 deletions augmentos_docs/docs/events.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,15 @@ interface TranscriptionData {
// Payload delivered for translation events.
interface TranslationData {
type: StreamType.TRANSLATION;
text: string; // The translated text segment
originalText?: string; // The original transcribed text before translation
isFinal: boolean; // Whether this is the final translation result for the utterance
startTime: number; // Start time in milliseconds
endTime: number; // End time in milliseconds
speakerId?: string; // ID of the speaker if available
duration?: number; // Audio duration in milliseconds
transcribeLanguage?: string; // Language code of the transcribed (source) text
translateLanguage?: string; // Language code of the translated text
didTranslate?: boolean; // Whether the text was actually translated (true) or not (false)
}
```

Expand Down
6 changes: 6 additions & 0 deletions augmentos_docs/docs/reference/interfaces/event-types.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ interface TranslationData extends BaseMessage {
/** The translated text segment. */
text: string;

/** The original transcribed text before translation. */
originalText?: string;

/** Indicates if this is the final translation result for this utterance. */
isFinal: boolean;

Expand All @@ -84,6 +87,9 @@ interface TranslationData extends BaseMessage {

/** Language code of the translated text (e.g., 'es-ES'). Optional. */
translateLanguage?: string;

/** Indicates whether the text was actually translated (true) or not (false). */
didTranslate?: boolean;
}
```

Expand Down