@@ -125,11 +125,11 @@ def abbr_dispose(words: List[Any], time_stamp: List[List] = None) -> List[Any]:
125
125
if time_stamp is not None :
126
126
end = time_stamp [ts_nums [num ]][1 ]
127
127
ts_lists .append ([begin , end ])
128
- else :
128
+ else :
129
+ word_lists .append (words [num ])
129
130
# The length of time_stamp may differ from the length of words because of the (somewhat improper) threshold set in timestamp_tools.py line 46; e.g., time_stamp can be empty while words is not.
130
131
# Moreover, move "word_lists.append(words[num])" into if clause, to keep length of word_lists and length of ts_lists equal.
131
- if time_stamp is not None and ts_nums [num ]< len (time_stamp ) and words [num ] != " " :
132
- word_lists .append (words [num ])
132
+ if time_stamp is not None and ts_nums [num ] < len (time_stamp ) and words [num ] != " " :
133
133
begin = time_stamp [ts_nums [num ]][0 ]
134
134
end = time_stamp [ts_nums [num ]][1 ]
135
135
ts_lists .append ([begin , end ])
@@ -302,28 +302,29 @@ def sentence_postprocess_sentencepiece(words):
302
302
sentence = "" .join (word_lists )
303
303
return sentence , real_word_lists
304
304
305
+
305
306
# Emotion tag -> emoji appended at the end of a formatted segment.
# Keep the insertion order: format_str_v2 walks this dict and only switches
# to a later tag when its count is strictly higher, so on a tie the earlier
# entry (or the "<|NEUTRAL|>" starting value) wins.
emo_dict = {
    "<|HAPPY|>": "😊",
    "<|SAD|>": "😔",
    "<|ANGRY|>": "😡",
    "<|NEUTRAL|>": "",
    "<|FEARFUL|>": "😰",
    "<|DISGUSTED|>": "🤢",
    "<|SURPRISED|>": "😮",
}
314
315
315
316
# Audio-event tag -> emoji prepended to a formatted segment.
# Keep the insertion order: format_str_v2 prepends one emoji per detected
# event while iterating this dict, so later entries end up further left.
# NOTE(review): "<|Cough|>" maps to "🤧" here but to "😷" in emoji_dict;
# confirm which one is intended before changing either.
event_dict = {
    "<|BGM|>": "🎼",
    "<|Speech|>": "",
    "<|Applause|>": "👏",
    "<|Laughter|>": "😀",
    "<|Cry|>": "😭",
    "<|Sneeze|>": "🤧",
    "<|Breath|>": "",
    "<|Cough|>": "🤧",
}
325
326
326
- lang_dict = {
327
+ lang_dict = {
327
328
"<|zh|>" : "<|lang|>" ,
328
329
"<|en|>" : "<|lang|>" ,
329
330
"<|yue|>" : "<|lang|>" ,
@@ -333,81 +334,90 @@ def sentence_postprocess_sentencepiece(words):
333
334
}
334
335
335
336
# Every special tag that may appear in raw model output, with its emoji.
# format_str_v2 counts and then removes each key in insertion order, so the
# combined "<|nospeech|><|Event_UNK|>" entry must stay ahead of its two
# component tags or it would never be matched as a whole.
# NOTE(review): in the code visible here only the keys are consulted.
emoji_dict = {
    "<|nospeech|><|Event_UNK|>": "❓",
    # Language / no-speech markers.
    "<|zh|>": "",
    "<|en|>": "",
    "<|yue|>": "",
    "<|ja|>": "",
    "<|ko|>": "",
    "<|nospeech|>": "",
    # Emotion and event tags.
    "<|HAPPY|>": "😊",
    "<|SAD|>": "😔",
    "<|ANGRY|>": "😡",
    "<|NEUTRAL|>": "",
    "<|BGM|>": "🎼",
    "<|Speech|>": "",
    "<|Applause|>": "👏",
    "<|Laughter|>": "😀",
    "<|FEARFUL|>": "😰",
    "<|DISGUSTED|>": "🤢",
    "<|SURPRISED|>": "😮",
    "<|Cry|>": "😭",
    "<|EMO_UNKNOWN|>": "",
    "<|Sneeze|>": "🤧",
    "<|Breath|>": "",
    "<|Cough|>": "😷",
    "<|Sing|>": "",
    "<|Speech_Noise|>": "",
    # Remaining control tags, all rendered as nothing.
    "<|withitn|>": "",
    "<|woitn|>": "",
    "<|GBG|>": "",
    "<|Event_UNK|>": "",
}
366
367
367
368
# Emojis that mark an emotion; rich_transcription_postprocess looks for
# them at the end of a segment.
emo_set = {
    "😊",
    "😔",
    "😡",
    "😰",
    "🤢",
    "😮",
}
# Emojis that mark an audio event; rich_transcription_postprocess looks for
# them at the start of a segment.
event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷"}
377
+
369
378
370
379
def format_str_v2(s):
    """Turn the special tags of one segment into emojis.

    Each key of ``emoji_dict`` is counted and then removed from ``s``, in
    the dict's insertion order. The emoji of every event tag that occurred
    is prepended, the emoji of the most frequent emotion tag is appended,
    and a single space directly next to any emoji is dropped.

    Returns the resulting string with surrounding whitespace stripped.
    """
    # Count and strip must stay interleaved: removing an earlier (longer)
    # tag changes what the later, shorter tags can still match.
    tag_counts = {}
    for tag in emoji_dict:
        tag_counts[tag] = s.count(tag)
        s = s.replace(tag, "")

    # max() returns the first maximal candidate, so putting "<|NEUTRAL|>"
    # first keeps it as the winner unless another tag is strictly more
    # frequent.
    dominant_emo = max(["<|NEUTRAL|>", *emo_dict], key=lambda tag: tag_counts[tag])

    for tag, icon in event_dict.items():
        if tag_counts[tag] > 0:
            s = icon + s
    s += emo_dict[dominant_emo]

    for emoji in emo_set | event_set:
        s = s.replace(" " + emoji, emoji).replace(emoji + " ", emoji)
    return s.strip()
397
+
388
398
389
399
def rich_transcription_postprocess(s):
    """Convert a raw tagged transcription into text decorated with emojis.

    The string is cut at every language tag listed in ``lang_dict``; each
    piece is rendered by ``format_str_v2`` and the pieces are concatenated.
    While joining, a leading event emoji that repeats the event of the
    previous piece is dropped, and a trailing emotion emoji of the text
    built so far is dropped when the next piece ends with the same one.

    Args:
        s: Raw transcription containing ``<|...|>`` tags.

    Returns:
        The joined text with surrounding whitespace stripped.
    """

    def get_emo(text):
        # Last character if it is an emotion emoji, else None. The emptiness
        # check avoids an IndexError on "".
        return text[-1] if text and text[-1] in emo_set else None

    def get_event(text):
        # First character if it is an event emoji, else None. The emptiness
        # check avoids an IndexError on "".
        return text[0] if text and text[0] in event_set else None

    s = s.replace("<|nospeech|><|Event_UNK|>", "❓")
    for lang in lang_dict:
        s = s.replace(lang, " <|lang|>")
    s_list = [format_str_v2(s_i).strip(" ") for s_i in s.split("<|lang|>")]

    new_s = " " + s_list[0]
    # NOTE(review): new_s starts with a space, which is not in event_set, so
    # this is always None and the first piece's event is never used for
    # de-duplication. Possibly get_event(s_list[0]) was intended; kept as-is
    # to preserve existing output.
    cur_ent_event = get_event(new_s)

    for seg in s_list[1:]:
        if not seg:
            continue
        seg_event = get_event(seg)
        if seg_event is not None and seg_event == cur_ent_event:
            # Same event as the previous piece: drop the repeated emoji.
            # This may leave seg empty, which the guarded helpers tolerate.
            seg = seg[1:]
        cur_ent_event = get_event(seg)
        seg_emo = get_emo(seg)
        if seg_emo is not None and seg_emo == get_emo(new_s):
            # Same emotion on both sides: keep only the later emoji.
            new_s = new_s[:-1]
        new_s += seg.strip()

    new_s = new_s.replace("The.", " ")
    return new_s.strip()
0 commit comments