@@ -90,16 +90,19 @@ public final class EncodingSniffer {
9090 private static final byte [] WHITESPACE = {0x09 , 0x0A , 0x0C , 0x0D , 0x20 , 0x3E };
9191 private static final byte [] COMMENT_END = {'-' , '-' , '>' };
9292
93- /** <a href="http ://encoding.spec.whatwg.org/#encodings">Reference </a> */
93+ /** <a href="https://encoding.spec.whatwg.org/#names-and-labels">Encoding names and labels</a> */
9494 private static final Map <String , String > ENCODING_FROM_LABEL ;
9595 static {
9696 ENCODING_FROM_LABEL = new HashMap <>();
9797
9898 // The Encoding
9999 // ------------
100100 ENCODING_FROM_LABEL .put ("unicode-1-1-utf-8" , "utf-8" );
101+ ENCODING_FROM_LABEL .put ("unicode11utf8" , "utf-8" );
102+ ENCODING_FROM_LABEL .put ("unicode20utf8" , "utf-8" );
101103 ENCODING_FROM_LABEL .put ("utf-8" , "utf-8" );
102104 ENCODING_FROM_LABEL .put ("utf8" , "utf-8" );
105+ ENCODING_FROM_LABEL .put ("x-unicode20utf8" , "utf-8" );
103106
104107 // Legacy single-byte encodings
105108 // ----------------------------
@@ -367,8 +370,9 @@ public final class EncodingSniffer {
367370 ENCODING_FROM_LABEL .put ("csiso2022jp" , "iso-2022-jp" );
368371 ENCODING_FROM_LABEL .put ("iso-2022-jp" , "iso-2022-jp" );
369372
370- // iso-2022-jp
373+ // shift_jis
371374 ENCODING_FROM_LABEL .put ("csshiftjis" , "shift_jis" );
375+ ENCODING_FROM_LABEL .put ("ms932" , "shift_jis" );
372376 ENCODING_FROM_LABEL .put ("ms_kanji" , "shift_jis" );
373377 ENCODING_FROM_LABEL .put ("shift-jis" , "shift_jis" );
374378 ENCODING_FROM_LABEL .put ("shift_jis" , "shift_jis" );
@@ -396,14 +400,22 @@ public final class EncodingSniffer {
396400
397401 // replacement
398402 ENCODING_FROM_LABEL .put ("csiso2022kr" , "replacement" );
403+ ENCODING_FROM_LABEL .put ("hz-gb-2312" , "replacement" );
399404 ENCODING_FROM_LABEL .put ("iso-2022-cn" , "replacement" );
400405 ENCODING_FROM_LABEL .put ("iso-2022-cn-ext" , "replacement" );
401406 ENCODING_FROM_LABEL .put ("iso-2022-kr" , "replacement" );
407+ ENCODING_FROM_LABEL .put ("replacement" , "replacement" );
402408
403409 // utf-16be
410+ ENCODING_FROM_LABEL .put ("unicodefffe" , "utf-16be" );
404411 ENCODING_FROM_LABEL .put ("utf-16be" , "utf-16be" );
405412
406413 // utf-16le
414+ ENCODING_FROM_LABEL .put ("csunicode" , "utf-16le" );
415+ ENCODING_FROM_LABEL .put ("iso-10646-ucs-2" , "utf-16le" );
416+ ENCODING_FROM_LABEL .put ("ucs-2" , "utf-16le" );
417+ ENCODING_FROM_LABEL .put ("unicode" , "utf-16le" );
418+ ENCODING_FROM_LABEL .put ("unicodefeff" , "utf-16le" );
407419 ENCODING_FROM_LABEL .put ("utf-16" , "utf-16le" );
408420 ENCODING_FROM_LABEL .put ("utf-16le" , "utf-16le" );
409421
0 commit comments