Skip to content

Commit 8d01043

Browse files
AVRO-XXXX: FastReaderBuilder throws ClassCastException for schemas with java-class attribute
When using GenericDatumReader with schemas containing "java-class" attributes on string fields (e.g., java.math.BigDecimal), FastReaderBuilder throws "ClassCastException: Utf8 cannot be cast to String". The bug is in getTransformingStringReader(), which casts the result of stringReader.read() directly to String, but GenericData returns Utf8. This commit adds a failing test to reproduce the issue.
1 parent 99c7379 commit 8d01043

File tree

1 file changed

+116
-0
lines changed

1 file changed

+116
-0
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* https://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.avro.io;
19+
20+
import static org.junit.jupiter.api.Assertions.assertEquals;
21+
import static org.junit.jupiter.api.Assertions.assertNotNull;
22+
23+
import java.io.ByteArrayOutputStream;
24+
import java.io.IOException;
25+
26+
import org.apache.avro.Schema;
27+
import org.apache.avro.generic.GenericData;
28+
import org.apache.avro.generic.GenericDatumReader;
29+
import org.apache.avro.generic.GenericDatumWriter;
30+
import org.apache.avro.generic.GenericRecord;
31+
import org.junit.jupiter.api.Test;
32+
33+
/**
34+
* Tests for FastReaderBuilder behavior with schemas containing "java-class"
35+
* attributes.
36+
*/
37+
public class FastReaderBuilderJavaClassTest {
38+
39+
/**
40+
* Tests that GenericDatumReader can deserialize records with string fields that
41+
* have a "java-class" attribute (e.g., BigDecimal).
42+
*
43+
* This test reproduces a bug where
44+
* FastReaderBuilder.getTransformingStringReader() casts the result of
45+
* stringReader.read() directly to String, but in GenericData mode the reader
46+
* returns Utf8, causing a ClassCastException.
47+
*/
48+
@Test
49+
void genericDatumReaderWithJavaClassAttribute() throws IOException {
50+
// Schema with a string field that has "java-class": "java.math.BigDecimal"
51+
// This is a common pattern for representing decimal values as strings
52+
String schemaJson = "{\n" + " \"type\": \"record\",\n" + " \"name\": \"TestRecord\",\n" + " \"fields\": [\n"
53+
+ " {\"name\": \"id\", \"type\": \"string\"},\n" + " {\"name\": \"price\", \"type\": [\"null\", {\n"
54+
+ " \"type\": \"string\",\n" + " \"java-class\": \"java.math.BigDecimal\"\n" + " }]}\n" + " ]\n"
55+
+ "}";
56+
57+
Schema schema = new Schema.Parser().parse(schemaJson);
58+
59+
GenericRecord record = new GenericData.Record(schema);
60+
record.put("id", "123");
61+
record.put("price", "-0.0002");
62+
63+
ByteArrayOutputStream out = new ByteArrayOutputStream();
64+
GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
65+
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
66+
writer.write(record, encoder);
67+
encoder.flush();
68+
69+
byte[] serialized = out.toByteArray();
70+
71+
// Deserialize using GenericDatumReader (which uses FastReaderBuilder by
72+
// default)
73+
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
74+
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serialized, null);
75+
76+
// AVRO-XXXX this should not throw ClassCastException: Utf8 cannot be cast to
77+
// String
78+
GenericRecord result = reader.read(null, decoder);
79+
80+
assertNotNull(result);
81+
assertEquals("123", result.get("id").toString());
82+
assertEquals("-0.0002", result.get("price").toString());
83+
}
84+
85+
/**
86+
* Tests that GenericDatumReader can deserialize records with a direct string
87+
* field (not in a union) that has a "java-class" attribute.
88+
*/
89+
@Test
90+
void genericDatumReaderWithDirectJavaClassString() throws IOException {
91+
String schemaJson = "{\n" + " \"type\": \"record\",\n" + " \"name\": \"TestRecord\",\n" + " \"fields\": [\n"
92+
+ " {\"name\": \"amount\", \"type\": {\n" + " \"type\": \"string\",\n"
93+
+ " \"java-class\": \"java.math.BigDecimal\"\n" + " }}\n" + " ]\n" + "}";
94+
95+
Schema schema = new Schema.Parser().parse(schemaJson);
96+
97+
GenericRecord record = new GenericData.Record(schema);
98+
record.put("amount", "123.45");
99+
100+
ByteArrayOutputStream out = new ByteArrayOutputStream();
101+
GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
102+
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
103+
writer.write(record, encoder);
104+
encoder.flush();
105+
106+
byte[] serialized = out.toByteArray();
107+
108+
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
109+
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serialized, null);
110+
111+
GenericRecord result = reader.read(null, decoder);
112+
113+
assertNotNull(result);
114+
assertEquals("123.45", result.get("amount").toString());
115+
}
116+
}

0 commit comments

Comments
 (0)