Skip to content

Commit ce983cb

Browse files
committed
Harden the possible range of values for max dictionary size and max total sample size for dictionary training
patch by Stefan Miklosovic; reviewed by Yifan Cai for CASSANDRA-21194
1 parent 43a022c commit ce983cb

File tree

5 files changed

+52
-4
lines changed

5 files changed

+52
-4
lines changed

CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
5.1
2+
* Harden the possible range of values for max dictionary size and max total sample size for dictionary training (CASSANDRA-21194)
23
* Implement a guardrail ensuring that minimum training frequency parameter is provided in ZstdDictionaryCompressor (CASSANDRA-21192)
34
* Replace manual referencing with ColumnFamilyStore.selectAndReference when training a dictionary (CASSANDRA-21188)
45
* Forbid nodes upgrading to a version which cannot read existing log entries (CASSANDRA-21174)

src/java/org/apache/cassandra/db/compression/CompressionDictionaryManager.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ private int internalTrainingParameterResolution(CompressionParams compressionPar
471471
else
472472
resolvedValue = userSuppliedValue;
473473

474-
return new DataStorageSpec.IntKibibytesBound(resolvedValue).toBytes();
474+
return new DataStorageSpec.IntBytesBound(resolvedValue).toBytes();
475475
}
476476
catch (Throwable t)
477477
{

src/java/org/apache/cassandra/io/compress/IDictionaryCompressor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ static void validateSizeBasedTrainingParameter(String parameterName, String reso
5959
{
6060
try
6161
{
62-
new DataStorageSpec.IntKibibytesBound(resolvedValue).toBytes();
62+
new DataStorageSpec.IntBytesBound(resolvedValue).toBytes();
6363
}
6464
catch (Throwable t)
6565
{

src/java/org/apache/cassandra/tools/nodetool/CompressionDictionaryCommandGroup.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ private static void validateParameters(PrintStream err, String trainingMaxDictio
182182
{
183183
try
184184
{
185-
new DataStorageSpec.IntKibibytesBound(trainingMaxDictionarySize).toBytes();
185+
new DataStorageSpec.IntBytesBound(trainingMaxDictionarySize).toBytes();
186186
}
187187
catch (Throwable t)
188188
{
@@ -195,7 +195,7 @@ private static void validateParameters(PrintStream err, String trainingMaxDictio
195195
{
196196
try
197197
{
198-
new DataStorageSpec.IntKibibytesBound(trainingMaxTotalSampleSize).toBytes();
198+
new DataStorageSpec.IntBytesBound(trainingMaxTotalSampleSize).toBytes();
199199
}
200200
catch (Throwable t)
201201
{

test/unit/org/apache/cassandra/db/compression/CompressionDictionaryIntegrationTest.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,33 @@ public void testSSTableBasedTrainingWithoutSSTables()
234234
.hasMessageContaining("No SSTables available for training");
235235
}
236236

237+
@Test
238+
public void testMaxBoundForTrainingParameters()
239+
{
240+
String table = createTable(getTableCql());
241+
ColumnFamilyStore cfs = Keyspace.open(keyspace()).getColumnFamilyStore(table);
242+
CompressionDictionaryManager manager = cfs.compressionDictionaryManager();
243+
244+
assertThatThrownBy(() -> manager.train(false, Map.of(TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME, "5GiB",
245+
TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME, TRAINING_MAX_TOTAL_SAMPLE_SIZE)))
246+
.as("Should fail when " + TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME + " is bigger than " + Integer.MAX_VALUE)
247+
.isInstanceOf(IllegalArgumentException.class)
248+
.hasMessageContaining("Invalid value for training_max_dictionary_size: 5GiB");
249+
250+
assertThatThrownBy(() -> manager.train(false, Map.of(TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME, TRAINING_MAX_DICTIONARY_SIZE,
251+
TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME, "5GiB")))
252+
.as("Should fail when " + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + " is bigger than " + Integer.MAX_VALUE)
253+
.isInstanceOf(IllegalArgumentException.class)
254+
.hasMessageContaining("Invalid value for " + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + ": 5GiB");
255+
}
256+
257+
@Test
258+
public void testInvalidTableCreation()
259+
{
260+
assertThatThrownBy(() -> createTable(getTableCqlWithInvalidTotalMaxSampleSize())).isInstanceOf(RuntimeException.class);
261+
assertThatThrownBy(() -> createTable(getTableCqlWithInvalidMaxDictionarySize())).isInstanceOf(RuntimeException.class);
262+
}
263+
237264
private String getTableCqlWithChunkLength()
238265
{
239266
return "CREATE TABLE %s (pk text PRIMARY KEY, data text) " +
@@ -254,4 +281,24 @@ private String getTableCql()
254281
'\'' + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + "': '" + DEFAULT_TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_VALUE + '\'' +
255282
'}';
256283
}
284+
285+
private String getTableCqlWithInvalidTotalMaxSampleSize()
286+
{
287+
return "CREATE TABLE %s (pk text PRIMARY KEY, data text) " +
288+
"WITH compression = {" +
289+
"'class': 'ZstdDictionaryCompressor'," +
290+
'\'' + TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME + "': '" + DEFAULT_TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_VALUE + "'," +
291+
'\'' + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + "': '5GiB'" +
292+
'}';
293+
}
294+
295+
private String getTableCqlWithInvalidMaxDictionarySize()
296+
{
297+
return "CREATE TABLE %s (pk text PRIMARY KEY, data text) " +
298+
"WITH compression = {" +
299+
"'class': 'ZstdDictionaryCompressor'," +
300+
'\'' + TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME + "': '5GiB'," +
301+
'\'' + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + "': '" + DEFAULT_TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_VALUE + '\'' +
302+
'}';
303+
}
257304
}

0 commit comments

Comments
 (0)