Skip to content

Commit fdfb78b

Browse files
authored
fix: [2.5] Fix duplicate autoID between import and insert (#42520)
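Remove the unbounded logID allocation mechanism (a local allocator whose end was math.MaxInt64) and instead pre-allocate a bounded, over-provisioned ID range of (totalRows + 1) × dataCoord.import.preAllocateIDExpansionFactor (default 10) that is used for both autoIDs and logIDs. issue: #42518 pr: #42519 Signed-off-by: bigsheeper <[email protected]>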
1 parent a0a6510 commit fdfb78b

File tree

5 files changed

+35
-18
lines changed

5 files changed

+35
-18
lines changed

internal/datacoord/import_util.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -296,14 +296,21 @@ func AssembleImportRequest(task ImportTask, job ImportJob, meta *meta, alloc all
296296
return stat.GetTotalRows()
297297
})
298298

299-
// Allocated IDs are used for rowID and the BEGINNING of the logID.
300-
allocNum := totalRows + 1
299+
// Pre-allocate IDs for autoIDs and logIDs.
300+
preAllocIDNum := (totalRows + 1) * paramtable.Get().DataCoordCfg.ImportPreAllocIDExpansionFactor.GetAsInt64()
301301

302-
idBegin, idEnd, err := alloc.AllocN(allocNum)
302+
idBegin, idEnd, err := alloc.AllocN(preAllocIDNum)
303303
if err != nil {
304304
return nil, err
305305
}
306306

307+
log.Info("pre-allocate ids and ts for import task", WrapTaskLog(task,
308+
zap.Int64("totalRows", totalRows),
309+
zap.Int64("idBegin", idBegin),
310+
zap.Int64("idEnd", idEnd),
311+
zap.Uint64("ts", ts))...,
312+
)
313+
307314
importFiles := lo.Map(task.GetFileStats(), func(fileStat *datapb.ImportFileStats, _ int) *internalpb.ImportFile {
308315
return fileStat.GetImportFile()
309316
})

internal/datanode/importv2/task_import.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package importv2
1919
import (
2020
"context"
2121
"io"
22-
"math"
2322
"time"
2423

2524
"github.com/cockroachdb/errors"
@@ -66,8 +65,8 @@ func NewImportTask(req *datapb.ImportRequest,
6665
if importutilv2.IsBackup(req.GetOptions()) {
6766
UnsetAutoID(req.GetSchema())
6867
}
69-
// Setting end as math.MaxInt64 to incrementally allocate logID.
70-
alloc := allocator.NewLocalAllocator(req.GetIDRange().GetBegin(), math.MaxInt64)
68+
// Allocator for autoIDs and logIDs.
69+
alloc := allocator.NewLocalAllocator(req.GetIDRange().GetBegin(), req.GetIDRange().GetEnd())
7170
task := &ImportTask{
7271
ImportTaskV2: &datapb.ImportTaskV2{
7372
JobID: req.GetJobID(),

internal/datanode/importv2/task_l0_import.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"context"
2121
"fmt"
2222
"io"
23-
"math"
2423
"time"
2524

2625
"github.com/cockroachdb/errors"
@@ -61,8 +60,8 @@ func NewL0ImportTask(req *datapb.ImportRequest,
6160
cm storage.ChunkManager,
6261
) Task {
6362
ctx, cancel := context.WithCancel(context.Background())
64-
// Setting end as math.MaxInt64 to incrementally allocate logID.
65-
alloc := allocator.NewLocalAllocator(req.GetIDRange().GetBegin(), math.MaxInt64)
63+
// Allocator for autoIDs and logIDs.
64+
alloc := allocator.NewLocalAllocator(req.GetIDRange().GetBegin(), req.GetIDRange().GetEnd())
6665
task := &L0ImportTask{
6766
ImportTaskV2: &datapb.ImportTaskV2{
6867
JobID: req.GetJobID(),

internal/datanode/services.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,9 @@ func (node *DataNode) ImportV2(ctx context.Context, req *datapb.ImportRequest) (
461461
zap.Int64("collectionID", req.GetCollectionID()),
462462
zap.Int64s("partitionIDs", req.GetPartitionIDs()),
463463
zap.Strings("vchannels", req.GetVchannels()),
464+
zap.Uint64("ts", req.GetTs()),
465+
zap.Int64("idBegin", req.GetIDRange().GetBegin()),
466+
zap.Int64("idEnd", req.GetIDRange().GetEnd()),
464467
zap.Any("segments", req.GetRequestSegments()),
465468
zap.Any("files", req.GetFiles()))
466469

pkg/util/paramtable/component_param.go

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3629,15 +3629,16 @@ type dataCoordConfig struct {
36293629
CheckAutoBalanceConfigInterval ParamItem `refreshable:"false"`
36303630

36313631
// import
3632-
FilesPerPreImportTask ParamItem `refreshable:"true"`
3633-
ImportTaskRetention ParamItem `refreshable:"true"`
3634-
MaxSizeInMBPerImportTask ParamItem `refreshable:"true"`
3635-
ImportScheduleInterval ParamItem `refreshable:"true"`
3636-
ImportCheckIntervalHigh ParamItem `refreshable:"true"`
3637-
ImportCheckIntervalLow ParamItem `refreshable:"true"`
3638-
MaxFilesPerImportReq ParamItem `refreshable:"true"`
3639-
MaxImportJobNum ParamItem `refreshable:"true"`
3640-
WaitForIndex ParamItem `refreshable:"true"`
3632+
FilesPerPreImportTask ParamItem `refreshable:"true"`
3633+
ImportTaskRetention ParamItem `refreshable:"true"`
3634+
MaxSizeInMBPerImportTask ParamItem `refreshable:"true"`
3635+
ImportScheduleInterval ParamItem `refreshable:"true"`
3636+
ImportCheckIntervalHigh ParamItem `refreshable:"true"`
3637+
ImportCheckIntervalLow ParamItem `refreshable:"true"`
3638+
MaxFilesPerImportReq ParamItem `refreshable:"true"`
3639+
MaxImportJobNum ParamItem `refreshable:"true"`
3640+
WaitForIndex ParamItem `refreshable:"true"`
3641+
ImportPreAllocIDExpansionFactor ParamItem `refreshable:"true"`
36413642

36423643
GracefulStopTimeout ParamItem `refreshable:"true"`
36433644

@@ -4540,6 +4541,14 @@ if param targetVecIndexVersion is not set, the default value is -1, which means
45404541
}
45414542
p.WaitForIndex.Init(base.mgr)
45424543

4544+
p.ImportPreAllocIDExpansionFactor = ParamItem{
4545+
Key: "dataCoord.import.preAllocateIDExpansionFactor",
4546+
Version: "2.5.13",
4547+
DefaultValue: "10",
4548+
Doc: `The expansion factor for pre-allocating IDs during import.`,
4549+
}
4550+
p.ImportPreAllocIDExpansionFactor.Init(base.mgr)
4551+
45434552
p.GracefulStopTimeout = ParamItem{
45444553
Key: "dataCoord.gracefulStopTimeout",
45454554
Version: "2.3.7",

0 commit comments

Comments
 (0)