@@ -522,7 +522,7 @@ def factorize_(
522
522
# this is important in shared-memory parallelism with dask
523
523
# TODO: figure out how to avoid this
524
524
idx = flat .copy ()
525
- found_groups .append (np .array (expect ))
525
+ found_groups .append (np .array (expect , like = flat ))
526
526
# TODO: fix by using masked integers
527
527
idx [idx > expect [- 1 ]] = - 1
528
528
@@ -537,7 +537,7 @@ def factorize_(
537
537
right = expect .closed_right
538
538
idx = np .digitize (
539
539
flat ,
540
- bins = bins .view (np .intp ) if bins .dtype .kind == "M" else bins ,
540
+ bins = np . array ( bins .view (np .intp ) if bins .dtype .kind == "M" else bins , like = flat ) ,
541
541
right = right ,
542
542
)
543
543
idx -= 1
@@ -560,9 +560,13 @@ def factorize_(
560
560
idx = sorter [(idx ,)]
561
561
idx [mask ] = - 1
562
562
else :
563
- idx , groups = pd .factorize (flat , sort = sort )
563
+ if isinstance (flat , np .ndarray ):
564
+ idx , groups = pd .factorize (flat , sort = sort )
565
+ else :
566
+ assert sort
567
+ groups , idx = np .unique (flat , return_inverse = True )
564
568
565
- found_groups .append (np . array ( groups ) )
569
+ found_groups .append (groups )
566
570
factorized .append (idx .reshape (groupvar .shape ))
567
571
568
572
grp_shape = tuple (len (grp ) for grp in found_groups )
0 commit comments