pandas-dev · PedroM4rques · May 24, 2025 · May 26, 2025 · May 27, 2025 · May 27, 2025
@@ -69,6 +69,62 @@ For a ``Series`` accessor, you should validate the ``dtype`` if the accessor
 applies only to certain dtypes.
 
 
+Registering accessors via entry points
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can create a custom accessor for a pandas object and expose it via Python's
+entry point system. Once the providing package is installed, pandas discovers and
+registers the accessor automatically at runtime, without a manual import.
+
+To register the entry point for your accessor, follow the format shown below:
+
+.. code-block:: python
+
+    # setup.py
+    entry_points={
+        'pandas.DataFrame.accessor': [ '<name> = <module>:<AccessorClass>', ... ],
+        'pandas.Series.accessor':    [ '<name> = <module>:<AccessorClass>', ... ],
+        'pandas.Index.accessor':     [ '<name> = <module>:<AccessorClass>', ... ],
+    }
+
+Alternatively, if you are using a ``pyproject.toml``-based build:
+
+.. code-block:: none
+
+    # pyproject.toml
+    [project.entry-points."pandas.DataFrame.accessor"]
+    <name> = "<module>:<AccessorClass>"
+
+    [project.entry-points."pandas.Series.accessor"]
+    <name> = "<module>:<AccessorClass>"
+
+    [project.entry-points."pandas.Index.accessor"]
+    <name> = "<module>:<AccessorClass>"
+
+
+Assuming the accessor class ``GeoAccessor`` is defined in the module
+``geoPlugin.geo_accessor``, and using the accessor name ``geo`` as in the
+example above, the entry point is declared as follows:
+
+.. code-block:: python
+
+    # setup.py
+    entry_points={
+        'pandas.DataFrame.accessor': [ 'geo = geoPlugin.geo_accessor:GeoAccessor' ],
+    }
+
+Or, for a ``pyproject.toml``-based build:
+
+.. code-block:: toml
+
+    # pyproject.toml
+    [project.entry-points."pandas.DataFrame.accessor"]
+    geo = "geoPlugin.geo_accessor:GeoAccessor"
+
+
+For background on Python's Entry Point system and Plugins:
+https://packaging.python.org/en/latest/guides/creating-and-discovering-plugins/#plugin-entry-points
+
 .. _extending.extension-types:
 
 Extension types

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -83,6 +83,7 @@ Other enhancements
 - Improved deprecation message for offset aliases (:issue:`60820`)
 - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
 - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
+- Support :class:`DataFrame`, :class:`Series` and :class:`Index` plugin accessors via entry points (:issue:`29076`)
 - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)

diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -346,3 +346,8 @@
     "unique",
     "wide_to_long",
 ]
+
+# Register accessors that installed third-party packages advertise through the
+# ``pandas.<pd_obj>.accessor`` entry point groups.
+from .core.accessor import accessor_entry_point_loader
+
+accessor_entry_point_loader()
+# Drop the loader so it does not remain as an attribute of the ``pandas`` namespace.
+del accessor_entry_point_loader
diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py
@@ -25,6 +25,11 @@
     from pandas import Index
     from pandas.core.generic import NDFrame
 
+from importlib.metadata import (
+    EntryPoints,
+    entry_points,
+)
+
 
 class DirNamesMixin:
     _accessors: set[str] = set()
@@ -393,3 +398,122 @@ def register_index_accessor(name: str) -> Callable[[TypeT], TypeT]:
     from pandas import Index
 
     return _register_accessor(name, Index)
+
+
+def _make_lazy_accessor(entry_point):
+    """
+    Wrap an entry point in a callable that builds its accessor on demand.
+
+    ``entry_point.load()`` is deferred until the returned callable is invoked,
+    so the module providing the accessor is not imported by the loader itself.
+
+    Parameters
+    ----------
+    entry_point : object
+        Entry point whose ``load()`` returns the accessor class.
+
+    Returns
+    -------
+    callable
+        Takes the pandas object and returns ``entry_point.load()(pandas_obj)``.
+    """
+
+    def _construct(pandas_obj):
+        return entry_point.load()(pandas_obj)
+
+    return _construct
+
+
+def accessor_entry_point_loader() -> None:
+    """
+    Load and register pandas accessors declared via entry points.
+
+    This function scans the 'pandas.<pd_obj>.accessor' entry point groups for
+    accessors registered by third-party packages. These accessors extend
+    core pandas objects (`DataFrame`, `Series`, `Index`).
+
+    Each entry point is expected to follow the format:
+
+        # setup.py
+        entry_points={
+            'pandas.DataFrame.accessor': ['<name> = <module>:<AccessorClass>', ...],
+            'pandas.Series.accessor':    ['<name> = <module>:<AccessorClass>', ...],
+            'pandas.Index.accessor':     ['<name> = <module>:<AccessorClass>', ...],
+        }
+
+    OR using pyproject.toml file:
+
+        # pyproject.toml
+        [project.entry-points."pandas.DataFrame.accessor"]
+        <name> = "<module>:<AccessorClass>"
+
+        [project.entry-points."pandas.Series.accessor"]
+        <name> = "<module>:<AccessorClass>"
+
+        [project.entry-points."pandas.Index.accessor"]
+        <name> = "<module>:<AccessorClass>"
+
+
+    For each entry point:
+    - The accessor is registered using the appropriate registration decorator
+      function (e.g. register_dataframe_accessor) through a wrapper that only
+      loads the accessor class when the wrapper is invoked.
+    - If two entry points in the same group declare the same accessor name, a
+      warning is issued and the one registered last replaces the earlier one.
+
+    Warns
+    -----
+    UserWarning
+        If two entry points in the same group share an accessor name.
+
+    Notes
+    -----
+    - This function is only intended to be called at pandas startup.
+    - For more information about accessors, refer to:
+        - Pandas documentation on extending accessors:
+          https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors
+        - Series accessor API reference:
+          https://pandas.pydata.org/docs/reference/series.html#accessors
+        - Note: DataFrame and Index accessors (e.g., `.sparse`, `.str`) use the same
+          mechanism but are not listed in separate reference pages as of now.
+
+    - For background on Python plugin entry points:
+    https://packaging.python.org/en/latest/guides/creating-and-discovering-plugins/#plugin-entry-points
+
+    Examples
+    --------
+        # setup.py
+        entry_points={
+            'pandas.DataFrame.accessor': [
+                'myplugin = myplugin.accessor:MyPluginAccessor',
+            ],
+        }
+        # END setup.py
+
+        - That entrypoint would allow the following code:
+
+        import pandas as pd
+
+        df = pd.DataFrame({"A": [1, 2, 3]})
+        df.myplugin.do_something() # Calls MyPluginAccessor.do_something()
+    """
+
+    registry_functions: dict[str, Callable] = {
+        "pandas.DataFrame.accessor": register_dataframe_accessor,
+        "pandas.Series.accessor": register_series_accessor,
+        "pandas.Index.accessor": register_index_accessor,
+    }
+
+    for group, register_fn in registry_functions.items():
+        # Accessor name -> package that most recently claimed it. Reset for each
+        # group: reusing a name across DataFrame/Series/Index is not a conflict.
+        owner_by_name: dict[str, str] = {}
+
+        for entry_point in entry_points(group=group):
+            name = entry_point.name
+            dist = getattr(entry_point, "dist", None)
+            new_pkg_name = getattr(dist, "name", "Unknown") if dist else "Unknown"
+
+            if name in owner_by_name:
+                loaded_pkg_name = owner_by_name[name]
+
+                # The registration below still runs, so the new entry point
+                # replaces the one recorded earlier; say so accurately.
+                warnings.warn(
+                    "Warning: you have two accessors with the same name:"
+                    f" '{name}' has already been registered"
+                    f" by the package '{loaded_pkg_name}'. The "
+                    f"'{name}' provided by the package "
+                    f"'{new_pkg_name}' replaces it, so the accessor from "
+                    f"'{loaded_pkg_name}' is not being used. "
+                    "Uninstall the package you don't want "
+                    "to use if you want to get rid of this warning.\n",
+                    UserWarning,
+                    stacklevel=2,
+                )
+
+            owner_by_name[name] = new_pkg_name
+            register_fn(name)(_make_lazy_accessor(entry_point))
diff --git a/pandas/tests/test_plugins_entrypoint_loader.py b/pandas/tests/test_plugins_entrypoint_loader.py
@@ -0,0 +1,438 @@
+from typing import Any
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.accessor import accessor_entry_point_loader
+
+# Entry point group names used to pre-seed the mocked group map below.
+PD_OBJECTS_ENTRYPOINTS = [
+    "pandas.DataFrame.accessor",
+    "pandas.Series.accessor",
+    "pandas.Index.accessor",
+]
+
+
+def create_mock_entry_points(entry_points: dict[str, list[tuple[str, Any, str]]]):
+    """
+    Auxiliary function to create mock entry points for testing accessor loading.
+
+    Parameters
+    ----------
+    entry_points : dict of str to list of tuple
+        Maps an entry point group name (one of ``PD_OBJECTS_ENTRYPOINTS``) to a
+        list of (name, accessor_class, dist_name) tuples where:
+        - name: str, the name of the accessor
+        - accessor_class: class, the accessor class to be returned by load()
+        - dist_name: str, the name of the distribution (package)
+        A group name outside ``PD_OBJECTS_ENTRYPOINTS`` raises ``KeyError``.
+
+    Returns
+    -------
+    function
+        A mock_entry_points function that returns the mocked entry points.
+    """
+
+    class MockDistribution:
+        def __init__(self, name):
+            self.name = name
+
+    class MockEntryPoint:
+        def __init__(self, name, accessor_class, dist_name):
+            self.name = name
+            self._accessor_class = accessor_class
+            self.dist = MockDistribution(dist_name)
+
+        def load(self):
+            return self._accessor_class
+
+    # Create a dictionary of MockEntryPoint instances
+    group_map: dict[str, list[MockEntryPoint]] = {g: [] for g in PD_OBJECTS_ENTRYPOINTS}
+
+    for ep_group, ep_properties in entry_points.items():
+        for name, accessor_class, dist_name in ep_properties:
+            group_map[ep_group].append(MockEntryPoint(name, accessor_class, dist_name))
+
+    # Keyword-only ``group`` so the mock can be called as ``entry_points(group=...)``.
+    def mock_entry_points(*, group):
+        return group_map.get(group, [])
+
+    return mock_entry_points
+
+
+def test_no_accessors(monkeypatch):
+    """The loader completes without raising when every group is empty."""
+    # No entry points
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [],
+            "pandas.Series.accessor": [],
+            "pandas.Index.accessor": [],
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+
+def test_load_dataframe_accessors(monkeypatch):
+    """A DataFrame entry point becomes usable as ``df.test_accessor``."""
+
+    class TestAccessor:
+        def __init__(self, df):
+            self._df = df
+
+        def test_method(self):
+            return "success"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                (
+                    "test_accessor",
+                    TestAccessor,
+                    "TestPackage",
+                )
+            ],
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+    # Create DataFrame and verify that the accessor was registered
+    df = pd.DataFrame({"a": [1, 2, 3]})
+    assert hasattr(df, "test_accessor")
+    assert df.test_accessor.test_method() == "success"
+
+
+def test_load_series_accessors(monkeypatch):
+    """A Series entry point becomes usable as ``s.test_accessor``."""
+
+    class TestAccessor:
+        def __init__(self, ser):
+            self._ser = ser
+
+        def test_method(self):
+            return "success"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.Series.accessor": [("test_accessor", TestAccessor, "TestPackage")],
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+    # Create Series and verify that the accessor was registered
+    s = pd.Series([1, 2, 3])
+    assert hasattr(s, "test_accessor")
+    assert s.test_accessor.test_method() == "success"
+
+
+def test_load_index_accessors(monkeypatch):
+    """An Index entry point becomes usable as ``idx.test_accessor``."""
+
+    class TestAccessor:
+        def __init__(self, idx):
+            self._idx = idx
+
+        def test_method(self):
+            return "success"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.Index.accessor": [("test_accessor", TestAccessor, "TestPackage")],
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+    # Create Index and verify that the accessor was registered
+    idx = pd.Index([1, 2, 3])
+    assert hasattr(idx, "test_accessor")
+    assert idx.test_accessor.test_method() == "success"
+
+
+def test_duplicate_dataframe_accessor_names(monkeypatch):
+    """
+    Two DataFrame entry points sharing a name emit a UserWarning.
+
+    The accessor listed last is the one reachable afterwards.
+    """
+
+    class Accessor1:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor2"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                ("duplicate_accessor", Accessor1, "TestPackage1"),
+                ("duplicate_accessor", Accessor2, "TestPackage2"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that the UserWarning is raised
+    with tm.assert_produces_warning(UserWarning, match="duplicate_accessor") as record:
+        accessor_entry_point_loader()
+
+    messages = [str(w.message) for w in record]
+    assert any("you have two accessors with the same name:" in msg for msg in messages)
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "duplicate_accessor")
+    assert df.duplicate_accessor.which() == "Accessor2"  # Last registered accessor
+
+
+def test_duplicate_series_accessor_names(monkeypatch):
+    """
+    Two Series entry points sharing a name emit a UserWarning.
+
+    The accessor listed last is the one reachable afterwards.
+    """
+
+    class Accessor1:
+        def __init__(self, series):
+            self._series = series
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, series):
+            self._series = series
+
+        def which(self):
+            return "Accessor2"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.Series.accessor": [
+                ("duplicate_accessor", Accessor1, "TestPackage1"),
+                ("duplicate_accessor", Accessor2, "TestPackage2"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that the UserWarning is raised
+    with tm.assert_produces_warning(UserWarning, match="duplicate_accessor") as record:
+        accessor_entry_point_loader()
+
+    messages = [str(w.message) for w in record]
+    assert any("you have two accessors with the same name:" in msg for msg in messages)
+
+    s = pd.Series([1, 2, 3])
+    assert hasattr(s, "duplicate_accessor")
+    assert s.duplicate_accessor.which() == "Accessor2"  # Last registered accessor
+
+
+def test_duplicate_index_accessor_names(monkeypatch):
+    """
+    Two Index entry points sharing a name emit a UserWarning.
+
+    The accessor listed last is the one reachable afterwards.
+    """
+
+    class Accessor1:
+        def __init__(self, idx):
+            self._idx = idx
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, idx):
+            self._idx = idx
+
+        def which(self):
+            return "Accessor2"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.Index.accessor": [
+                ("duplicate_accessor", Accessor1, "TestPackage1"),
+                ("duplicate_accessor", Accessor2, "TestPackage2"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that the UserWarning is raised
+    with tm.assert_produces_warning(UserWarning, match="duplicate_accessor") as record:
+        accessor_entry_point_loader()
+
+    messages = [str(w.message) for w in record]
+    assert any("you have two accessors with the same name:" in msg for msg in messages)
+
+    idx = pd.Index([1, 2, 3])
+    assert hasattr(idx, "duplicate_accessor")
+    assert idx.duplicate_accessor.which() == "Accessor2"  # Last registered accessor
+
+
+def test_wrong_obj_accessor(monkeypatch):
+    """An accessor advertised only for DataFrame is not attached to Index."""
+
+    class Accessor1:
+        def __init__(self, obj):
+            self._obj = obj
+
+        def which(self):
+            return "Accessor1"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                ("accessor", Accessor1, "TestPackage1"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    accessor_entry_point_loader()
+
+    # Check that the accessor is not registered for Index
+    idx = pd.Index([1, 2, 3])
+    assert not hasattr(idx, "accessor"), "Accessor should not be registered for Index"
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "accessor")
+    assert df.accessor.which() == "Accessor1"
+
+
+def test_unique_accessor_names(monkeypatch):
+    """Distinct accessor names in one group register without any warning."""
+
+    class Accessor1:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor2"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                ("accessor1", Accessor1, "Package1"),
+                ("accessor2", Accessor2, "Package2"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that no UserWarning is raised
+    with tm.assert_produces_warning(None, check_stacklevel=False):
+        accessor_entry_point_loader()
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "accessor1"), "Accessor1 not registered"
+    assert hasattr(df, "accessor2"), "Accessor2 not registered"
+
+    assert df.accessor1.which() == "Accessor1", "Accessor1 method incorrect"
+    assert df.accessor2.which() == "Accessor2", "Accessor2 method incorrect"
+
+
+def test_duplicate_and_unique_accessor_names(monkeypatch):
+    """
+    A duplicated name warns exactly once and does not affect a unique name.
+
+    The duplicated name resolves to the accessor listed last.
+    """
+
+    class Accessor1:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor2"
+
+    class Accessor3:
+        def __init__(self, df):
+            self._df = df
+
+        def which(self):
+            return "Accessor3"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                ("duplicate_accessor", Accessor1, "Package1"),
+                ("duplicate_accessor", Accessor2, "Package2"),
+                ("unique_accessor", Accessor3, "Package3"),
+            ]
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Capture warnings
+    with tm.assert_produces_warning(UserWarning, match="duplicate_accessor") as record:
+        accessor_entry_point_loader()
+
+    messages = [str(w.message) for w in record]
+
+    # Filter warnings for the specific message about duplicate accessors
+    duplicate_package_warnings = [
+        msg
+        for msg in messages
+        if "you have two accessors with the same name: 'duplicate_accessor'" in msg
+    ]
+
+    # Assert one warning about duplicate accessors
+    assert len(duplicate_package_warnings) == 1, (
+        f"Expected exactly one warning about duplicate accessors, "
+        f"got {len(duplicate_package_warnings)}: {duplicate_package_warnings}"
+    )
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "duplicate_accessor"), "duplicate_accessor not registered"
+    assert hasattr(df, "unique_accessor"), "unique_accessor not registered"
+
+    assert df.duplicate_accessor.which() == "Accessor2", (
+        "duplicate_accessor should use Accessor2"
+    )
+    assert df.unique_accessor.which() == "Accessor3", "unique_accessor method incorrect"
+
+
+def test_duplicate_names_different_pandas_objs(monkeypatch):
+    """
+    The same accessor names may be reused across the three groups.
+
+    Registering ``acc1``/``acc2`` for DataFrame, Series and Index must not
+    produce a warning, and each object resolves to its own accessor.
+    """
+
+    class Accessor1:
+        def __init__(self, obj):
+            self._obj = obj
+
+        def which(self):
+            return "Accessor1"
+
+    class Accessor2:
+        def __init__(self, obj):
+            self._obj = obj
+
+        def which(self):
+            return "Accessor2"
+
+    mock_entry_points = create_mock_entry_points(
+        {
+            "pandas.DataFrame.accessor": [
+                ("acc1", Accessor1, "Package1"),
+                ("acc2", Accessor2, "Package2"),
+            ],
+            "pandas.Series.accessor": [
+                ("acc1", Accessor1, "Package1"),
+                ("acc2", Accessor2, "Package2"),
+            ],
+            "pandas.Index.accessor": [
+                ("acc1", Accessor1, "Package1"),
+                ("acc2", Accessor2, "Package2"),
+            ],
+        }
+    )
+    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)
+
+    # Check that no UserWarning is raised
+    with tm.assert_produces_warning(None, check_stacklevel=False):
+        accessor_entry_point_loader()
+
+    df = pd.DataFrame({"x": [1, 2, 3]})
+    assert hasattr(df, "acc1")
+    assert df.acc1.which() == "Accessor1"
+    assert hasattr(df, "acc2")
+    assert df.acc2.which() == "Accessor2"
+
+    s = pd.Series([1, 2, 3])
+    assert hasattr(s, "acc1")
+    assert s.acc1.which() == "Accessor1"
+    assert hasattr(s, "acc2")
+    assert s.acc2.which() == "Accessor2"
+
+    idx = pd.Index([1, 2, 3])
+    assert hasattr(idx, "acc1")
+    assert idx.acc1.which() == "Accessor1"
+    assert hasattr(idx, "acc2")
+    assert idx.acc2.which() == "Accessor2"