diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 605f9501c5b23..8582d14811749 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1732,3 +1732,53 @@ Why does assignment fail when using chained indexing? This means that chained indexing will never work. See :ref:`this section ` for more context. + +.. _indexing.series_assignment: + +Series Assignment and Index Alignment +------------------------------------- + +When assigning a Series to a DataFrame column, pandas performs automatic alignment +based on index labels. This is a fundamental behavior that can be surprising to +new users who might expect positional assignment. + +Key Points: +~~~~~~~~~~~ + +* Series values are matched to DataFrame rows by index label +* Position/order in the Series doesn't matter +* Missing index labels result in NaN values +* This behavior is consistent across ``df[col] = series`` and ``df.loc[:, col] = series`` + +Examples: + +.. ipython:: python + + import pandas as pd + + # Create a DataFrame + df = pd.DataFrame({'values': [1, 2, 3]}, index=['x', 'y', 'z']) + + # Series with matching indices (different order) + s1 = pd.Series([10, 20, 30], index=['z', 'x', 'y']) + df['aligned'] = s1 # Aligns by index, not position + print(df) + + # Series with partial index match + s2 = pd.Series([100, 200], index=['x', 'z']) + df['partial'] = s2 # Missing 'y' gets NaN + print(df) + + # Series with non-matching indices + s3 = pd.Series([1000, 2000], index=['a', 'b']) + df['nomatch'] = s3 # All values become NaN + print(df) + + # Avoiding confusion: + # If you want positional assignment instead of index alignment: + # Convert Series to array/list for positional assignment + + df['positional'] = s1.tolist() + + # By contrast, reindex() is label-based: it aligns s1 to df.index explicitly + df['s1_values'] = s1.reindex(df.index) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 632ab12edd7e4..07db64249fe18 100644 --- a/pandas/core/frame.py +++ 
b/pandas/core/frame.py @@ -4214,6 +4214,90 @@ def isetitem(self, loc, value) -> None: self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs) def __setitem__(self, key, value) -> None: + """ + Set item(s) in DataFrame by key. + + This method allows you to set the values of one or more columns in the + DataFrame using a key. The key can be a single column label, a list of + labels, or a boolean array. If the key does not exist, a new + column will be created. + + Parameters + ---------- + key : label, list of labels, tuple, or boolean array + Column label(s) to set. Can be a single column name, list of column names, + a tuple for MultiIndex columns, or a boolean array. + value : scalar, array-like, Series, or DataFrame + Value(s) to set for the specified key(s). + + Returns + ------- + None + This method does not return a value. + + See Also + -------- + DataFrame.loc : Access and set values by label-based indexing. + DataFrame.iloc : Access and set values by position-based indexing. + DataFrame.assign : Assign new columns to a DataFrame. + + Notes + ----- + When assigning a Series to a DataFrame column, pandas aligns the Series + by index labels, not by position. 
This means: + + * Values from the Series are matched to DataFrame rows by index label + * If a Series index label doesn't exist in the DataFrame index, it's ignored + * If a DataFrame index label doesn't exist in the Series index, NaN is assigned + * The order of values in the Series doesn't matter; only the index labels matter + + Examples + -------- + Basic column assignment: + + >>> df = pd.DataFrame({"A": [1, 2, 3]}) + >>> df["B"] = [4, 5, 6] # Assigns by position + >>> df + A B + 0 1 4 + 1 2 5 + 2 3 6 + + Series assignment with index alignment: + + >>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2]) + >>> s = pd.Series([10, 20], index=[1, 3]) # Note: index 3 doesn't exist in df + >>> df["B"] = s # Assigns by index label, not position + >>> df + A B + 0 1 NaN + 1 2 10.0 + 2 3 NaN + + Series assignment with partial index match: + + >>> df = pd.DataFrame({"A": [1, 2, 3, 4]}, index=["a", "b", "c", "d"]) + >>> s = pd.Series([100, 200], index=["b", "d"]) + >>> df["B"] = s + >>> df + A B + a 1 NaN + b 2 100.0 + c 3 NaN + d 4 200.0 + + Series index labels that are not in the DataFrame index are ignored; + here the values for 'a' and 'b' are dropped: + + >>> df = pd.DataFrame({"A": [1, 2, 3]}, index=["x", "y", "z"]) + >>> s = pd.Series([10, 20, 30, 40, 50], index=["x", "y", "a", "b", "z"]) + >>> df["B"] = s + >>> df + A B + x 1 10 + y 2 20 + z 3 50 + """ if not PYPY: if sys.getrefcount(self) <= 3: warnings.warn( diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f1f453cf433cf..0ef9d5002777c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -609,6 +609,22 @@ def loc(self) -> _LocIndexer: Please see the :ref:`user guide` for more details and explanations of advanced indexing. + + **Assignment with Series** + + When assigning a Series to .loc[row_indexer, col_indexer], pandas aligns + the Series by index labels, not by order or position. 
+ + Series assignment with .loc and index alignment: + + >>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2]) + >>> s = pd.Series([10, 20], index=[1, 0]) # Note reversed order + >>> df.loc[:, "B"] = s # Aligns by index, not order + >>> df + A B + 0 1 20.0 + 1 2 10.0 + 2 3 NaN """ return _LocIndexer("loc", self)