Skip to content

DEPR/CLN: Remove freq parameters from df.rolling/expanding/ewm #18601

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 6, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
@@ -150,6 +150,8 @@ Removal of prior version deprecations/changes
- The ``SparseList`` class has been removed (:issue:`14007`)
- The ``pandas.io.wb`` and ``pandas.io.data`` stub modules have been removed (:issue:`13735`)
- ``Categorical.from_array`` has been removed (:issue:`13854`)
- The ``freq`` parameter has been removed from the ``rolling``/``expanding``/``ewm`` methods of DataFrame
and Series (deprecated since v0.18). Instead, resample before calling the methods. (:issue:`18601`)

.. _whatsnew_0220.performance:

12 changes: 6 additions & 6 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
@@ -7357,31 +7357,31 @@ def _add_series_or_dataframe_operations(cls):
from pandas.core import window as rwindow

@Appender(rwindow.rolling.__doc__)
def rolling(self, window, min_periods=None, freq=None, center=False,
def rolling(self, window, min_periods=None, center=False,
win_type=None, on=None, axis=0, closed=None):
axis = self._get_axis_number(axis)
return rwindow.rolling(self, window=window,
min_periods=min_periods, freq=freq,
min_periods=min_periods,
center=center, win_type=win_type,
on=on, axis=axis, closed=closed)

cls.rolling = rolling

@Appender(rwindow.expanding.__doc__)
def expanding(self, min_periods=1, freq=None, center=False, axis=0):
def expanding(self, min_periods=1, center=False, axis=0):
axis = self._get_axis_number(axis)
return rwindow.expanding(self, min_periods=min_periods, freq=freq,
return rwindow.expanding(self, min_periods=min_periods,
center=center, axis=axis)

cls.expanding = expanding

@Appender(rwindow.ewm.__doc__)
def ewm(self, com=None, span=None, halflife=None, alpha=None,
min_periods=0, freq=None, adjust=True, ignore_na=False,
min_periods=0, adjust=True, ignore_na=False,
axis=0):
axis = self._get_axis_number(axis)
return rwindow.ewm(self, com=com, span=span, halflife=halflife,
alpha=alpha, min_periods=min_periods, freq=freq,
alpha=alpha, min_periods=min_periods,
adjust=adjust, ignore_na=ignore_na, axis=axis)

cls.ewm = ewm
82 changes: 26 additions & 56 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
@@ -58,27 +58,21 @@


class _Window(PandasObject, SelectionMixin):
_attributes = ['window', 'min_periods', 'freq', 'center', 'win_type',
_attributes = ['window', 'min_periods', 'center', 'win_type',
'axis', 'on', 'closed']
exclusions = set()

def __init__(self, obj, window=None, min_periods=None, freq=None,
def __init__(self, obj, window=None, min_periods=None,
center=False, win_type=None, axis=0, on=None, closed=None,
**kwargs):

if freq is not None:
warnings.warn("The freq kw is deprecated and will be removed in a "
"future version. You can resample prior to passing "
"to a window function", FutureWarning, stacklevel=3)

self.__dict__.update(kwargs)
self.blocks = []
self.obj = obj
self.on = on
self.closed = closed
self.window = window
self.min_periods = min_periods
self.freq = freq
self.center = center
self.win_type = win_type
self.win_freq = None
@@ -117,16 +111,6 @@ def _convert_freq(self, how=None):

obj = self._selected_obj
index = None
if (self.freq is not None and
isinstance(obj, (ABCSeries, ABCDataFrame))):
if how is not None:
warnings.warn("The how kw argument is deprecated and removed "
"in a future version. You can resample prior "
"to passing to a window function", FutureWarning,
stacklevel=6)

obj = obj.resample(self.freq).aggregate(how or 'asfreq')

return obj, index

def _create_blocks(self, how):
@@ -374,14 +358,11 @@ class Window(_Window):
Minimum number of observations in window required to have a value
(otherwise result is NA). For a window that is specified by an offset,
this will default to 1.
freq : string or DateOffset object, optional (default None)
.. deprecated:: 0.18.0
Frequency to conform the data to before computing the statistic.
Specified as a frequency string or DateOffset object.
center : boolean, default False
Set the labels at the center of the window.
win_type : string, default None
Provide a window type. See the notes below.
Provide a window type. If ``None``, all points are evenly weighted.
See the notes below for further information.
on : string, optional
For a DataFrame, column on which to calculate
the rolling window, rather than the index
@@ -479,10 +460,6 @@ class Window(_Window):
By default, the result is set to the right edge of the window. This can be
changed to the center of the window by setting ``center=True``.
The `freq` keyword is used to conform time series data to a specified
frequency by resampling the data. This is done with the default parameters
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
To learn more about the offsets & frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
@@ -506,6 +483,11 @@ class Window(_Window):
If ``win_type=None`` all points are evenly weighted. To learn more about
different window types see `scipy.signal window functions
<https://docs.scipy.org/doc/scipy/reference/signal.html#window-functions>`__.
See Also
--------
expanding : Provides expanding transformations.
ewm : Provides exponentially weighted functions.
"""

def validate(self):
@@ -876,8 +858,6 @@ def sum(self, *args, **kwargs):

def max(self, how=None, *args, **kwargs):
nv.validate_window_func('max', args, kwargs)
if self.freq is not None and how is None:
how = 'max'
return self._apply('roll_max', 'max', how=how, **kwargs)

_shared_docs['min'] = dedent("""
@@ -891,8 +871,6 @@ def max(self, how=None, *args, **kwargs):

def min(self, how=None, *args, **kwargs):
nv.validate_window_func('min', args, kwargs)
if self.freq is not None and how is None:
how = 'min'
return self._apply('roll_min', 'min', how=how, **kwargs)

def mean(self, *args, **kwargs):
@@ -909,8 +887,6 @@ def mean(self, *args, **kwargs):
Method for down- or re-sampling""")

def median(self, how=None, **kwargs):
if self.freq is not None and how is None:
how = 'median'
return self._apply('roll_median_c', 'median', how=how, **kwargs)

_shared_docs['std'] = dedent("""
@@ -1060,9 +1036,9 @@ def corr(self, other=None, pairwise=None, **kwargs):

def _get_corr(a, b):
a = a.rolling(window=window, min_periods=self.min_periods,
freq=self.freq, center=self.center)
center=self.center)
b = b.rolling(window=window, min_periods=self.min_periods,
freq=self.freq, center=self.center)
center=self.center)

return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs))

@@ -1136,7 +1112,7 @@ def _validate_monotonic(self):
"monotonic".format(formatted))

def _validate_freq(self):
""" validate & return our freq """
""" validate & return window frequency """
from pandas.tseries.frequencies import to_offset
try:
return to_offset(self.window)
@@ -1346,10 +1322,6 @@ class Expanding(_Rolling_and_Expanding):
min_periods : int, default None
Minimum number of observations in window required to have a value
(otherwise result is NA).
freq : string or DateOffset object, optional (default None)
.. deprecated:: 0.18.0
Frequency to conform the data to before computing the statistic.
Specified as a frequency string or DateOffset object.
center : boolean, default False
Set the labels at the center of the window.
axis : int or string, default 0
@@ -1382,17 +1354,18 @@ class Expanding(_Rolling_and_Expanding):
By default, the result is set to the right edge of the window. This can be
changed to the center of the window by setting ``center=True``.
The `freq` keyword is used to conform time series data to a specified
frequency by resampling the data. This is done with the default parameters
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
See Also
--------
rolling : Provides rolling window calculations.
ewm : Provides exponentially weighted functions.
"""

_attributes = ['min_periods', 'freq', 'center', 'axis']
_attributes = ['min_periods', 'center', 'axis']

def __init__(self, obj, min_periods=1, freq=None, center=False, axis=0,
def __init__(self, obj, min_periods=1, center=False, axis=0,
**kwargs):
super(Expanding, self).__init__(obj=obj, min_periods=min_periods,
freq=freq, center=center, axis=axis)
center=center, axis=axis)

@property
def _constructor(self):
@@ -1611,9 +1584,6 @@ class EWM(_Rolling):
min_periods : int, default 0
Minimum number of observations in window required to have a value
(otherwise result is NA).
freq : None or string alias / date offset object, default=None
.. deprecated:: 0.18.0
Frequency to conform to before computing statistic
adjust : boolean, default True
Divide by decaying adjustment factor in beginning periods to account
for imbalance in relative weightings (viewing EWMA as a moving average)
@@ -1651,10 +1621,6 @@ class EWM(_Rolling):
parameter descriptions above; see the link at the end of this section for
a detailed explanation.
The `freq` keyword is used to conform time series data to a specified
frequency by resampling the data. This is done with the default parameters
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
When adjust is True (default), weighted averages are calculated using
weights (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1.
@@ -1674,16 +1640,20 @@ class EWM(_Rolling):
More details can be found at
http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows
See Also
--------
rolling : Provides rolling window calculations.
expanding : Provides expanding transformations.
"""
_attributes = ['com', 'min_periods', 'freq', 'adjust', 'ignore_na', 'axis']
_attributes = ['com', 'min_periods', 'adjust', 'ignore_na', 'axis']

def __init__(self, obj, com=None, span=None, halflife=None, alpha=None,
min_periods=0, freq=None, adjust=True, ignore_na=False,
min_periods=0, adjust=True, ignore_na=False,
axis=0):
self.obj = obj
self.com = _get_center_of_mass(com, span, halflife, alpha)
self.min_periods = min_periods
self.freq = freq
self.adjust = adjust
self.ignore_na = ignore_na
self.axis = axis
3 changes: 2 additions & 1 deletion pandas/stats/moments.py
Original file line number Diff line number Diff line change
@@ -208,6 +208,8 @@ def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs):
if value is not None:
kwds[k] = value

# TODO: the below is only in place temporarily until this module is removed.
kwargs.pop('freq', None) # freq removed in 0.22
# how is a keyword that if not-None should be in kwds
how = kwargs.pop('how', None)
if how is not None:
@@ -680,7 +682,6 @@ def f(arg, min_periods=1, freq=None, **kwargs):
name,
arg,
min_periods=min_periods,
freq=freq,
func_kw=func_kw,
**kwargs)
return f
96 changes: 27 additions & 69 deletions pandas/tests/test_window.py
Original file line number Diff line number Diff line change
@@ -7,7 +7,6 @@
from datetime import datetime, timedelta
from numpy.random import randn
import numpy as np
from distutils.version import LooseVersion

import pandas as pd
from pandas import (Series, DataFrame, bdate_range, isna,
@@ -284,33 +283,6 @@ def test_preserve_metadata(self):
assert s2.name == 'foo'
assert s3.name == 'foo'

def test_how_compat(self):
# in prior versions, we would allow how to be used in the resample
# now that its deprecated, we need to handle this in the actual
# aggregation functions
s = Series(np.random.randn(20),
index=pd.date_range('1/1/2000', periods=20, freq='12H'))

for how in ['min', 'max', 'median']:
for op in ['mean', 'sum', 'std', 'var', 'kurt', 'skew']:
for t in ['rolling', 'expanding']:

with catch_warnings(record=True):

dfunc = getattr(pd, "{0}_{1}".format(t, op))
if dfunc is None:
continue

if t == 'rolling':
kwargs = {'window': 5}
else:
kwargs = {}
result = dfunc(s, freq='D', how=how, **kwargs)

expected = getattr(
getattr(s, t)(freq='D', **kwargs), op)(how=how)
tm.assert_series_equal(result, expected)


class TestWindow(Base):

@@ -1452,22 +1424,18 @@ def get_result(arr, window, min_periods=None, center=False):
def _check_structures(self, f, static_comp, name=None,
has_min_periods=True, has_time_rule=True,
has_center=True, fill_value=None, **kwargs):
def get_result(obj, window, min_periods=None, freq=None, center=False):
def get_result(obj, window, min_periods=None, center=False):

# check via the API calls if name is provided
if name is not None:

# catch a freq deprecation warning if freq is provided and not
# None
with catch_warnings(record=True):
r = obj.rolling(window=window, min_periods=min_periods,
freq=freq, center=center)
r = obj.rolling(window=window, min_periods=min_periods,
center=center)
return getattr(r, name)(**kwargs)

# check via the moments API
with catch_warnings(record=True):
return f(obj, window=window, min_periods=min_periods,
freq=freq, center=center, **kwargs)
center=center, **kwargs)

series_result = get_result(self.series, window=50)
frame_result = get_result(self.frame, window=50)
@@ -1479,17 +1447,17 @@ def get_result(obj, window, min_periods=None, freq=None, center=False):
if has_time_rule:
win = 25
minp = 10
series = self.series[::2].resample('B').mean()
frame = self.frame[::2].resample('B').mean()

if has_min_periods:
series_result = get_result(self.series[::2], window=win,
min_periods=minp, freq='B')
frame_result = get_result(self.frame[::2], window=win,
min_periods=minp, freq='B')
series_result = get_result(series, window=win,
min_periods=minp)
frame_result = get_result(frame, window=win,
min_periods=minp)
else:
series_result = get_result(self.series[::2], window=win,
freq='B')
frame_result = get_result(self.frame[::2], window=win,
freq='B')
series_result = get_result(series, window=win)
frame_result = get_result(frame, window=win)

last_date = series_result.index[-1]
prev_date = last_date - 24 * offsets.BDay()
@@ -2035,15 +2003,11 @@ class TestMomentsConsistency(Base):
(np.nanmax, 1, 'max'),
(np.nanmin, 1, 'min'),
(np.nansum, 1, 'sum'),
(np.nanmean, 1, 'mean'),
(lambda v: np.nanstd(v, ddof=1), 1, 'std'),
(lambda v: np.nanvar(v, ddof=1), 1, 'var'),
(np.nanmedian, 1, 'median'),
]
if np.__version__ >= LooseVersion('1.8.0'):
base_functions += [
(np.nanmean, 1, 'mean'),
(lambda v: np.nanstd(v, ddof=1), 1, 'std'),
(lambda v: np.nanvar(v, ddof=1), 1, 'var'),
]
if np.__version__ >= LooseVersion('1.9.0'):
base_functions += [(np.nanmedian, 1, 'median'), ]
no_nan_functions = [
(np.max, None, 'max'),
(np.min, None, 'min'),
@@ -2597,9 +2561,9 @@ def test_expanding_apply(self):
ser = Series([])
tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean()))

def expanding_mean(x, min_periods=1, freq=None):
def expanding_mean(x, min_periods=1):
return mom.expanding_apply(x, lambda x: x.mean(),
min_periods=min_periods, freq=freq)
min_periods=min_periods)

self._check_expanding(expanding_mean, np.mean)

@@ -3052,8 +3016,7 @@ def test_rolling_max_gh6297(self):

expected = Series([1.0, 2.0, 6.0, 4.0, 5.0],
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
with catch_warnings(record=True):
x = series.rolling(window=1, freq='D').max()
x = series.resample('D').max().rolling(window=1).max()
tm.assert_series_equal(expected, x)

def test_rolling_max_how_resample(self):
@@ -3071,24 +3034,21 @@ def test_rolling_max_how_resample(self):
# Default how should be max
expected = Series([0.0, 1.0, 2.0, 3.0, 20.0],
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
with catch_warnings(record=True):
x = series.rolling(window=1, freq='D').max()
x = series.resample('D').max().rolling(window=1).max()
tm.assert_series_equal(expected, x)

# Now specify median (10.0)
expected = Series([0.0, 1.0, 2.0, 3.0, 10.0],
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
with catch_warnings(record=True):
x = series.rolling(window=1, freq='D').max(how='median')
x = series.resample('D').median().rolling(window=1).max(how='median')
tm.assert_series_equal(expected, x)

# Now specify mean (4+10+20)/3
v = (4.0 + 10.0 + 20.0) / 3.0
expected = Series([0.0, 1.0, 2.0, 3.0, v],
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
with catch_warnings(record=True):
x = series.rolling(window=1, freq='D').max(how='mean')
tm.assert_series_equal(expected, x)
x = series.resample('D').mean().rolling(window=1).max(how='mean')
tm.assert_series_equal(expected, x)

def test_rolling_min_how_resample(self):

@@ -3105,9 +3065,8 @@ def test_rolling_min_how_resample(self):
# Default how should be min
expected = Series([0.0, 1.0, 2.0, 3.0, 4.0],
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
with catch_warnings(record=True):
r = series.rolling(window=1, freq='D')
tm.assert_series_equal(expected, r.min())
r = series.resample('D').min().rolling(window=1)
tm.assert_series_equal(expected, r.min())

def test_rolling_median_how_resample(self):

@@ -3124,9 +3083,8 @@ def test_rolling_median_how_resample(self):
# Default how should be median
expected = Series([0.0, 1.0, 2.0, 3.0, 10],
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
with catch_warnings(record=True):
x = series.rolling(window=1, freq='D').median()
tm.assert_series_equal(expected, x)
x = series.resample('D').median().rolling(window=1).median()
tm.assert_series_equal(expected, x)

def test_rolling_median_memory_error(self):
# GH11722