Skip to content Skip to sidebar Skip to footer

Calculate Base Effect Based On Month To Month Changes In Pandas

Let's say we have a sample dataframe `df` of CPI data with `date` and `MoM` columns. I need to calculate `base_effect` and `new_price_effect` from them (the table's columns are: date, MoM, base_effect, new_price_effect).

Solution 1:

Try:

from dateutil.relativedelta import relativedelta
import pandas as pd
from numpy import nan
from datetime import datetime

# Sample input: one record per month from 2019-01 through 2020-12.
# The 2019 records carry NaN in both effect columns; the 2020 records carry
# numeric values there (presumably the expected results -- confirm against the
# original question).
data = [{'date': '2019-01', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-02', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-03', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-04', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-05', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-06', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-07', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-08', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-09', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-10', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-11', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2019-12', 'MoM': 1.01, 'base_effect': nan, 'new_price_effect': nan},
        {'date': '2020-01',
         'MoM': 1.015,
         'base_effect': 1.115668347,
         'new_price_effect': 1.015},
        {'date': '2020-02',
         'MoM': 1.015,
         'base_effect': 1.104622125,
         'new_price_effect': 1.030225},
        {'date': '2020-03',
         'MoM': 1.015,
         'base_effect': 1.093685273,
         'new_price_effect': 1.045678375},
        {'date': '2020-04',
         'MoM': 1.015,
         'base_effect': 1.082856706,
         'new_price_effect': 1.061363551},
        {'date': '2020-05',
         'MoM': 1.015,
         'base_effect': 1.072135352,
         'new_price_effect': 1.077284004},
        {'date': '2020-06',
         'MoM': 1.015,
         'base_effect': 1.061520151,
         'new_price_effect': 1.093443264},
        {'date': '2020-07',
         'MoM': 1.015,
         'base_effect': 1.05101005,
         'new_price_effect': 1.109844913},
        {'date': '2020-08',
         'MoM': 1.015,
         'base_effect': 1.04060401,
         'new_price_effect': 1.126492587},
        {'date': '2020-09',
         'MoM': 1.015,
         'base_effect': 1.030301,
         'new_price_effect': 1.143389975},
        {'date': '2020-10',
         'MoM': 1.015,
         'base_effect': 1.0201,
         'new_price_effect': 1.160540825},
        {'date': '2020-11',
         'MoM': 1.015,
         'base_effect': 1.01,
         'new_price_effect': 1.177948937},
        {'date': '2020-12',
         'MoM': 1.015,
         'base_effect': 1.0,
         'new_price_effect': 1.195618171}]

# Build the frame from the list of records, then parse the 'YYYY-MM' strings
# into timestamps so the date arithmetic in the helpers works.
df = pd.DataFrame(data)
df['date'] = pd.to_datetime(df['date'])
print(df)


def base_effect(x):
    """Return the product of ``MoM`` over the rest of the previous year.

    The window starts eleven months before ``x['date']`` and contains
    ``12 - month`` consecutive month starts, so it ends at December of the
    previous year. Rows of the module-level ``df`` whose ``date`` falls on one
    of those month starts are multiplied together.

    Args:
        x: One row of ``df`` (as passed by ``df.apply(..., axis=1)``) whose
            ``'date'`` entry is a month-start timestamp.

    Returns:
        The product of the selected ``MoM`` values, or NaN when the window is
        empty (December rows) or none of its months is present in ``df``.
    """
    months_left = 12 - x['date'].month
    # pd.DateOffset does calendar-month arithmetic directly on the Timestamp.
    window = pd.date_range(x['date'] - pd.DateOffset(months=11), freq='MS',
                           periods=months_left)
    previous_year = df[df['date'].isin(window)]
    # min_count=1 turns an empty selection into NaN instead of the neutral 1.0.
    return previous_year['MoM'].product(min_count=1)


def new_price_effect(x):
    """Return the year-to-date product of ``MoM`` for one row of ``df``.

    The window runs from January of the row's year through the row's own
    month (``month`` consecutive month starts). Rows of the module-level
    ``df`` whose ``date`` falls on one of those month starts are multiplied.

    Args:
        x: One row of ``df`` (as passed by ``df.apply(..., axis=1)``) whose
            ``'date'`` entry is a month-start timestamp.

    Returns:
        The product of the selected ``MoM`` values, or NaN when no row of
        ``df`` falls inside the window.
    """
    row_date = x['date']
    year_to_date = pd.date_range(datetime(row_date.year, 1, 1), freq='MS',
                                 periods=row_date.month)
    selected = df[df['date'].isin(year_to_date)]
    # min_count=1 turns an empty selection into NaN instead of the neutral 1.0.
    return selected['MoM'].product(min_count=1)

# Evaluate both effects row by row (axis=1 hands each helper one row as a
# Series) and store them under the column of the same name.
for column, effect in (('base_effect', base_effect),
                       ('new_price_effect', new_price_effect)):
    df[column] = df.apply(effect, axis=1)
print(df)

If you want to use it with other columns try:

def base_effect(x, col_name):
    """Multiply ``col_name`` over the remaining months of the previous year.

    The window starts eleven months before ``x['date']`` and spans
    ``12 - month`` month starts; rows of the module-level ``df`` dated on one
    of them contribute to the product.

    Args:
        x: One row of ``df``, with a timestamp under ``'date'``.
        col_name: Name of the column holding the factors to multiply.

    Returns:
        The product of the selected values, or NaN when nothing is selected.
    """
    row_date = x['date']
    window_start = row_date - relativedelta(months=11)
    window = pd.date_range(window_start, periods=12 - row_date.month, freq='MS')
    selected = df.loc[df['date'].isin(window), col_name]
    # min_count=1: an empty selection yields NaN rather than 1.0.
    return selected.product(min_count=1)


def new_price_effect(x, col_name):
    """Multiply ``col_name`` from January of the row's year up to its month.

    Args:
        x: One row of ``df``, with a timestamp under ``'date'``.
        col_name: Name of the column holding the factors to multiply.

    Returns:
        The product of the values of the module-level ``df`` dated on a month
        start inside the window, or NaN when nothing is selected.
    """
    row_date = x['date']
    january = datetime(row_date.year, 1, 1)
    year_to_date = pd.date_range(start=january, periods=row_date.month, freq='MS')
    in_window = df['date'].isin(year_to_date)
    # min_count=1: an empty selection yields NaN rather than 1.0.
    return df.loc[in_window, col_name].product(min_count=1)

# Extra keyword arguments given to DataFrame.apply are forwarded to the helper,
# which is how the source column name reaches it.
apply_options = {'col_name': 'MoM', 'axis': 1}
df['base_effect'] = df.apply(base_effect, **apply_options)
df['new_price_effect'] = df.apply(new_price_effect, **apply_options)
print(df)

Because the updated data uses month-end dates, `isin` won't match them against a month-start (`MS`) date range. You can update your functions as follows:

def base_effect(x, col_name):
    """Multiply ``col_name`` over the remaining months of the previous year.

    For a row dated in month ``m`` of year ``Y``, the window covers months
    ``m + 1`` through December of year ``Y - 1``. Dates are compared as
    monthly periods, so the result is the same whether ``df['date']`` holds
    month-start or month-end dates. (Building a month-start range from a
    month-end date rolls the start forward and drops the first month.)

    Args:
        x: One row of the module-level ``df``, with a timestamp under
            ``'date'``.
        col_name: Name of the column holding the factors to multiply.

    Returns:
        The product of the selected values, or NaN when the window is empty
        (December rows) or none of its months is present in ``df``.
    """
    current = pd.Period(x['date'], freq='M')
    months_left = 12 - current.month
    if months_left <= 0:
        # December: there is no later month in the previous year.
        return nan
    first = current - 11
    last = first + (months_left - 1)  # December of the previous year
    months = df['date'].dt.to_period('M')
    in_window = (months >= first) & (months <= last)
    # min_count=1 turns an empty selection into NaN instead of the neutral 1.0.
    return df.loc[in_window, col_name].product(min_count=1)

Solution 2:

For the updated data, each date first needs to be floored to the first day of its month:

from pandas.tseries.offsets import MonthBegin
from dateutil.relativedelta import relativedelta
import pandas as pd
from numpy import nan
from datetime import datetime

# Sample input dated on the last day of each month, 2020-01 through 2021-08,
# carrying only the MoM factor.
data = [{'date': '2020-1-31', 'MoM': 1.014},
 {'date': '2020-2-29', 'MoM': 1.008},
 {'date': '2020-3-31', 'MoM': 0.988},
 {'date': '2020-4-30', 'MoM': 0.991},
 {'date': '2020-5-31', 'MoM': 0.992},
 {'date': '2020-6-30', 'MoM': 0.999339},
 {'date': '2020-7-31', 'MoM': 1.006159},
 {'date': '2020-8-31', 'MoM': 1.00401},
 {'date': '2020-9-30', 'MoM': 1.002325},
 {'date': '2020-10-31', 'MoM': 0.997},
 {'date': '2020-11-30', 'MoM': 0.9940000000000001},
 {'date': '2020-12-31', 'MoM': 1.0070000000000001},
 {'date': '2021-1-31', 'MoM': 1.01},
 {'date': '2021-2-28', 'MoM': 1.006},
 {'date': '2021-3-31', 'MoM': 0.995},
 {'date': '2021-4-30', 'MoM': 0.997},
 {'date': '2021-5-31', 'MoM': 0.998},
 {'date': '2021-6-30', 'MoM': 0.996},
 {'date': '2021-7-31', 'MoM': 1.003},
 {'date': '2021-8-31', 'MoM': 1.001}]

df = pd.DataFrame.from_dict(data)
# Floor every date to the first day of its month. Going through a monthly
# Period is a true floor: unlike subtracting MonthBegin(1), a date that already
# sits on the 1st stays in its own month instead of moving to the previous one.
df['date'] = pd.to_datetime(df['date']).dt.to_period('M').dt.to_timestamp()


def base_effect(x):
    """Return the product of ``MoM`` over the rest of the previous year.

    The window starts eleven months before ``x['date']`` and contains
    ``12 - month`` consecutive month starts, ending at December of the
    previous year. Matching is done with ``isin`` against month-start
    timestamps, so ``df['date']`` must hold month-start dates.

    Args:
        x: One row of the module-level ``df`` (as passed by
            ``df.apply(..., axis=1)``) with a month-start timestamp under
            ``'date'``.

    Returns:
        The product of the selected ``MoM`` values, or NaN when the window is
        empty (December rows) or none of its months is present in ``df``.
    """
    row_date = x['date']
    # pd.DateOffset does calendar-month arithmetic directly on the Timestamp.
    window = pd.date_range(row_date - pd.DateOffset(months=11), freq='MS',
                           periods=12 - row_date.month)
    selected = df[df['date'].isin(window)]
    # min_count=1 turns an empty selection into NaN instead of the neutral 1.0.
    return selected['MoM'].product(min_count=1)


def new_price_effect(x):
    """Return the year-to-date product of ``MoM`` for one row of ``df``.

    The window runs from January of the row's year through the row's own
    month. Matching is done with ``isin`` against month-start timestamps, so
    ``df['date']`` must hold month-start dates.

    Args:
        x: One row of the module-level ``df`` (as passed by
            ``df.apply(..., axis=1)``) with a month-start timestamp under
            ``'date'``.

    Returns:
        The product of the selected ``MoM`` values, or NaN when no row of
        ``df`` falls inside the window.
    """
    row_date = x['date']
    january = datetime(row_date.year, 1, 1)
    year_to_date = pd.date_range(january, freq='MS', periods=row_date.month)
    selected = df[df['date'].isin(year_to_date)]
    # min_count=1 turns an empty selection into NaN instead of the neutral 1.0.
    return selected['MoM'].product(min_count=1)

# Compute each effect per row (axis=1 passes one row at a time) and attach it
# as a column named after the helper that produced it.
for effect in (base_effect, new_price_effect):
    df[effect.__name__] = df.apply(effect, axis=1)
print(df)

Out:

         date       MoM  base_effect  new_price_effect
0  2020-01-01  1.014000          NaN          1.014000
1  2020-02-01  1.008000          NaN          1.022112
2  2020-03-01  0.988000          NaN          1.009847
3  2020-04-01  0.991000          NaN          1.000758
4  2020-05-01  0.992000          NaN          0.992752
5  2020-06-01  0.999339          NaN          0.992096
6  2020-07-01  1.006159          NaN          0.998206
7  2020-08-01  1.004010          NaN          1.002209
8  2020-09-01  1.002325          NaN          1.004539
9  2020-10-01  0.997000          NaN          1.001525
10 2020-11-01  0.994000          NaN          0.995516
11 2020-12-01  1.007000          NaN          1.002485
12 2021-01-01  1.010000     0.988644          1.010000
13 2021-02-01  1.006000     0.980797          1.016060
14 2021-03-01  0.995000     0.992710          1.010980
15 2021-04-01  0.997000     1.001726          1.007947
16 2021-05-01  0.998000     1.009804          1.005931
17 2021-06-01  0.996000     1.010472          1.001907
18 2021-07-01  1.003000     1.004286          1.004913
19 2021-08-01  1.001000     1.000275          1.005918

Post a Comment for "Calculate Base Effect Based On Month To Month Changes In Pandas"