其他分享
首页 > 其他分享> > 初识Prophet模型(二)-- 应用篇

初识Prophet模型(二)-- 应用篇

作者:互联网

相关学习: 初识Prophet模型(一)–理论篇

7、Prophet 模型应用

7.0 背景描述

7.1 导入数据

import pandas as pd
import numpy as np
from fbprophet import Prophet
import matplotlib.pyplot as plt
%matplotlib inline
df = pd.read_csv('data.csv')
df.head()

df.dtypes #检查下df的数据类型
# Prophet requires the `ds` column to be a pandas datetime type; convert the
# whole column in one vectorized call instead of element by element.
df['ds'] = pd.to_datetime(df['ds'])
plt.rcParams['figure.figsize']=(20,10)
plt.style.use('ggplot')
df.set_index('ds').y.plot()

7.2 拟合模型

model = Prophet(daily_seasonality=True)
model.fit(df)
<fbprophet.forecaster.Prophet at 0x10715a0b8>

7.3 预测(使用默认参数)

future = model.make_future_dataframe(periods=730)
future.tail()

'ds', 'trend', 'yhat_lower', 'yhat_upper', 'trend_lower', 'trend_upper',
       'additive_terms', 'additive_terms_lower', 'additive_terms_upper',
       'weekly', 'weekly_lower', 'weekly_upper', 'yearly', 'yearly_lower',
       'yearly_upper', 'multiplicative_terms', 'multiplicative_terms_lower',
       'multiplicative_terms_upper', 'yhat'
forecast = model.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head()

# Draw the forecast and keep the figure handle; `fig1` must be assigned
# before it can be printed.
fig1 = model.plot(forecast)
print(fig1)

成分分析

趋势是由不同的成分组成,比如总趋势、年、季节、月、周等等,我们要将这些成分从趋势中抽取出来看看不同成分的趋势情况

'ds', 'trend', 'yhat_lower', 'yhat_upper', 'trend_lower', 'trend_upper',
       'additive_terms', 'additive_terms_lower', 'additive_terms_upper',
       'weekly', 'weekly_lower', 'weekly_upper', 'yearly', 'yearly_lower',
       'yearly_upper', 'multiplicative_terms', 'multiplicative_terms_lower',
       'multiplicative_terms_upper', 'yhat'

因此,在下面的拆解图中,weekly中的Monday为0.3的意思就是,在trend的基础上,加0.3;Saturday为-0.3的意思就是,在trend的基础上,减0.3。因此,这条线的高低也在一定程度上反映了“销量的趋势”。

fig2 = model.plot_components(forecast)
print(fig2)

# Merge observations with the forecast for plotting only. The result is kept
# under its own name: overwriting `df` would add the yhat* columns and the
# future rows to the frame that later cells fit models on and join against.
forecast_df = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
merged_df = pd.merge(df, forecast_df, on='ds', how='right')
merged_df.set_index('ds').plot(figsize=(16,8), color=['royalblue', "green", "pink", "yellow"], grid=True);

x1 = forecast['ds']
y1 = forecast['yhat']
y2 = forecast['yhat_lower']
y3 = forecast['yhat_upper']
plt.plot(x1,y1)
plt.plot(x1,y2)
plt.plot(x1,y3)
plt.show()

print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())

7.4 趋势突变点

自动检测变化点

fig = model.plot(forecast)
for cp in model.changepoints:
    plt.axvline(cp, c='pink', ls='--', lw=2)

deltas = model.params['delta'].mean(0)
fig = plt.figure(facecolor='w', figsize=(10, 6))
ax = fig.add_subplot(111)
ax.bar(range(len(deltas)), deltas, facecolor='#0072B2', edgecolor='#0072B2')
ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
ax.set_ylabel('Rate change')
ax.set_xlabel('Potential changepoint')
fig.tight_layout()

from fbprophet.plot import add_changepoints_to_plot
fig = model.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), model, forecast) #虚线处为给定时间序列中的变点

调整趋势灵活性

增大灵活性
m = Prophet(changepoint_prior_scale=0.5)
forecast = m.fit(df).predict(future)
fig = m.plot(forecast)

减少灵活性
# Build a fresh model with a small changepoint_prior_scale so the trend is
# less flexible; re-fitting the previous `m` would reuse its 0.5 setting
# (and a Prophet object is meant to be fitted only once).
m = Prophet(changepoint_prior_scale=0.001)
forecast = m.fit(df).predict(future)
fig = m.plot(forecast)

指定变化点的位置

m = Prophet(changepoints=['2014-01-01'])
forecast = m.fit(df).predict(future)
fig = m.plot(forecast)

7.5季节性、假期效应和回归因子

假期和特殊事件建模

playoffs = pd.DataFrame({
  'holiday': 'playoff',
  'ds': pd.to_datetime(['2008-01-13', '2009-01-03', '2010-01-16',
                        '2010-01-24', '2010-02-07', '2011-01-08',
                        '2013-01-12', '2014-01-12', '2014-01-19',
                        '2014-02-02', '2015-01-11', '2016-01-17',
                        '2016-01-24', '2016-02-07']),
  'lower_window': 0,
  'upper_window': 1,
})
superbowls = pd.DataFrame({
  'holiday': 'superbowl',
  'ds': pd.to_datetime(['2010-02-07', '2014-02-02', '2016-02-07']),
  'lower_window': 0,
  'upper_window': 1,
})
holidays = pd.concat((playoffs, superbowls))

上面superbowl的日期也包含在playoff的日期中,也就是superbowl日期的影响会有个叠加效应

m = Prophet(holidays=holidays)
forecast = m.fit(df).predict(future)
forecast[(forecast['playoff'] + forecast['superbowl']).abs() > 0][['ds', 'playoff', 'superbowl']][-15:]

fig = m.plot_components(forecast)

from fbprophet.plot import plot_forecast_component
plot_forecast_component(m, forecast, 'superbowl')

内置假期

m = Prophet(holidays=holidays)
m.add_country_holidays(country_name='CN')
m.fit(df)

m.train_holiday_names
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

0                 playoff
1               superbowl
2          New Year's Day
3        Chinese New Year
4       Tomb-Sweeping Day
5               Labor Day
6    Dragon Boat Festival
7     Mid-Autumn Festival
8            National Day
dtype: object
m = Prophet(holidays=holidays)
m.add_country_holidays(country_name='US')
m.fit(df)

forecast = m.predict(future)
fig = m.plot_components(forecast)

季节性的傅里叶级数

from fbprophet.plot import plot_yearly
m = Prophet().fit(df)
a = plot_yearly(m)

from fbprophet.plot import plot_yearly
m = Prophet(yearly_seasonality=20).fit(df)
a = plot_yearly(m)

自定义季节性

m = Prophet(weekly_seasonality=False)
m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
forecast = m.fit(df).predict(future)
fig = m.plot_components(forecast)

依赖于其他因素的季节性

这里先增加一列布尔类型的数据,来表示日期在淡季还是旺季:

def is_nfl_season(ds):
    """Return True when `ds` falls in September through January.

    `ds` is anything `pd.to_datetime` can parse into a single timestamp.
    """
    month = pd.to_datetime(ds).month
    after_august = month > 8
    before_february = month < 2
    return after_august or before_february

df['on_season'] = df['ds'].apply(is_nfl_season)
df['off_season'] = ~df['ds'].apply(is_nfl_season)
m = Prophet(weekly_seasonality=False)
m.add_seasonality(name='weekly_on_season', period=7, fourier_order=3, condition_name='on_season')
m.add_seasonality(name='weekly_off_season', period=7, fourier_order=3, condition_name='off_season')

future['on_season'] = future['ds'].apply(is_nfl_season)
future['off_season'] = ~future['ds'].apply(is_nfl_season)
forecast = m.fit(df).predict(future)
fig = m.plot_components(forecast)

从图中可以看到,在旺季的时候每周末都会打球,周日和周一都有大幅度增长,但在淡季则完全没有。

假期和季节性的prior scale

m = Prophet(holidays=holidays, holidays_prior_scale=0.05).fit(df)
forecast = m.predict(future)
forecast[(forecast['playoff'] + forecast['superbowl']).abs() > 0][['ds', 'playoff', 'superbowl']][-10:]

可以看到,与之前相比假期效应被减弱了,特别是观测样本最少的superbowl

可以用下面的方式单独设置每周季节性的prior_scale:

m = Prophet()
m.add_seasonality(name='weekly', period=7, fourier_order=3, prior_scale=0.1)

额外的回归特征

下面,为NFL赛季的每周日添加这样一个回归特征,再画图看看这个特征的效果

def nfl_sunday(ds):
    """Return 1 if `ds` is a Sunday in September through January, else 0.

    `ds` is anything `pd.to_datetime` can parse into a single timestamp.
    """
    stamp = pd.to_datetime(ds)
    # weekday() numbers Monday as 0, so Sunday is 6.
    if stamp.weekday() != 6:
        return 0
    in_season = stamp.month > 8 or stamp.month < 2
    return 1 if in_season else 0
df['nfl_sunday'] = df['ds'].apply(nfl_sunday)

m = Prophet()
m.add_regressor('nfl_sunday')
m.fit(df)

future['nfl_sunday'] = future['ds'].apply(nfl_sunday)

forecast = m.predict(future)
fig = m.plot_components(forecast)

7.6 模型诊断(内置方法)

下面模型使用前五年的数据训练,预测后一年的数据

m = Prophet()
m.fit(df)
future = m.make_future_dataframe(periods=366)

from fbprophet.diagnostics import cross_validation

df_cv = cross_validation(m, '365 days', initial='1825 days', period='365 days')
cutoff = df_cv['cutoff'].unique()[0]
df_cv = df_cv[df_cv['cutoff'].values == cutoff]

fig = plt.figure(facecolor='w', figsize=(10, 6))
ax = fig.add_subplot(111)
ax.plot(m.history['ds'].values, m.history['y'], 'k.')
ax.plot(df_cv['ds'].values, df_cv['yhat'], ls='-', c='#0072B2')
ax.fill_between(df_cv['ds'].values, df_cv['yhat_lower'],
                df_cv['yhat_upper'], color='#0072B2',
                alpha=0.2)
ax.axvline(x=pd.to_datetime(cutoff), c='gray', lw=4, alpha=0.5)
ax.set_ylabel('y')
ax.set_xlabel('ds')
ax.text(x=pd.to_datetime('2010-01-01'),y=12, s='Initial', color='black',
       fontsize=16, fontweight='bold', alpha=0.8)
ax.text(x=pd.to_datetime('2012-08-01'),y=12, s='Cutoff', color='black',
       fontsize=16, fontweight='bold', alpha=0.8)
ax.axvline(x=pd.to_datetime(cutoff) + pd.Timedelta('365 days'), c='gray', lw=4,
           alpha=0.5, ls='--')
ax.text(x=pd.to_datetime('2013-01-01'),y=6, s='Horizon', color='black',
       fontsize=16, fontweight='bold', alpha=0.8);

下面的交叉验证,horizon=365天,initial=730天,period=180天。在八年的时间序列中,可放置cutoff的区间长度为 365*8-730-365=1825 天,每隔180天取一个cutoff,因此总共有 ⌊1825/180⌋+1 = 11 个预测

from fbprophet.diagnostics import cross_validation
df_cv = cross_validation(m, initial='730 days', period='180 days', horizon = '365 days')
df_cv.head()

from fbprophet.diagnostics import performance_metrics
df_p = performance_metrics(df_cv)
df_p.head()

from fbprophet.plot import plot_cross_validation_metric
fig = plot_cross_validation_metric(df_cv, metric='mape')

7.7 模型评估

prediction_size = 365
train_df = df[:-prediction_size]
train_df.tail()

model2 = Prophet(daily_seasonality=True)
model2.fit(train_df)
future2 = model2.make_future_dataframe(periods=365)
forecast2 = model2.predict(future2)

model2 .plot(forecast2);

def make_comparison_dataframe(historical, forecast):
    """Join forecast columns with the historical data on the `ds` index.

    Both frames are indexed by their `ds` column. The result keeps every
    forecast row (left join) with `yhat`, `yhat_lower`, `yhat_upper`
    followed by the columns of `historical`.
    """
    prediction_cols = ['yhat', 'yhat_lower', 'yhat_upper']
    predicted = forecast.set_index('ds')[prediction_cols]
    observed = historical.set_index('ds')
    return predicted.join(observed)
cmp_df = make_comparison_dataframe(df, forecast2)
cmp_df.tail()

def calculate_forecast_errors(df, prediction_size):
    """Compute MAPE and MAE over the last `prediction_size` rows of `df`.

    `df` must contain the columns `y` (actual) and `yhat` (predicted); it is
    copied, so the caller's frame is left untouched. MAPE is returned as a
    fraction (error divided by actual), not multiplied by 100.

    Returns a dict with the keys 'MAPE' and 'MAE'.
    """
    scored = df.copy()
    residual = scored['y'] - scored['yhat']
    scored['e'] = residual
    scored['p'] = 1 * residual / scored['y']
    # Only the trailing rows, i.e. the forecast period, are evaluated.
    window = scored[-prediction_size:]
    return {
        'MAPE': np.mean(np.abs(window['p'])),
        'MAE': np.mean(np.abs(window['e'])),
    }
for err_name, err_value in calculate_forecast_errors(cmp_df, prediction_size).items():
    print(err_name, err_value)
MAPE 0.053184142465032766
MAE 0.4132303661978998

Box-Cox变换

def inverse_boxcox(y, lambda_):
    """Invert a Box-Cox transform with parameter `lambda_`.

    For `lambda_ == 0` the transform is a plain log, so the inverse is
    `exp(y)`; otherwise it is `(lambda_ * y + 1) ** (1 / lambda_)`,
    evaluated through exp/log.
    """
    if lambda_ == 0:
        return np.exp(y)
    shifted = lambda_ * y + 1
    return np.exp(np.log(shifted) / lambda_)
train_df2 = train_df.copy().set_index('ds')
from scipy import stats
import statsmodels.api as sm
train_df2['y'], lambda_prophet = stats.boxcox(train_df2['y'])
train_df2.reset_index(inplace=True)
model3  = Prophet(daily_seasonality=True)
model3 .fit(train_df2)
future3 = model3.make_future_dataframe(periods=prediction_size)
forecast3 = model3.predict(future3)
# Map the point forecast and both interval bounds back to the original
# scale; converting only `yhat` would leave the bounds on the Box-Cox scale
# in the comparison frame built from `forecast3`.
for column in ['yhat', 'yhat_lower', 'yhat_upper']:
    forecast3[column] = inverse_boxcox(forecast3[column], lambda_prophet)
cmp_df2 = make_comparison_dataframe(df, forecast3)
for err_name, err_value in calculate_forecast_errors(cmp_df2, prediction_size).items():
     print(err_name, err_value)
MAPE 0.04373071028220759
MAE 0.34260353853143777

对最后一年的真实值与预测值进行可视化对比

test_df = df[-prediction_size:]
test_df = test_df.set_index('ds')
forecast2 = forecast2[['ds','yhat']].set_index('ds')
df_all = forecast2.join(test_df).dropna()
df_all.head()

# Set figure size and style before plotting so they apply to this figure.
plt.rcParams['figure.figsize']=(30,20)
plt.style.use('ggplot')
# Select the columns in an explicit order so the legend labels match the
# lines: `y` is the observed value, `yhat` the prediction.
df_all[['y', 'yhat']].plot()
plt.legend(['true', 'yhat'])
plt.show()

标签:plot,upper,--,Prophet,forecast,df,初识,yhat,ds
来源: https://blog.csdn.net/JanetHULAHA/article/details/112062700