Combining PyCaret and TimeMachines for Time-Series Prediction

  1. Grab a live time series from microprediction
  2. Fit with pycaret
  3. Run some timemachines models
  4. Fit with pycaret again, this time using the timemachines predictions as additional features
!pip install pycaret[full]
!pip install --upgrade statsmodels
!pip install microprediction
!pip install timemachines
import microprediction
from datetime import datetime, timedelta
from microprediction import MicroReader
import random
import matplotlib.pyplot as plt
import pandas as pd
!pip install --upgrade pip

Retrieving live time-series data from microprediction.org

  • Time series are live, so each time you run this the data will be different
  • Time series are returned as lagged values (most recent first), so you need to reverse them to get chronological order
  • Time is measured in epoch seconds at microprediction
import microprediction
from datetime import datetime, timedelta
from microprediction import MicroReader
import random
import matplotlib.pyplot as plt
import pandas as pd
# Fetch a live series from microprediction.org and plot it chronologically.
mr = MicroReader()
all_streams = mr.get_stream_names()

# Keep sampling random streams until one offers at least 900 lagged
# observations, so there is enough history for the models fitted later.
lagged_values = []
while len(lagged_values) < 900:
    a_stream = random.choice(all_streams)
    lagged_values, lagged_seconds = mr.get_lagged_values_and_times(a_stream)

# Lagged data comes back most-recent-first; reverse both sequences so that
# index 0 is the oldest observation.
values = list(reversed(lagged_values))
# Timestamps are epoch seconds; fromtimestamp() yields naive local datetimes.
dt = [datetime.fromtimestamp(s) for s in reversed(lagged_seconds)]

plt.plot(dt, values)
plt.title(a_stream)

Using PyCaret

# Baseline: fit PyCaret regressors on calendar features plus lagged values.
# setup/compare_models are not imported anywhere else in the visible cells.
from pycaret.regression import setup, compare_models

# Build the frame directly from the series. The previous version declared a
# 'Date' column but populated 'date', leaving an empty 'Date' column that was
# handed to PyCaret as a spurious feature.
df = pd.DataFrame({'date': dt, 'y': values})
df['dayofweek'] = df['date'].dt.dayofweek
df['hour'] = df['date'].dt.hour

# Lag features y_1 .. y_{num_lags}; the upper bound is inclusive so that
# num_lags really is the number of lag columns created.
num_lags = 10
lags = range(1, num_lags + 1)
lag_names = ['y_' + str(lag) for lag in lags]
for lag, lag_name in zip(lags, lag_names):
    # shift(lag) places y[t - lag] on row t; the first `lag` rows become NaN.
    df[lag_name] = df['y'].shift(lag)

numerical_features = lag_names
categorical_features = ['dayofweek', 'hour']

# No shuffling and a time-series fold strategy keep the temporal order intact
# when splitting train/test and cross-validating.
# NOTE(review): the `silent` argument appears to have been removed in
# PyCaret 3.x -- confirm against the installed version.
s = setup(df, target='y', train_size=0.95,
          data_split_shuffle=False, fold_strategy='timeseries', fold=3,
          ignore_features=['date'],
          numeric_features=numerical_features,
          categorical_features=categorical_features,
          silent=True, verbose=False, session_id=123)
top5 = compare_models(n_select=5)

Using TimeMachines models as features

# Second pass: add TimeMachines skater predictions as extra features and
# re-run the PyCaret model comparison.
# setup/compare_models are not imported anywhere else in the visible cells.
from pycaret.regression import setup, compare_models
from timemachines.skaters.allskaters import EMA_SKATERS, DLM_SKATERS, THINKING_SKATERS, TSA_SKATERS, HYPOCRATIC_ENSEMBLE_SKATERS
from timemachines.skating import prior

skaters = EMA_SKATERS + DLM_SKATERS + TSA_SKATERS + HYPOCRATIC_ENSEMBLE_SKATERS
skater_names = [f.__name__ for f in skaters]

# The target series is the same for every skater, so extract it once.
y = df['y'].values
for f, skater_name in zip(skaters, skater_names):
    print('Running '+skater_name)
    x, x_std = prior(f, y=y, k=1)  # Runs a time-series model forward
    # NOTE(review): assumes prior() returns one value per observation for
    # k=1 so it can be stored as a column -- confirm against timemachines.
    df[skater_name] = x

# Same configuration as the baseline fit, with the skater columns declared
# as additional numeric features.
s = setup(df, target='y', train_size=0.95,
          data_split_shuffle=False, fold_strategy='timeseries', fold=3,
          ignore_features=['date'],
          numeric_features=numerical_features + skater_names,
          categorical_features=categorical_features,
          silent=True, verbose=False, session_id=123)
top5again = compare_models(n_select=5)

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store