Price Forecasting using Current Data with a Pre-Trained Model
Using the model for price prediction. Armed with a well trained model optimized for trading and the latest data from Binance or Yahoo Finance at our hands, gathered as described in this post, we are finally ready to reap the reward for our hard work: use the model for a price prediction of this asset for the coming hours or days. Compared to the original optimizing and training of the model, we can now do forecasting with a limited dataset very quickly, a matter of a few minutes instead of hours!
Scientific research revealed that the ARIMA model has a strong potential for short-term prediction. It can compete favorably with other techniques for stock price prediction.
Get Data, Load the Model and Go!
ARIMA model usage for price prediction is the purpose of the UseArima class. This Python class loads pre-saved ARIMA and SARIMAX models from disk when present, then loads the current dataset and checks the time series for stationarity and the need for differencing. After this we fit the models on the current data to finally make a price prediction for the next 12 hours. We plot both the ARIMA and SARIMAX predictions together with the projected forecast.
Following the recent refactoring of the training ARIMA and SARIMAX models script, we also gave this script an upgrade.
Preliminaries
Things start with loading the necessary libraries such as pandas, numpy and the statistical models from statsmodels. We also load plotly for plotting and several other utilities. Additionally, the pieces needed for multithreading and for communicating with the Main Window are loaded. We present the class here for use with the PyQt6 GUI framework. You can use it just as well in a command-line (CLI) terminal application or a Jupyter Notebook.
# Copyright (c) 2024, 2025 Hans De Weme
# Licensed under the MIT License (https://opensource.org/licenses/MIT)
# Class: UseArima
# Purpose: using pre-saved ARIMA and/or SARIMAX model with a current dataset
# plotting the predictions and a forecast
"""
Imports necessary libraries such as NumPy, Pandas, statsmodels SARIMAX
Loads preprocessed data set
Load prefitted models
Divides the data into training and testing sets
Uses the trained ARIMA and SARIMAX models to make future price predictions.
Plots the predicted prices on a graph.
Saves the predicted prices to a CSV file.
"""
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots
import joblib
import os
import fnmatch
from pathlib import Path
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
import statsmodels.api as sm
from datetime import datetime
from get_current_data import getCurrent
from PyQt6.QtCore import QThread, pyqtSignal, QEventLoop, pyqtSlot
from make_features import MakeFeatures
from get_sentiment_data import getSentiment
import warnings
warnings.filterwarnings("ignore")
Initialization
When initializing, the class connects to the calling parent and loads the general settings. The Main Window then starts this worker thread by calling the run() method. This method orchestrates the processing: loading the models, getting the dataset, and using the models on the dataset. We use several other methods to get user input, manipulate data frames and perform (reverse) differencing.
class UseArima(QThread):
progress_signal = pyqtSignal(str) # Signal to communicate progress (string message) back to the main thread
failure_signal = pyqtSignal(str) # Signal carrying a failure message; run() emits it when no model or no data could be loaded
file_requested = pyqtSignal(str) # Emitted by get_models() with 'ARIMA' or 'SARIMAX' to ask for a model file choice
file_selected = pyqtSignal(str) # Carries the chosen file path; connected to on_file_selected in __init__
request_input_signal = pyqtSignal() # Emitted by request_input_from_main_thread() to ask for user input
received_input_signal = pyqtSignal(float) # Carries the numeric answer; connected to on_input_received in __init__
def __init__(self, asset, settings, kind, parent=None):
    """Wire the signals, store the settings and derive the market flags.

    Args:
        asset: Asset symbol; it is upper-cased before use.
        settings: Settings mapping (get_models reads its 'models' entry).
        kind: 'S' uses the asset symbol itself as market name; any other
            value appends 'USDT' ('C' = cryptocurrency, 'F' =
            cryptocurrency plus features engineering, which also enables
            the exogenous features).
        parent: Optional owner object. When given, its set_status_message
            is connected to progress_signal.
    """
    super().__init__()  # necessary for QObject, needed for pyqtSignal
    self.user_input = None
    self.received_input_signal.connect(self.on_input_received)
    self.parent = parent
    self.selected_file = None
    self.file_selected.connect(self.on_file_selected)  # Connect to update internal state
    # parent defaults to None; connecting unconditionally raised
    # AttributeError when the class was created without a parent.
    if parent is not None:
        self.progress_signal.connect(parent.set_status_message)
    self.settings = settings
    self.asset = asset.upper()
    self.FREQ = '1h'
    self.kind = kind
    self.EXO = self.kind == 'F'  # exogenous features only for kind 'F'
    if self.kind == 'S':
        self.MARKT = self.asset
    else:
        self.MARKT = self.asset + 'USDT'
    self.arima = False
    self.sarimax = False
    self.progress_signal.emit("Libraries loaded!")
    self.N_INPUT = 12  # number of new datapoints to predict
    self.TRAIN_SPLIT = 0.2  # size of test data set apart from train data
def run(self):
    """Thread entry point: load models, fetch data, then predict and forecast.

    Emits failure_signal and stops early when no model or no data is
    available.
    """
    # Event loop used to block execution until signals are received
    self.loop = QEventLoop()
    if not self.get_models():
        self.failure_signal.emit("No Valid Model(s) Found!")
        return
    if not self.get_data():
        self.failure_signal.emit("No Data Loaded!")
        return
    self.init_data()
    self.do_ADF()
    self.do_predict()
    self.do_forecast()
def do_message(self, the_message):
    """Show *the_message* in the GUI (via progress_signal) and print it."""
    self.progress_signal.emit(the_message)
    print(the_message)
def time_stamp(self):
    """Return the current local time as a 'YYYYMMDDHHMM' string."""
    return datetime.now().strftime("%Y%m%d%H%M")
@pyqtSlot(str)
def on_file_selected(self, file_path):
    """Slot: remember the selected file path and release the waiting thread."""
    self.selected_file = file_path
    # Quitting the event loop unblocks the code waiting in loop.exec()
    self.loop.quit()
def request_input_from_main_thread(self):
    """Ask the main thread for a value and block until it arrives.

    Used as the input callback handed to getCurrent in get_data.

    Returns:
        The value stored by on_input_received.
    """
    # Forget the answer to any earlier request; without this a second call
    # returned the stale value immediately. Reset BEFORE emitting so a fast
    # reply cannot be wiped out.
    self.user_input = None
    self.request_input_signal.emit()
    while self.user_input is None:  # Wait until the input is received
        self.msleep(100)
    return self.user_input
@pyqtSlot(float)
def on_input_received(self, input_value):
    """Slot: store the value that request_input_from_main_thread waits for."""
    self.user_input = input_value
def sync_dataframes_by_start_date(self, df1, df2):
    """Trim both frames so they start at their earliest shared index value.

    The later of the two first index values is used as cut-off; rows before
    it are dropped from both frames.

    Returns:
        tuple: (filtered df1, filtered df2).
    """
    shared_start = max(df1.index.min(), df2.index.min())
    first = df1[df1.index >= shared_start]
    second = df2[df2.index >= shared_start]
    return first, second
def test_stationarity(self, timeseries):
    """Return the p-value of the Augmented Dickey-Fuller test.

    A stationary time series has a constant mean, variance and
    autocovariance over time. The ADF test checks for a unit root; if one is
    present the series is not stationary and differencing is necessary
    before ARIMA. A low p-value (typically below 0.05) indicates a
    stationary series.
    """
    adf_result = adfuller(timeseries, autolag='AIC')
    p_value = adf_result[1]
    return p_value
def difference(self, dataset, interval=1):
    """Return the lag-*interval* differences of *dataset* as a list.

    interval=1 gives first-order differencing; the result is *interval*
    items shorter than the input.
    """
    return [
        dataset[pos] - dataset[pos - interval]
        for pos in range(interval, len(dataset))
    ]
def reverse_differencing(self, forecast, base_value, target_index):
    """Turn differenced values back into levels by cumulating from a base.

    Args:
        forecast: Iterable of differenced values.
        base_value: Level that the first difference is added to.
        target_index: Index labels for the result.

    Returns:
        pd.Series of reconstructed levels. When the number of values and
        labels differ, the longer one is cut to the shorter length.
    """
    levels = []
    running = base_value
    for step in forecast:
        running += step
        levels.append(running)
    n_levels, n_labels = len(levels), len(target_index)
    if n_levels > n_labels:
        levels = levels[:n_labels]
    elif n_levels < n_labels:
        target_index = target_index[:n_levels]
    return pd.Series(levels, index=target_index)
Loading Parameters, Model and Dataset
First the class looks for ARIMA and SARIMAX models in the ‘saved_models’ folder, as defined in the settings. Unlike LSTM models, which use separately saved hyperparameters, (S)ARIMA(X) models are saved including their hyperparameters. If several models are available, the user may pick the ones to use. Below we show 2 models trained with XRP’s complete history and exogenous (sentiment) data.

We load 1 model, then we invoke the Main Window to collect the most recent data to use with the model.
Loading Models
def get_models(self):
    """Locate and load saved ARIMA and SARIMAX models from disk when present.

    Files are searched in the folder named by settings['models'] and must
    contain '<MARKT>_ARIMA_' or '<MARKT>_SARIMAX_' in their name. When
    several files match, the Main Window is asked (file_requested signal) to
    let the user choose one.

    Sets self.arima / self.sarimax, the file names (arima_naam /
    sarimax_naam) and the loaded models (arima_model / sarimax_model).

    Returns:
        bool: True when at least one model file was found or chosen.
    """
    self.suc = False
    model_dir = Path(self.settings['models']).resolve()
    if not model_dir.is_dir():
        # os.listdir would raise on a missing folder; treat it as "no models"
        self.do_message('Saved models directory not found: ' + str(model_dir))
        return False

    self.do_message('looking for saved ARIMA model in :' + str(model_dir))
    arima_file = self._select_model_file(model_dir, 'ARIMA')
    self.arima = arima_file is not None
    if self.arima:
        self.arima_naam = arima_file
        self.suc = True

    print('looking for saved SARIMAX model in :' + str(model_dir))
    sarimax_file = self._select_model_file(model_dir, 'SARIMAX')
    self.sarimax = sarimax_file is not None
    if self.sarimax:
        self.sarimax_naam = sarimax_file
        self.suc = True

    if self.arima and self.sarimax:
        print('Both Models were succesfully found')
        self.progress_signal.emit('Both Models were succesfully found!')

    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code - only load model files from a trusted folder.
    if self.arima:
        self.arima_model = joblib.load(self.arima_naam)
        print('* * * ARIMA model geladen: ' + str(self.arima_naam))
        self.progress_signal.emit("Model loaded!" + str(self.arima_naam))
    if self.sarimax:
        self.sarimax_model = joblib.load(self.sarimax_naam)
        print('* * * SARIMAX model geladen: ' + str(self.sarimax_naam))
        self.progress_signal.emit("Model loaded!" + str(self.sarimax_naam))
    return self.suc


def _select_model_file(self, model_dir, model_tag):
    """Return the path of the saved model for *model_tag*, or None.

    *model_tag* is 'ARIMA' or 'SARIMAX'. With exactly one match that file is
    used; with several matches the user is asked to pick one; None is
    returned when nothing matches or the dialog reports
    "No Directory selected".
    """
    substring = self.MARKT + '_' + model_tag + '_'
    matches = [
        os.path.join(model_dir, name)
        for name in os.listdir(model_dir)
        if fnmatch.fnmatch(name, f"*{substring}*")
    ]
    if not matches:
        print(f"No saved Model-files containing '{substring}' found.")
        return None
    if len(matches) == 1:
        # The SARIMAX branch used to report this file as a "directory"
        print(f"Found exactly one saved Model-file: {matches[0]}")
        return matches[0]

    print(f"Multiple saved Model-files containing '{substring}' found:")
    for match in matches:
        print(f"- {match}")
    # Drop the choice made for a previous model type; otherwise the wait
    # loop below sees the stale value and never actually waits.
    self.selected_file = None
    self.file_requested.emit(model_tag)  # Main Window opens a file dialog
    self.loop.exec()  # Block until on_file_selected quits the loop
    while self.selected_file is None:  # Wait until the file is selected
        self.msleep(100)  # Sleep briefly to allow main thread to update
    if self.selected_file == "No Directory selected":
        return None
    return self.selected_file
Get the Data
Next to the current XRP data we also collect 3 sentiment indices.
def get_data(self):
    """Fetch the current dataset through getCurrent and store it in self.df.

    The dataset is trimmed down to the close price only, except for kind
    'F', where features engineering needs the other columns.

    Returns:
        bool: True when a non-empty DataFrame was loaded, otherwise False.
    """
    trim = self.kind != 'F'
    self.data = getCurrent(self.kind, self.asset, trim=trim, input_callback=self.request_input_from_main_thread)
    # Explicit comparison kept: the return contract of getCurrent.get_data is
    # not visible here, so only a value equal to True counts as success.
    if self.data.get_data(self.kind) != True:
        return False
    # pd.DataFrame(None) is an empty frame, so a missing dataset is covered
    # by the emptiness test. The former "self.df is None" check came after
    # ".empty" and could never be true.
    self.df = pd.DataFrame(self.data.df)
    return not self.df.empty
def init_data(self):
    """Add exogenous features when enabled, then split into train and test.

    With self.EXO set, three sentiment series (Fear and Greed, S&P 500 and
    VIX) are merged into self.df, MakeFeatures builds the feature frame
    (stored as self.exo) and a correlation bar chart is shown. When none of
    the sentiment series could be loaded, self.EXO is switched off.

    Afterwards self.df is split using TRAIN_SPLIT into self.train /
    self.test, and untouched copies are kept as self.train_r / self.test_r.
    """
    if self.EXO == True:
        self.do_message('Getting Fear and Greed Sentiment Data')
        self.getdata = getSentiment()
        self.fag = self._attach_sentiment(
            self.getdata.get_fagi_data(), 'fag',
            'Failed to get Fear and Greed Sentiment Data', source_column='value')
        self.do_message('Getting Stock Index Sentiment Data')
        # '^GSPC' S&P 500 index
        self.gspc = self._attach_sentiment(
            self.getdata.get_indices_data('^GSPC', 'gspc'), 'gspc',
            'Failed to get S&P500 Sentiment Data')
        # '^VIX' CBOE Volatility Index
        self.vix = self._attach_sentiment(
            self.getdata.get_indices_data('^VIX', 'vix'), 'vix',
            'Failed to get CBOE Volatility Index Sentiment Data')
        if not (self.fag or self.gspc or self.vix):
            self.EXO = False
            self.do_message('No Additional Features Available for Exogenous Factor with SARIMAX')
        else:
            print(self.df)
            self.do_message('Creating Features for Exogenous Factor with SARIMAX')
            mf = MakeFeatures(self.df)
            dfplus = mf.do_make_features()
            corr_matrix = dfplus.corr()['close'].drop('close')
            fig = px.bar(x=corr_matrix.index, y=corr_matrix.values, labels={'x': 'Features', 'y': 'Correlations'}, title='corr', color=corr_matrix.values, color_continuous_scale='Viridis')
            pio.show(fig)
            # The feature frame never gets more rows than the price frame.
            # (The former truncation of self.df itself was dead code: it was
            # overwritten on the next assignment.)
            dfplus = dfplus[:min(len(self.df), len(dfplus))]
            # copy() so that later column assignments act on an own frame
            self.df = dfplus[['close']].copy()
            self.exo = dfplus
            print('Raw Testet with Features:')
            print(self.exo.tail())

    # Split data into train and test
    self.train_size = int(len(self.df) * (1 - self.TRAIN_SPLIT))
    self.train, self.test = self.df[:self.train_size], self.df[self.train_size:]
    if self.EXO == True:
        self.xtrain_size = int(len(self.exo) * (1 - self.TRAIN_SPLIT))
        self.xtrain, self.xtest = self.exo[:self.train_size], self.exo[self.train_size:len(self.df)]
    self.endp = self.test.tail(1).index.item()  # last index value of the test set
    self.startp = self.test.head(1).index.item()  # first index value of the test set
    print('* * * enddt testset: ' + str(self.endp))
    print('* * * startdt testset: ' + str(self.startp))
    print('* * * length testset: ' + str(len(self.test)))
    print('* * * length trainset: ' + str(len(self.train)))
    print('last 5 rows raw train data:')
    print(self.train.tail())
    print('last 5 rows raw testet:')
    print(self.test.tail())
    # Save raw data as independent copies: do_ADF later overwrites the
    # 'close' column of self.df, and the raw frames must keep the prices.
    self.train_r = self.train.copy()
    self.test_r = self.test.copy()


def _attach_sentiment(self, frame, column, failure_message, source_column=None):
    """Merge one sentiment series into self.df as *column*.

    Both frames are cut to their shared start, the sentiment frame is
    forward-filled onto the index of self.df and, when *source_column* is
    given, that column is renamed to *column* first.

    Returns:
        bool: False (after reporting *failure_message*) when *frame* is
        None or empty, otherwise True.
    """
    if frame is None or frame.empty:
        self.do_message(failure_message)
        return False
    self.df, frame = self.sync_dataframes_by_start_date(self.df, frame)
    frame = frame.reindex(self.df.index, method='ffill')
    if source_column is not None:
        frame = frame.rename(columns={source_column: column})
    self.df[column] = frame[column]
    self.df[column] = self.df[column].ffill()
    return True
Feature Engineering and Correlation Matrix
With the data gathered we compute a correlation matrix showing the strength of the relation between predictors and target.

Using the Model
Finally we load the models and prepare the dataset. After differencing the data it is split into training and testing data. For both the ARIMA and the SARIMAX we then fit the model on the training data and evaluate it against the test data, plotting prediction and actual data. At last a forecast for the next 12 hours beyond the dataset is made and plotted as well.
Autocorrelation and Stationarity testing
def do_ADF(self):
    """Test stationarity, difference when needed, re-split and suggest orders.

    Steps:
      1. Plot ACF and PACF of the close series before differencing.
      2. Run the ADF test. When the p-value is not below 0.05, replace
         'close' (and the active sentiment columns of self.exo) by first
         differences and set self.d = 1 / self.DIFF = True.
      3. Re-split train/test and set self.startp / self.endp.
      4. Derive self.p_range / self.q_range from PACF/ACF and look for a
         seasonal lag among 4, 6, 12 and 24 (stored in self.s, else None).
    """
    self.do_message("* * * Running ADF test and auto-suggesting ARIMA parameters * * *")
    self.DIFF = False
    self.d = 0
    # ACF & PACF test & plots before differencing
    lag_acf = sm.tsa.acf(self.df['close'], nlags=20)
    lag_pacf = sm.tsa.pacf(self.df['close'], nlags=20)
    fig = make_subplots(rows=1, cols=2, subplot_titles=('Autocorrelation Function', 'Partial Autocorrelation Function'))
    fig.add_trace(go.Bar(x=np.arange(len(lag_acf)), y=lag_acf), row=1, col=1)
    fig.add_trace(go.Bar(x=np.arange(len(lag_pacf)), y=lag_pacf), row=1, col=2)
    fig.update_layout(height=600, width=1200, title_text="ACF and PACF Plots (Before Differencing)")
    fig.update_xaxes(title_text="Lag", row=1, col=1)
    fig.update_xaxes(title_text="Lag", row=1, col=2)
    fig.update_yaxes(title_text="Autocorrelation", row=1, col=1)
    fig.update_yaxes(title_text="Partial Autocorrelation", row=1, col=2)
    pio.show(fig)

    # ADF stationarity test on the full dataset
    p_value = self.test_stationarity(self.df['close'])
    print(f"* * * Augmented Dickey-Fuller (ADF) p-value: {p_value:.4f}")
    if p_value < 0.05:
        print("* * * Series is stationary → no differencing needed")
    else:
        print("* * * Series is non-stationary → differencing applied (d=1)")
        self.do_message("Differencing not stationary Dataset, ADF value: " + str(p_value))
        self.d = 1
        self.DIFF = True
        self.df['close'] = self.df['close'].diff().fillna(0)
        if self.EXO:
            print("Testset last rows before and after differencing:")
            print(self.exo.tail())
            for flag, column in ((self.gspc, 'gspc'), (self.fag, 'fag'), (self.vix, 'vix')):
                if flag:
                    self.exo[column] = self.exo[column].diff().fillna(0)
            print(self.exo.tail())

    # Positioning and index logic
    self.train_size = int(len(self.df) * (1 - self.TRAIN_SPLIT))
    self.train, self.test = self.df[0:self.train_size], self.df[self.train_size:]
    if self.EXO:
        self.xtrain, self.xtest = self.exo[0:self.train_size], self.exo[self.train_size:]
        self.xtrain_size = len(self.xtrain)
    self.endp = self.test.tail(1).index.item()
    self.startp = self.test.head(1).index.item()
    print(f"* * * Start index: {self.startp}, End index: {self.endp}")
    print(f"Type of index: {type(self.df.index)}")
    if self.DIFF:
        # diff().fillna(0) keeps the series length, so position train_size
        # holds the first test difference (test[0] - last train value).
        # Starting one position earlier shifted every reconstructed price
        # by one step.
        self.startp = self.train_size
        self.endp = len(self.df) - 1
        print(f"Updated for differencing: start = {self.startp}, end = {self.endp}")

    # Now suggest p and q based on the (differenced) stationary series
    stationary_series = self.df['close']
    pacf_vals = pacf(stationary_series, nlags=20)
    acf_vals = acf(stationary_series, nlags=20)
    # Magnitude decides significance, as in the seasonal check below; the
    # signed comparison treated strong negative correlations as "small".
    pacf_small = np.where(np.abs(pacf_vals[1:]) < 0.2)[0]
    acf_small = np.where(np.abs(acf_vals[1:]) < 0.2)[0]
    self.p_range = list(range(0, int(pacf_small[0]) + 2)) if pacf_small.size else [0, 1]
    self.q_range = list(range(0, int(acf_small[0]) + 2)) if acf_small.size else [0, 1]

    # Check for seasonality. The ACF must reach the largest candidate lag:
    # with nlags=20 the 24-lag candidate could never be evaluated.
    seasonal_candidates = [4, 6, 12, 24]
    seasonal_acf = acf(stationary_series, nlags=max(seasonal_candidates))
    self.s = None
    for s_candidate in seasonal_candidates:
        if s_candidate < len(seasonal_acf) and abs(seasonal_acf[s_candidate]) > 0.2:
            self.s = s_candidate
            print(f"📈 Detected seasonal lag ≈ {self.s} (ACF = {seasonal_acf[s_candidate]:.2f})")
            break
    print(f"Suggested p range: {self.p_range}")
    print(f"Suggested q range: {self.q_range}")
    print(f"Using d = {self.d}")
    if self.s:
        print(f"Using seasonal period s = {self.s}")
    else:
        print("No strong seasonality detected.")
Python
Predictions
With the ARIMA and SARIMAX models predictions are made and compared with the actual test data.
def do_predict(self):
    """Refit the saved model orders on the current data and predict the test set.

    For each available model (ARIMA, SARIMAX) a SARIMAX model with the saved
    order(s) is fitted, the test window is predicted, metrics are printed
    and a plot is shown. Results are stored in self.predictions_arima /
    self.predictions_sarimax; the last fitted model is kept as
    self.best_model for do_forecast.
    """
    if self.EXO:
        # Define which exo features are active
        candidate_cols = [
            'BB_mid',
            'kc_middle',
            'fag' if self.fag else None,
            'gspc' if self.gspc else None,
            'vix' if self.vix else None,
        ]
        self.exo_cols = [col for col in candidate_cols if col is not None]
        # Align close prices with EXOG features on time index
        self.aligned_data = self.df.join(self.exo[self.exo_cols], how='inner')
        # Sanity info
        print("EXOG alignment:")
        print("df index range:", self.df.index.min(), "to", self.df.index.max())
        print("exo index range:", self.exo.index.min(), "to", self.exo.index.max())
        print("aligned index range:", self.aligned_data.index.min(), "to", self.aligned_data.index.max())
        print("Lengths — df:", len(self.df), "exo:", len(self.exo), "aligned:", len(self.aligned_data))
        # Positions of the test window (first test row .. last row). The
        # former "xtrain_size - 1" started one row too early.
        self.startp = max(0, len(self.aligned_data) - len(self.test_r))
        self.endp = len(self.aligned_data) - 1

    if self.arima == True:
        order = self.arima_model.specification.order
        # (0, 0, 0, 0) is SARIMAX's default seasonal order (no seasonality)
        self.predictions_arima = self._fit_predict_plot('ARIMA', order, (0, 0, 0, 0))
    if self.sarimax == True:
        # Take both orders from the SARIMAX model itself; reusing the ARIMA
        # order raised NameError when only a SARIMAX model was loaded.
        spec = self.sarimax_model.specification
        arima_overlay = self.predictions_arima if self.arima == True else None
        self.predictions_sarimax = self._fit_predict_plot('SARIMAX', spec.order, spec.seasonal_order, overlay=arima_overlay)


def _fit_predict_plot(self, label, order, seasonal_order, overlay=None):
    """Fit one model, predict the test window, print metrics and plot.

    Args:
        label: 'ARIMA' or 'SARIMAX'; used in messages and plot titles.
        order: (p, d, q) order for the model.
        seasonal_order: Seasonal (P, D, Q, s) order for the model.
        overlay: Optional earlier predictions frame (column 'predicted')
            drawn into the same plot.

    Returns:
        pd.DataFrame with columns 'dt', 'actual' and 'predicted'.
    """
    if self.EXO:
        endog = self.aligned_data['close']
        model = SARIMAX(endog, exog=self.aligned_data[self.exo_cols], order=order, seasonal_order=seasonal_order, enforce_stationarity=False, enforce_invertibility=False)
    else:
        endog = self.df['close']
        model = SARIMAX(endog, order=order, seasonal_order=seasonal_order, enforce_stationarity=False, enforce_invertibility=False)
    fitted_model = model.fit(disp=False)
    self.best_model = fitted_model  # needed for forecast!

    # Predict exactly the rows of the test window, addressed by position
    start_pos = max(0, len(endog) - len(self.test_r))
    end_pos = len(endog) - 1
    if self.EXO:
        raw = fitted_model.predict(start=start_pos, end=end_pos, exog=self.xtest[self.exo_cols])
    else:
        raw = fitted_model.predict(start=start_pos, end=end_pos)

    if self.DIFF:
        # Inverse differencing: cumulate from the last raw training price
        self.do_message("Reversing differencing for forecast...")
        last_value = self.train_r['close'].iloc[-1]
        predicted = self.reverse_differencing(forecast=raw, base_value=last_value, target_index=self.test_r.index)
    else:
        # Use the bare values: building a Series from a Series would
        # re-align on index labels instead of assigning by position.
        predicted = pd.Series(np.asarray(raw), index=self.test_r.index)
    # Always compare against the raw prices; self.test holds differences
    # once differencing was applied, which made the metrics meaningless.
    actual_series = self.test_r['close']
    predictions = pd.DataFrame({'dt': self.test_r.index, 'actual': actual_series.values, 'predicted': predicted.values})

    # Compute and print metrics
    mae = mean_absolute_error(predictions['actual'], predictions['predicted'])
    rmse = np.sqrt(mean_squared_error(predictions['actual'], predictions['predicted']))
    r2 = r2_score(predictions['actual'], predictions['predicted'])
    print(f"{label} Prediction Metrics:")
    print(f"MAE = {mae:.4f}")
    print(f"RMSE = {rmse:.4f}")
    print(f"R2 = {r2:.4f}")

    # Plotly visualization of train + test + prediction
    trains = self.train_r.tail(len(self.test))  # shortening for clearer plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=trains.index, y=trains['close'], mode='lines', name='Train', line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=actual_series.index, y=actual_series, name="Actual", line=dict(color='green')))
    if overlay is not None:
        fig.add_trace(go.Scatter(x=self.test.index, y=overlay['predicted'], mode='lines', name='ARIMA Predicted Test'))
    fig.add_trace(go.Scatter(x=predictions['dt'], y=predictions['predicted'], name=f"{label} Prediction", line=dict(color='red', dash='dash')))
    fig.update_layout(title=f"{label} Prediction vs Actual", xaxis_title="Date", yaxis_title="Price")
    pio.show(fig)
    return predictions
Here is a plot of the combined predictions and the actual data.

Forecast for the next 12 Hours
def do_forecast(self):
    """Forecast the next N_INPUT hours, save them to CSV and plot them.

    Uses self.best_model (set by do_predict). With exogenous features the
    future exogenous values are linearly extrapolated from their last six
    observations. When differencing was applied, forecast and confidence
    bounds are converted back to price scale.
    """
    from sklearn.linear_model import LinearRegression
    if self.EXO == True:
        # linearly extrapolate the exogenous variables for the future period
        extrapolated = {}
        for col in self.exo_cols:
            y = self.exo[col].tail(6).values
            X = np.arange(len(y)).reshape(-1, 1)
            model = LinearRegression().fit(X, y)
            X_future = np.arange(len(y), len(y) + self.N_INPUT).reshape(-1, 1)
            extrapolated[col] = model.predict(X_future)
        future_exog = pd.DataFrame(extrapolated, columns=self.exo_cols)
        forecast_obj = self.best_model.get_forecast(steps=self.N_INPUT, exog=future_exog)
    else:
        forecast_obj = self.best_model.get_forecast(steps=self.N_INPUT)
    forecast = forecast_obj.predicted_mean
    ci = forecast_obj.conf_int()  # confidence interval for forecasts

    last_known_timestamp = pd.to_datetime(self.test_r.index[-1])
    # One hourly timestamp per forecast step: 1..N_INPUT inclusive. The
    # former range(1, N_INPUT) produced one timestamp too few.
    future_timestamps = [last_known_timestamp + DateOffset(hours=x) for x in range(1, self.N_INPUT + 1)]
    if len(future_timestamps) != len(forecast):
        print("Mismatch between future timestamps and forecast length.")

    if self.DIFF:
        print('* * * Reverse differencing SARIMAX forecasts to original scale')
        last_value = self.test_r['close'].iloc[-1]  # last actual price is the base
        forecast_levels = self.reverse_differencing(forecast, base_value=last_value, target_index=future_timestamps)
        # NOTE(review): the bounds are mapped back by cumulating the per-step
        # bounds, as before - confirm this is the intended interval.
        ci_lower = self.reverse_differencing(ci.iloc[:, 0], base_value=last_value, target_index=future_timestamps)
        ci_upper = self.reverse_differencing(ci.iloc[:, 1], base_value=last_value, target_index=future_timestamps)
    else:
        # Bounds must exist in this branch too; they were only defined when
        # differencing was applied, so plotting raised NameError.
        forecast_levels = pd.Series(forecast.values, index=future_timestamps)
        ci_lower = pd.Series(ci.iloc[:, 0].values, index=future_timestamps)
        ci_upper = pd.Series(ci.iloc[:, 1].values, index=future_timestamps)

    forecast_df = pd.DataFrame({'dt': forecast_levels.index, 'Forecast': forecast_levels.values})
    forecast_df = forecast_df.set_index('dt')  # Set 'dt' as the index
    print("Structure of forecast_df:")
    print(forecast_df.head())
    print(forecast_df.columns)
    print("Final forecast DataFrame:")
    print(forecast_df)

    # Save the forecast to a CSV file
    filename = f'forecast_{self.MARKT}_next_{self.N_INPUT}_hours_{self.time_stamp()}.csv'
    forecast_df.to_csv(filename)
    print(f"✅ Forecast saved to {filename}")

    # Plot the forecast along with the last 48 hours of historical data
    df48 = self.test_r.tail(48)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df48.index, y=df48['close'], mode='lines', name='Historical Data'))
    fig.add_trace(go.Scatter(x=forecast_df.index, y=forecast_df['Forecast'], mode='lines', name='Forecast Next 12 Hours'))
    fig.add_trace(go.Scatter(x=ci_lower.index, y=ci_lower, name='Lower Bound', line=dict(dash='dot', color='gray')))
    fig.add_trace(go.Scatter(x=ci_upper.index, y=ci_upper, name='Upper Bound', line=dict(dash='dot', color='gray')))
    fig.update_layout(title='SARIMAX Model Forecast for the Next 12 Hours', xaxis_title='Date', yaxis_title='Price', legend_title="Legend")
    pio.show(fig)
Here is a plot of the forecast together with the actual XRP data of the next day.

