XGBoost regression predictions are shifted along the y axis

The XGBoost regression output is shifted along the Y axis, as shown in the image below (Ytest vs. Ypred). The code below is what produced these results. I tried scaling the values with the different scalers, but to no avail. I faced this same issue earlier while implementing an LSTM model as well. Any input is appreciated.

    from xgboost import XGBRegressor
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import os
import pprint
import plotly.express as px
import plotly.graph_objects as go
import webbrowser
from math import sqrt
import matplotlib.pyplot as plt

# * load the data
# Read every file in the emission_data directory and stack the rows into one
# DataFrame. os.listdir() returns entries in arbitrary order, and the
# train/test split further down is positional (first 60% of rows vs. the
# rest), so the listing is sorted to make the row order -- and therefore the
# split -- reproducible across runs and machines.
data_dir = os.path.join('.', 'emission_data')
frames = [
    pd.read_csv(os.path.join(data_dir, name), index_col=None, header=0)
    for name in sorted(os.listdir(data_dir))
]

# ignore_index=True discards the per-file row labels and renumbers 0..n-1.
df1 = pd.concat(frames, axis=0, ignore_index=True)


# Quick visual overview: one subplot per column on a 6x2 grid.
df1.plot(subplots=True, layout=(6, 2))
plt.show()
# print(df1.head)

# * split into X and Y
# The first ten columns are the features; the eleventh column is the target.
X = df1.iloc[:, 0:10]
Y = df1.iloc[:, 10]

# * split sets
# Positional split without shuffling: the leading `split` fraction of rows is
# used for training and the remaining rows for testing.
split = 0.6
n_train = int(len(X.index) * split)

Xtrain = X.iloc[:n_train, :]
Xtest = X.iloc[n_train:, :]
Ytrain = Y.iloc[:n_train]
Ytest = Y.iloc[n_train:]

#! the scaler expects a 2-D array, so turn the training target into a
#! single-column numpy array
Ytrain = Ytrain.to_numpy().reshape((-1, 1))

# * scale train X, Y and test X
# Fit the scalers on the training split only and reuse the fitted feature
# scaler for the test split. Fitting a separate scaler on Xtest centres and
# scales the test features with different statistics than the ones the model
# is trained on, so the same raw value maps to different scaled values in the
# two sets -- which can show up as predictions offset from Ytest.
rbsxt = RobustScaler()  # feature scaler, fitted on Xtrain only
rbsyt = RobustScaler()  # target scaler, reused later to inverse-transform predictions

Xtrain = pd.DataFrame(rbsxt.fit_transform(
    Xtrain), index=Xtrain.index, columns=Xtrain.columns)
# Ytrain is already a plain (n, 1) numpy array here, so there is no
# index/columns metadata to carry over.
Ytrain = pd.DataFrame(rbsyt.fit_transform(
    Ytrain))
# transform only (no fit): apply the training-set centre/scale to the test set.
Xtest = pd.DataFrame(rbsxt.transform(
    Xtest), index=Xtest.index, columns=Xtest.columns)

# Train an XGBoost regressor with default settings on the scaled training data.
model = XGBRegressor()
model.fit(Xtrain, Ytrain)

# Predictions come back in the scaled target space; reshape them to a single
# column so the target scaler can map them back to the original units.
scaled_pred = model.predict(Xtest).reshape((-1, 1))
ypred = rbsyt.inverse_transform(scaled_pred)

# Root-mean-squared error between the unscaled test target and the predictions.
rmse = sqrt(mean_squared_error(Ytest, ypred))
print(rmse)


# fig = px.line(Ytest,  title='Ytest')
# Bring both series to flat 1-D arrays so they can be plotted against a
# shared implicit x axis (the sample position).
Ytest_np = Ytest.to_numpy()
ypred = ypred.flatten()

# One line trace per series: the true test target first, then the predictions.
fig = go.Figure()
for series, label in ((Ytest_np, 'Ytest'), (ypred, 'ypred')):
    fig.add_trace(go.Scatter(y=series, mode='lines', name=label))

# Save the figure as a standalone HTML page next to the script.
html_path = '.\\Ytest.html'
fig.write_html(html_path)

# Open the saved page in Chrome; new=2 asks the browser for a new tab.
new = 2
chromepath = 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe %s'
webbrowser.get(chromepath).open(html_path, new=new)


Solution 1:[1]

Calling `plt.ylim(0,)` (or `set_ylim` on the axes object) should fix this in matplotlib. Why don't you try that for yourself?

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Farhan Hai Khan