-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSimpleLinearRegression.py
More file actions
86 lines (84 loc) · 2.75 KB
/
SimpleLinearRegression.py
File metadata and controls
86 lines (84 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Import the basic libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Lift pandas' display limits so printed frames show every column at full width.
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# Load the height/weight dataset and preview its first rows.
df = pd.read_csv('./data/height-weight.csv')
print(df.head())

# Scatter the raw data: weight on the x-axis, height on the y-axis.
weight_values, height_values = df['Weight'], df['Height']
plt.scatter(weight_values, height_values)
plt.xlabel("Weight")
plt.ylabel("Height")
plt.show()
from sklearn.model_selection import train_test_split

# Divide the dataset into the independent feature and the dependent target.
# The double brackets keep X as a one-column DataFrame; y is a Series.
X = df[['Weight']]
y = df['Height']

# Train/test split: hold out 20% of the rows, with a fixed seed so the
# partition is reproducible between runs.
split_parts = train_test_split(X, y, test_size=0.20, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Report the full feature shape, then the shape of each split part.
print(X.shape)
print(*(part.shape for part in split_parts))
from sklearn.preprocessing import StandardScaler

# Preview the unscaled training feature before it is overwritten below.
print(X_train.head())

# Standardize the independent feature. The scaler is fitted on the training
# split only, and the same fitted scaler is then applied to the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Scatter the scaled training feature against the target.
plt.scatter(X_train, y_train)
plt.show()
from sklearn.linear_model import LinearRegression

# Train the simple linear regression model on the scaled training feature.
regressor = LinearRegression().fit(X_train, y_train)

# Report the fitted parameters.
print("The slope or coefficient of weight is ", regressor.coef_)
print("Intercept:", regressor.intercept_)

# Overlay the fitted line (red) on the training scatter.
train_fit = regressor.predict(X_train)
plt.scatter(X_train, y_train)
plt.plot(X_train, train_fit, 'r')
plt.show()
# Prediction formula (same for the training and the test data):
#     predicted height = intercept + coef_ * scaled weight
# For example y_pred = 157.5 + 17.03 * X. These numbers are illustrative
# only; the fitted values are regressor.intercept_ and regressor.coef_.

# Predict heights for the held-out test split and print them next to the
# true values.
y_pred_test = regressor.predict(X_test)
print(y_pred_test, y_test)

# Overlay the predictions (red line) on the test scatter.
plt.scatter(X_test, y_test)
plt.plot(X_test, y_pred_test, 'r')
plt.show()
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.metrics import r2_score

# Error metrics on the test split: mean squared error, mean absolute error
# and root mean squared error (the square root of the MSE).
mse = mean_squared_error(y_test, y_pred_test)
mae = mean_absolute_error(y_test, y_pred_test)
rmse = np.sqrt(mse)
print(mse)
print(mae)
print(rmse)

# Coefficient of determination (R-squared) on the test split.
score = r2_score(y_test, y_pred_test)
print(score)

# Adjusted R-squared: 1 - (1 - R^2) * (n - 1) / (n - p - 1), where n is the
# number of test samples and p the number of features. The value is assigned
# and printed; as a bare expression it would be silently discarded when the
# file runs as a script.
n_samples = len(y_test)
n_features = X_test.shape[1]
adjusted_score = 1 - (1 - score) * (n_samples - 1) / (n_samples - n_features - 1)
print(adjusted_score)
## New data point: weight is 80
# The scaler was fitted on a DataFrame, so it remembers the column name.
# Passing a one-row DataFrame with the same column avoids the scikit-learn
# warning about missing feature names that a bare [[80]] list triggers.
new_weight = pd.DataFrame([[80]], columns=X.columns)
scaled_weight = scaler.transform(new_weight)
print(scaled_weight)
print(scaled_weight[0])

# The regressor was trained on the scaled array, so it takes the scaled
# (1, 1) array directly.
print("The height prediction for weight 80 kg is :", regressor.predict(scaled_weight))
## Assumptions
# Scatter the true test values against the predictions.
plt.scatter(y_test, y_pred_test)
plt.show()

## Residuals: true value minus predicted value for each test row.
residuals = y_test - y_pred_test
print(residuals)

## Plot the distribution of the residuals.
import seaborn as sns
# sns.distplot is deprecated; histplot with kde=True and stat="density"
# draws the same kind of figure (density-normalised histogram plus KDE curve).
sns.histplot(residuals, kde=True, stat="density")
plt.show()

## Scatter plot of predictions against residuals.
## The points should be spread evenly, with no visible pattern.
plt.scatter(y_pred_test, residuals)
plt.show()