Machine_learning/FeatureScaling.py at main · gouri1694/Machine_learning · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import scipy.stats as stat
import pylab
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# # Standardization
# df=sns.load_dataset('tips')
# print(df.head())
# mean=np.mean(df['total_bill'])
# std=np.std(df['total_bill'])
# print(mean,std)
# normalized_data=[(i-mean)/std for i in df['total_bill'].tolist()]
# print(normalized_data)
# # sns.histplot(df['total_bill'])
# # sns.histplot(normalized_data)
# # plt.show()
# from sklearn.preprocessing import StandardScaler
# scaler=StandardScaler()
# # scaler.fit(df[['total_bill']])
# # scaler.transform(df[['total_bill']])
# # standardized_data=scaler.fit_transform(df[['total_bill']])
# # dfnew=pd.DataFrame(standardized_data,columns=['total_bill'])
# # print(dfnew)
# standardized_data=scaler.fit_transform(df[['total_bill','tip']])
# dfnew=pd.DataFrame(standardized_data,columns=['total_bill','tip']).reset_index(drop=True)
# print(dfnew)

# # Normalization-Min Max Scaler
# df=sns.load_dataset('taxis')
# print(df.head())
# from sklearn.preprocessing import MinMaxScaler
# min_max=MinMaxScaler()
# normalizedData=min_max.fit_transform(df[['distance','fare','tip']])
# dfnew=pd.DataFrame(normalizedData,columns=['distance','fare','tip']).reset_index(drop=True)
# print(dfnew)

# Unit Vector
# Rescale each row of the selected numeric columns with scikit-learn's
# `normalize` (per its documentation the default is the L2 norm applied
# row-wise, so every row becomes a vector of length 1).
feature_columns = ['distance', 'fare', 'tip']

df = sns.load_dataset('taxis')
from sklearn.preprocessing import normalize

unitnormalize = normalize(df[feature_columns])

# Put the scaled values back into a labelled DataFrame for display.
dfnew = pd.DataFrame(unitnormalize, columns=feature_columns)
dfnew = dfnew.reset_index(drop=True)
print(dfnew)