-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStressLevelClassification.py
More file actions
131 lines (108 loc) · 4.63 KB
/
StressLevelClassification.py
File metadata and controls
131 lines (108 loc) · 4.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from sklearn.cluster import KMeans
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import requests
# Load the dataset.
# The CSV is downloaded from the URL below, saved next to the script, and
# then parsed with pandas.
url = 'https://docs.google.com/uc?export=download&id=184-NNnzhNmB1IIjz-7YTW5SrkVFT6fF_'
# A timeout keeps the app from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of saving an error page as "CSV".
response.raise_for_status()
# Write the raw bytes: text mode would re-encode the payload with the
# platform's default encoding and translate newlines on some systems.
with open('StressLevelDataset.csv', 'wb') as file:
    file.write(response.content)
data = pd.read_csv("StressLevelDataset.csv")
# Original column names, captured before any page adds derived columns.
label_feature = data.columns[:]
# Human-readable variant of the column names ("some_name" -> "Some Name").
dataset = data.columns.str.replace('_', ' ').str.title()
# Sidebar menu: a radio selector whose value is stored in navigation_menu.
sidebar = st.sidebar
sidebar.header("Navigation")
navigation_menu = sidebar.radio(
    label="Go to",
    options=["Exploratory Data Analysis", "Clustering", "Prediction"],
)
if navigation_menu == "Exploratory Data Analysis":
    # Exploratory page: raw table, one histogram per column, and a
    # correlation heatmap of the numeric columns.
    st.title("Dataset Information")
    st.header("Dataset Kaggle")
    st.write(data)

    # Size the subplot grid from the columns that are actually plotted.
    # Deriving it from data.shape[1] - 1 leaves the grid one row short
    # whenever the column count is 3k + 1, and axi[i] then raises IndexError.
    col = 3
    row = max(1, int(np.ceil(len(label_feature) / col)))
    fig, ax = plt.subplots(row, col, figsize=(14, 30))
    axi = ax.ravel()
    for i, c in enumerate(label_feature):
        distinct_values = data[c].unique()
        uni = len(distinct_values)
        # One bin per distinct value in the column.
        axi[i].hist(data[c].tolist(), bins=uni, edgecolor='black')
        axi[i].set_title(c)
        axi[i].set_ylabel('Freq')
        # With few distinct values, label each one explicitly on the x axis.
        if uni < 10:
            axi[i].set_xticks(distinct_values)
    # Hide grid cells left over when the column count is not a multiple of col.
    for spare_axis in axi[len(label_feature):]:
        spare_axis.set_visible(False)

    st.header("Visualization of Dataset")
    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; release
    # this one so reruns of the script do not accumulate figures.
    plt.close(fig)

    st.header("Corelation of Dataset")
    # Create a heatmap using Seaborn; it draws on the current figure (heat).
    heat = plt.figure(figsize=(8, 6))
    sns.heatmap(data.corr(numeric_only=True), cmap='turbo')
    plt.title("Correlation Heatmap")
    # Display the heatmap using st.pyplot(), then release the figure.
    st.pyplot(heat)
    plt.close(heat)
elif navigation_menu == "Clustering":
st.sidebar.header("Select The Features")
selected_feature1 = st.sidebar.selectbox('Select Feature 1', label_feature)
selected_feature2 = st.sidebar.selectbox('Select Feature 2', label_feature)
st.title("Clustering using KMeans")
kmeans = KMeans(n_clusters=3) # Set the number of clusters as needed
data['Cluster'] = kmeans.fit_predict(data[label_feature])
# Scatter plot for clustering
clust = plt.figure(figsize=(8, 6))
plt.scatter(data[selected_feature1], data[selected_feature2], c=data['Cluster'], cmap='viridis')
plt.title('Clustering of Data Points')
plt.xlabel(selected_feature1)
plt.ylabel(selected_feature2)
# Display the scatter plot using st.pyplot()
st.pyplot(clust)
elif navigation_menu == "Prediction":
# Split the dataset
X = data.drop('stress_level', axis=1)
y = data['stress_level']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create classifiers
weak_classifier = DecisionTreeClassifier(max_depth=1)
ada_classifier = AdaBoostClassifier(weak_classifier, n_estimators=50, random_state=42)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
# Fit classifiers to the training data
ada_classifier.fit(X_train, y_train)
rf_classifier.fit(X_train, y_train)
# Streamlit app
st.title("Stress Level Prediction")
st.header("Input Your Characteristic")
# Collect user input
user_input_dict = {}
for column in data.columns[:-1]:
min_value = data[column].min()
max_value = data[column].max()
user_input = st.number_input(
label=f"Enter {column} value ({min_value} - {max_value})",
min_value=min_value,
max_value=max_value,
value=min_value
)
user_input_dict[column] = user_input
# Button to trigger classification
if st.button("Predict Stress Level"):
user_input_data = pd.DataFrame([user_input_dict])
# Predict with AdaBoost
ada_prediction = ada_classifier.predict(user_input_data)
# Predict with Random Forest
rf_prediction = rf_classifier.predict(user_input_data)
# Calculate and display accuracy
st.subheader("Prediction Results:")
st.write(f"AdaBoost Prediction: {ada_prediction[0]}")
st.write(f"Random Forest Prediction: {rf_prediction[0]}")
# Evaluate accuracy
y_pred_ada = ada_classifier.predict(X_test)
y_pred_rf = rf_classifier.predict(X_test)
accuracy_ada = accuracy_score(y_test, y_pred_ada)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
st.subheader("Model Accuracy:")
st.write(f"AdaBoost Accuracy: {accuracy_ada:.2%}")
st.write(f"Random Forest Accuracy: {accuracy_rf:.2%}")