-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathiris_starter_script.py
More file actions
73 lines (56 loc) · 2.05 KB
/
iris_starter_script.py
File metadata and controls
73 lines (56 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# coding: utf-8
"""
Simple script demonstrating some scikit-learn functionality, including:
- train_test_split
- model fitting and evaluation
- decision tree classifier
- confusion matrix
"""
# The docstring above must be the first statement in the module so that
# Python stores it as __doc__; module-level dunders follow it.
__author__ = "Francesco Mosconi"
__copyright__ = "Copyright 2016, Data Weekends"
__license__ = "MIT"
__email__ = "info@dataweekends.com"
# Import the necessary libraries.
# All third-party imports are grouped here so the script's dependencies
# are visible in one place instead of being scattered between steps.
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Read data from file (path is relative to the current working directory)
df = pd.read_csv('iris-2-classes.csv')

# Encode the flower type as a binary target: virginica -> 1, versicolor -> 0.
# Series.map with a dict yields NaN for any label not listed here.
df['target'] = df['iris_type'].map({'virginica': 1,
                                    'versicolor': 0})

# Define features (X) and target (y) variables
X = df[['sepal_length_cm', 'sepal_width_cm',
        'petal_length_cm', 'petal_width_cm']]
y = df['target']

# Initialize a decision tree model
# (random_state is fixed so repeated runs give the same tree)
model = DecisionTreeClassifier(max_depth=5,
                               random_state=0)

# Split the features and the target into train and test subsets.
# Ratio is 70/30 (test_size=0.3); random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Train the model
model.fit(X_train, y_train)

# Calculate the model accuracy score on the held-out test subset
my_score = model.score(X_test, y_test)

# Single-argument print(...) calls behave identically on Python 2 and 3,
# so the script no longer fails with a SyntaxError under Python 3.
print("\n")
print("Using model: %s" % model)
print("Classification Score: %0.2f" % my_score)

# Print the confusion matrix for the decision tree model
y_pred = model.predict(X_test)
print("\n=======confusion matrix==========")
print(confusion_matrix(y_test, y_pred))
# ### 3) Iterate and improve
#
# Now you have a basic pipeline. How can you improve the score? Try:
# - changing the parameters of the model
# check the documentation here:
# http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
#
# - changing the model itself
# check examples here:
# http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
#
# - separating 3 classes of flowers using the provided `iris.csv` dataset