AditAg · AditAg · Mar 6, 2017 · May 14, 2017
diff --git a/cam_face_landmarks.py b/cam_face_landmarks.py
@@ -0,0 +1,25 @@
+import dlib
+import cv2
+
+predictor_path = "shape_predictor_68_face_landmarks.dat"
+feed = cv2.VideoCapture(0)
+print feed.read()[1].shape
+detector = dlib.get_frontal_face_detector()
+predictor = dlib.shape_predictor(predictor_path)
+win = dlib.image_window()
+
+while True:
+	img = feed.read()[1]
+	win.clear_overlay()
+	win.set_image(img)
+
+	dets = detector(img, 1)
+	print("Number of faces detected: {}".format(len(dets)))
+	for k, d in enumerate(dets):
+		print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(k, d.left(), d.top(), d.right(), d.bottom()))
+		shape = predictor(img, d)
+		print("Part 0: {}, Part 1: {} ...".format(shape.part(0),shape.part(1)))
+		win.add_overlay(shape)
+	print("")
+	win.add_overlay(dets)
+	dlib.hit_enter_to_continue()
diff --git a/codes/Co-training_.py b/codes/Co-training_.py
@@ -0,0 +1,86 @@
+'''
+Co-Training Semi-Supervised Learning Approach on CK+ dataset
+
+# Libraries Used
+    pandas, sklearn, numpy
+
+# Variables
+    clf_svm : SVM classifier
+    df : pandas dataframe
+    df_labelled : Dataframe of labelled data
+    df_unlabelled : Dataframe of unlabelled data
+    X : inputs
+    y : labels
+    pred : predictions
+    k : Parameter for co-training
+'''
+
+# Libraries used
+import pandas as pd                                                             # To read database
+from sklearn.cross_validation import train_test_split                           # To split database
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support     # Result of model on database
+import numpy as np                                                              # Mathematical analysis
+from sklearn.svm import SVC                                                     # To apply SVM
+from sklearn.grid_search import GridSearchCV                                    # For Hyperparameter tuning
+
+param_grid = {
+         'C': [1e-2, 1e-1, 1e0, 1e2, 1e1, 1e3, 5e3, 1e4, 5e4,1e5,450000],
+         'kernel': ['linear', 'rbf']
+          }
+
+clf_svm1 = GridSearchCV(SVC(), param_grid) # Classifier 1
+clf_svm2 = GridSearchCV(SVC(), param_grid) # Classifier 2
+
+# Read database
+df = pd.read_csv("emotion.csv",header=0)
+df = df.drop(["Person Id", "Person SubID"],axis=1)
+df_labelled = df[df["Emotion"]!=-1]
+df_unlabelled = df[df["Emotion"]==-1]
+
+# Seperate labelled and unlabelled data
+y_labelled = df_labelled["Emotion"]
+X_labelled = df_labelled.drop(["Emotion"],axis=1)
+
+y_unlabelled = df_unlabelled["Emotion"]
+X_unlabelled = df_unlabelled.drop(["Emotion"],axis=1)
+
+#COTRAINING
+k=4
+X_train1, X_train2, y_train1, y_train2 = train_test_split(np.array(X_labelled),np.array(y_labelled),test_size=0.5,random_state=42)
+length1=X_unlabelled.shape[0]
+X_u=np.array(X_unlabelled)
+X_unlabelled1=np.array(X_u[0:(length1/2)])
+X_unlabelled2=np.array(X_u[(length1/2):])
+np.random.shuffle(X_unlabelled1)
+np.random.shuffle(X_unlabelled2)
+
+low,high=0,k
+while(low<length1/2):
+    clf_svm1.fit(X_train1,y_train1)
+    clf_svm2.fit(X_train2,y_train2)
+    X1=X_unlabelled1[low:high]
+    X2=X_unlabelled2[low:high]
+    pred1=clf_svm1.predict(X1)
+    pred2=clf_svm2.predict(X2)
+
+    X_train1=np.concatenate((X_train1,X2),axis=0)
+    X_train2=np.concatenate((X_train2,X1),axis=0)
+    y_train1=np.concatenate((y_train1,pred2),axis=0)
+    y_train2=np.concatenate((y_train2,pred1),axis=0)
+    low,high=low+k,high+k
+
+
+# Final dataset
+X=np.concatenate((X_train1,X_train2),axis=0)
+y=np.concatenate((y_train1,y_train2),axis=0)
+
+# Split the dataset
+X_train, X_cv, y_train, y_cv = train_test_split(X,y,test_size=0.2,random_state=42)
+
+# SVM
+clf_svm = GridSearchCV(SVC(), param_grid)
+clf_svm.fit(X_train, y_train)
+print clf_svm.best_params_
+pred = clf_svm.predict(X_cv)
+print accuracy_score(pred, y_cv)
+print precision_recall_fscore_support(pred, y_cv, average='weighted', labels=list(range(8)))
diff --git a/codes/EM_curve.py b/codes/EM_curve.py
@@ -0,0 +1,98 @@
+'''
+Semi-Supervised Learning Approach on CK+ dataset: accuracy vs. SVM C and split ratio
+
+# Libraries Used
+    pandas, sklearn, numpy
+
+# Variables
+    clf_svm : SVM classifier
+    clf_knn : KNN classifier
+    clf_rf : Random Forest classifier
+    clf_nb : Naive Bayes classifier
+    clf_nn : Multi-Layer Perceptron classifier
+    clf_ada : AdaBoost Classifier
+    df : pandas dataframe
+    df_labelled : Dataframe of labelled data
+    df_unlabelled : Dataframe of unlabelled data
+    X : inputs
+    y : labels
+    pred : predictions
+'''
+
+# Libraries used
+import pandas as pd                                                             # To read database
+from sklearn.cross_validation import train_test_split                           # To split database
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support     # Result of model on database
+import numpy as np                                                              # Mathematical analysis
+from sklearn.svm import SVC                                                     # Importing Support Vector Classifier
+import matplotlib.pyplot as plt                                                 # Plotting data
+
+# Initialization for graph
+xplt=[0.01,0.05,0.1,0.5,1,2,4,8,10,15]
+ratio=[0.1,0.2,0.3,0.4]
+acc_plt=[[],[],[],[]]
+
+def func(x,j):
+    '''
+    Takes as input C for SVM and split ratio and updates data
+    '''
+    clf_svm = SVC(kernel="rbf", C=x)
+
+    # Dataframe
+    df = pd.read_csv("emotion.csv",header=0)
+    df = df.drop(["Person Id", "Person SubID"],axis=1)
+    df_labelled = df[df["Emotion"]!=-1]
+    df_unlabelled = df[df["Emotion"]==-1]
+
+    # Labelled and unlabelled data
+    # Expectation Maximization
+    y_labelled = df_labelled["Emotion"]
+    X_labelled = df_labelled.drop(["Emotion"],axis=1)
+    y_unlabelled = df_unlabelled["Emotion"]
+    X_unlabelled = df_unlabelled.drop(["Emotion"],axis=1)
+
+    clf_svm.fit(X_labelled,y_labelled)
+    pred = clf_svm.predict(X_unlabelled)
+    df["Emotion"][df["Emotion"]==-1]=pred
+
+    # Final database
+    X=df.drop(["Emotion"],axis=1)
+    y=df["Emotion"]
+
+    # Split the data into training and validation dataset
+    X_train, X_cv, y_train, y_cv = train_test_split(X,y,test_size=j,random_state=42)
+
+    # Get Accuracy plot for data
+    clf_svm = SVC(kernel="rbf", C=450000)
+    clf_svm.fit(X_train, y_train)
+    pred = clf_svm.predict(X_cv)
+    a=accuracy_score(pred, y_cv)
+    a=a.astype(np.float64)
+    if j==0.1:
+        i=0
+    elif j==0.2:
+        i=1
+    elif j==0.3:
+        i=2
+    else:
+        i=3
+    acc_plt[i].append(float(a)*100)
+
+# Get plots for data
+for j in ratio:
+    for i in xplt:
+        func(i,j)
+
+# Plot the data
+plt.figure()
+plt.plot(xplt,acc_plt[0],"navy",label="Split Ratio - 0.1")
+plt.plot(xplt, acc_plt[1],"darkorange",label="Split Ratio - 0.2")
+plt.plot(xplt,acc_plt[2],"red",label="Split Ratio - 0.3")
+plt.plot(xplt,acc_plt[3],color="green",label="Split Ratio - 0.4")
+plt.xlabel('Value of C')
+plt.ylabel('Accuracy')
+plt.xlim((0,15))
+plt.ylim((80,102))
+plt.title('Comparison of Accuracy, C and Split Ration')
+plt.legend(loc="lower right")
+plt.show()
diff --git a/codes/RFClassifier_plot.py b/codes/RFClassifier_plot.py
@@ -0,0 +1,61 @@
+'''
+Hyper-Parameter Tuning for Random Forest Classifier
+
+# Libraries Used
+    pandas,sklearn,numpy
+
+# Variables Used
+
+'''
+
+# Libraries used
+import pandas as pd                                                             # To read database
+from sklearn.cross_validation import train_test_split                           # To split database
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support     # Result of model on database
+import numpy as np                                                              # Mathematical analysis
+import matplotlib.pyplot as plt                                                 # Plotting result
+from mpl_toolkits.mplot3d import Axes3D                                         # 3D Axes
+
+# Read database
+df = pd.read_csv("emotion.csv",header=0)
+df = df.drop(["Person Id", "Person SubID"],axis=1)
+df_train = df[df["Emotion"]!=-1]                    # Training Dataframe
+df_test = df[df["Emotion"]==-1]                     # Testing Dataframe
+
+# Final dataset
+y = np.array(df_train["Emotion"])
+x = np.array(df_train.drop(["Emotion"],axis=1))
+
+# Split the dataset
+# Split into training and validation data
+X_train, X_cv, y_train, y_cv = train_test_split(x,y,test_size=0.2,random_state=42)
+
+X,Y,Z = list(),list(),list()
+from sklearn.ensemble import RandomForestClassifier
+for i in range(5,105,5):
+    for j in range(1,7):
+        clf_rf = RandomForestClassifier(n_estimators=i, min_samples_split=2**j)
+        print "Training Random Forest classifier..."
+        clf_rf.fit(X_train, y_train)
+        pred = clf_rf.predict(X_cv)
+        Z=accuracy_score(pred, y_cv)
+        print precision_recall_fscore_support(pred, y_cv, average='weighted', labels=list(range(8)))
+        X.append(i)
+        Y.append(2**j)
+        Z.append(Z*100)
+
+#Plot the 3D scatter plot for Random Forest Classifier using Axes3D module.
+fig = plt.figure()
+ax = Axes3D(fig)
+ax.scatter(X,Y,Z,zdir='z')
+
+#Provide attributes for the plot
+ax.set_xlim(0, 100)
+ax.set_ylim(0, 70)
+ax.set_zlim(70,100)
+ax.set_xlabel('The no. of trees in the forest')
+ax.set_ylabel('Minimum No. of samples required to split an internal node')
+ax.set_zlabel('Accuracy obtained.')
+
+#Display the plot.
+plt.show()
diff --git a/codes/ROC_SVM_all-labels.py b/codes/ROC_SVM_all-labels.py
@@ -0,0 +1,120 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.multiclass import OneVsRestClassifier
+from sklearn.preprocessing import label_binarize
+from sklearn.cross_validation import train_test_split
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support
+import numpy as np
+import sklearn.metrics
+from sklearn.metrics import roc_curve, auc
+from scipy import interp
+from itertools import cycle
+
+
+df = pd.read_csv("emotion.csv",header=0)
+
+df = df.drop(["Person Id", "Person SubID"],axis=1)
+df_train = df[df["Emotion"]!=-1]
+df_test = df[df["Emotion"]==-1]
+
+y = np.array(df_train["Emotion"])
+X = np.array(df_train.drop(["Emotion"],axis=1))
+#
+y = label_binarize(y, classes=[1, 2,3,4,5,6,7])
+random_state = np.random.RandomState(0)
+n_samples, n_features = X.shape
+X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]
+
+X_train, X_cv, y_train, y_cv = train_test_split(X,y,test_size=0.2,random_state=42)
+
+print X_train.shape, X_cv.shape
+
+# SVM, CNN, ANN, KNN, Random Forest, Naive Bayes
+from sklearn.svm import SVC
+clf_svm =  OneVsRestClassifier(SVC(kernel="rbf", C=10000))
+from sklearn.neighbors import KNeighborsClassifier
+clf_knn = OneVsRestClassifier(KNeighborsClassifier(n_neighbors=3))
+from sklearn.ensemble import RandomForestClassifier
+clf_rf = OneVsRestClassifier(RandomForestClassifier(n_estimators=10, min_samples_split=50))
+from sklearn.naive_bayes import GaussianNB
+clf_nb = OneVsRestClassifier(GaussianNB())
+from sklearn.neural_network import MLPClassifier
+clf_nn = OneVsRestClassifier(MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 4), random_state=1))
+from sklearn.ensemble import AdaBoostClassifier
+clf_ada = OneVsRestClassifier(AdaBoostClassifier(n_estimators=50))
+
+n_classes=7
+
+print "Training SVM classifier.."
+clf_svm.fit(X_train, y_train)
+pred = clf_svm.decision_function(X_cv)
+
+fpr = dict()
+tpr = dict()
+roc_auc = dict()
+for i in range(n_classes):
+    fpr[i], tpr[i], _ = roc_curve(y_cv[:, i], pred[:, i])
+    roc_auc[i] = auc(fpr[i], tpr[i])
+
+
+##plt.figure()
+##lw = 2
+##plt.plot(fpr[2], tpr[2], color='darkorange',
+##         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc[2])
+##plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
+##plt.xlim([0.0, 1.0])
+##plt.ylim([0.0, 1.05])
+##plt.xlabel('False Positive Rate')
+##plt.ylabel('True Positive Rate')
+##plt.title('Receiver operating characteristic example')
+##plt.legend(loc="lower right")
+##plt.show()
+##
+##
+##
+###print "Training Adaboost classifier..."
+#clf_ada.fit(X_train, y_train)
+#pred = clf_ada.predict(X_cv)
+v={0:'anger',1:'contempt',2:'disgust',3:'fear',4:'happy',5:'sadness',6:'surprise'}
+fpr["micro"], tpr["micro"], _ = roc_curve(y_cv.ravel(), pred.ravel())
+roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
+all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
+lw=2
+# Then interpolate all ROC curves at this points
+mean_tpr = np.zeros_like(all_fpr)
+for i in range(n_classes):
+    mean_tpr += interp(all_fpr, fpr[i], tpr[i])
+
+# Finally average it and compute AUC
+mean_tpr /= n_classes
+
+fpr["macro"] = all_fpr
+tpr["macro"] = mean_tpr
+roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
+
+# Plot all ROC curves
+plt.figure()
+plt.plot(fpr["micro"], tpr["micro"],
+         label='micro-average ROC curve (area = {0:0.2f})'
+               ''.format(roc_auc["micro"]),
+         color='deeppink', linestyle=':', linewidth=4)
+
+plt.plot(fpr["macro"], tpr["macro"],
+         label='macro-average ROC curve (area = {0:0.2f})'
+               ''.format(roc_auc["macro"]),
+         color='navy', linestyle=':', linewidth=4)
+
+colors = cycle(['aqua', 'darkorange', 'cornflowerblue','red','green','purple','lightgreen'])
+for i, color in zip(range(n_classes), colors):
+    plt.plot(fpr[i], tpr[i], color=color, lw=lw,
+             label='ROC curve of class {0} (area = {1:0.2f})'
+             ''.format(v[i], roc_auc[i]))
+
+plt.plot([0, 1], [0, 1], 'k--', lw=lw)
+plt.xlim([0.0, 1.0])
+plt.ylim([0.0, 1.05])
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('Some extension of Receiver operating characteristic to multi-class')
+plt.legend(loc="lower right")
+plt.show()