From ab2bc040b386aad992f3e7865d1598da59d65346 Mon Sep 17 00:00:00 2001
From: Mace Ojala <maco@itu.dk>
Date: Thu, 4 Jul 2019 20:42:12 +0200
Subject: [PATCH 1/5] Added the membership to clusters in output

---
 utils/process_images.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/utils/process_images.py b/utils/process_images.py
index 626d6d45..0463db03 100755
--- a/utils/process_images.py
+++ b/utils/process_images.py
@@ -279,9 +279,11 @@ def get_centroids(self):
     centroid_paths = [self.vector_files[i] for i in closest]
     centroid_json = []
     for c, i in enumerate(centroid_paths):
+      print([item for item, label in enumerate(fit_model.labels_) if label == c])
       centroid_json.append({
         'img': get_filename(i),
-        'label': 'Cluster ' + str(c+1)
+        'label': 'Cluster ' + str(c+1),
+        'members': [item for item, label in enumerate(fit_model.labels_) if label == c]
       })
     return centroid_json
 

From 2c05be1add5422cd1732b71eb3b753aaccf5c963 Mon Sep 17 00:00:00 2001
From: Mace Ojala <maco@itu.dk>
Date: Thu, 4 Jul 2019 20:49:51 +0200
Subject: [PATCH 2/5] Refactored k-means clustering to its own function

---
 utils/process_images.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/utils/process_images.py b/utils/process_images.py
index 0463db03..7732b6bb 100755
--- a/utils/process_images.py
+++ b/utils/process_images.py
@@ -263,27 +263,36 @@ def get_image_positions(self, fit_model):
       ])
     return image_positions
 
+  def build_clustering(self):
+    '''
+    Use KMeans clustering to find n centroids
+    '''
+    print(' * calculating ' + str(self.n_clusters) + ' clusters')
+    model = KMeans(n_clusters=self.n_clusters)
+    X = np.array(self.image_vectors)
+    fit_model = model.fit(X)
+    self.clustering = fit_model
+
 
   def get_centroids(self):
     '''
     Use KMeans clustering to find n centroid images
     that represent the center of an image cluster
     '''
-    print(' * calculating ' + str(self.n_clusters) + ' clusters')
-    model = KMeans(n_clusters=self.n_clusters)
+    self.build_clustering()
+    centroids = self.clustering.cluster_centers_
+    labels = self.clustering.labels_
     X = np.array(self.image_vectors)
-    fit_model = model.fit(X)
-    centroids = fit_model.cluster_centers_
     # find the points closest to the cluster centroids
     closest, _ = pairwise_distances_argmin_min(centroids, X)
     centroid_paths = [self.vector_files[i] for i in closest]
     centroid_json = []
     for c, i in enumerate(centroid_paths):
-      print([item for item, label in enumerate(fit_model.labels_) if label == c])
+      print([item for item, label in enumerate(labels) if label == c])
       centroid_json.append({
         'img': get_filename(i),
         'label': 'Cluster ' + str(c+1),
-        'members': [item for item, label in enumerate(fit_model.labels_) if label == c]
+        'members': [item for item, label in enumerate(labels) if label == c]
       })
     return centroid_json
 

From d67ddf8ea9a780b291c76cb356da4c237a33c14c Mon Sep 17 00:00:00 2001
From: Mace Ojala <maco@itu.dk>
Date: Thu, 4 Jul 2019 20:52:34 +0200
Subject: [PATCH 3/5] Moved clustering to the class constructor, plus removed a
 leftover debug print statement

---
 utils/process_images.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/utils/process_images.py b/utils/process_images.py
index 7732b6bb..73559325 100755
--- a/utils/process_images.py
+++ b/utils/process_images.py
@@ -68,6 +68,7 @@ def __init__(self, image_glob):
     self.create_image_thumbs()
     self.create_image_vectors()
     self.load_image_vectors()
+    self.build_clustering()
     self.write_json()
     self.create_atlas_files()
     print('Processed output for ' + \
@@ -279,7 +280,6 @@ def get_centroids(self):
     Use KMeans clustering to find n centroid images
     that represent the center of an image cluster
     '''
-    self.build_clustering()
     centroids = self.clustering.cluster_centers_
     labels = self.clustering.labels_
     X = np.array(self.image_vectors)
@@ -288,7 +288,6 @@ def get_centroids(self):
     centroid_paths = [self.vector_files[i] for i in closest]
     centroid_json = []
     for c, i in enumerate(centroid_paths):
-      print([item for item, label in enumerate(labels) if label == c])
       centroid_json.append({
         'img': get_filename(i),
         'label': 'Cluster ' + str(c+1),

From 4f964fe137df2c5d5e9224015e91f2a0a6d8b379 Mon Sep 17 00:00:00 2001
From: Mace Ojala <maco@itu.dk>
Date: Thu, 4 Jul 2019 20:59:29 +0200
Subject: [PATCH 4/5] Shifted to versioned, compatibility TensorFlow API, as
 suggested by the deprecation warnings.

---
 utils/process_images.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/utils/process_images.py b/utils/process_images.py
index 626d6d45..d5168b98 100755
--- a/utils/process_images.py
+++ b/utils/process_images.py
@@ -151,7 +151,7 @@ def create_image_vectors(self):
     self.create_tf_graph()
 
     print(' * creating image vectors')
-    with tf.Session() as sess:
+    with tf.compat.v1.Session() as sess:
       for image_index, image in enumerate(self.image_files):
         try:
           print(' * processing image', image_index+1, 'of', len(self.image_files))
@@ -160,7 +160,7 @@ def create_image_vectors(self):
           if os.path.exists(out_path) and not self.rewrite_image_vectors:
             continue
           # save the penultimate inception tensor/layer of the current image
-          with tf.gfile.FastGFile(image, 'rb') as f:
+          with tf.io.gfile.GFile(image, 'rb') as f:
             data = {'DecodeJpeg/contents:0': f.read()}
             feature_tensor = sess.graph.get_tensor_by_name('pool_3:0')
             feature_vector = np.squeeze( sess.run(feature_tensor, data) )
@@ -199,8 +199,8 @@ def create_tf_graph(self):
     '''
     print(' * creating tf graph')
     graph_path = join(FLAGS.model_dir, 'classify_image_graph_def.pb')
-    with tf.gfile.FastGFile(graph_path, 'rb') as f:
-      graph_def = tf.GraphDef()
+    with tf.io.gfile.GFile(graph_path, 'rb') as f:
+      graph_def = tf.compat.v1.GraphDef()
       graph_def.ParseFromString(f.read())
       _ = tf.import_graph_def(graph_def, name='')
 
@@ -480,4 +480,4 @@ def main(*args, **kwargs):
   PixPlot(image_glob)
 
 if __name__ == '__main__':
-  tf.app.run()
+  tf.compat.v1.app.run()

From 15011e13d9c41193ded2da177b2c5a1225f4f207 Mon Sep 17 00:00:00 2001
From: Mace Ojala <maco@itu.dk>
Date: Thu, 4 Jul 2019 21:25:18 +0200
Subject: [PATCH 5/5] Add the cluster information to each of the images in the
 JSON output file. At this point, I hope this won't confuse the UI, which
 reads the file for rendering.

---
 utils/process_images.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/utils/process_images.py b/utils/process_images.py
index 59caa31b..37fa8e2b 100755
--- a/utils/process_images.py
+++ b/utils/process_images.py
@@ -254,19 +254,21 @@ def get_image_positions(self, fit_model):
       thumb_path = join(self.output_dir, 'thumbs', '32px', img)
       with Image.open(thumb_path) as image:
         width, height = image.size
-      # Add the image name, x offset, y offset
+      cluster = int(self.clustering.labels_[c]) + 1 # Because PixPlot.get_centroids() names them from 1 onwards
+      # Add the image name, x offset, y offset, cluster
       image_positions.append([
         os.path.splitext(os.path.basename(img))[0],
         int(i[0] * 100),
         int(i[1] * 100),
         width,
-        height
+        height,
+        cluster
       ])
     return image_positions
 
   def build_clustering(self):
     '''
-    Use KMeans clustering to find n centroids
+    Use KMeans clustering to find n centroids.
     '''
     print(' * calculating ' + str(self.n_clusters) + ' clusters')
     model = KMeans(n_clusters=self.n_clusters)
@@ -277,8 +279,7 @@ def build_clustering(self):
 
   def get_centroids(self):
     '''
-    Use KMeans clustering to find n centroid images
-    that represent the center of an image cluster
+    Find n centroid images that represent the center of an image cluster
     '''
     centroids = self.clustering.cluster_centers_
     labels = self.clustering.labels_