From ab2bc040b386aad992f3e7865d1598da59d65346 Mon Sep 17 00:00:00 2001 From: Mace Ojala Date: Thu, 4 Jul 2019 20:42:12 +0200 Subject: [PATCH 1/5] Added the membership to clusters in output --- utils/process_images.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/process_images.py b/utils/process_images.py index 626d6d45..0463db03 100755 --- a/utils/process_images.py +++ b/utils/process_images.py @@ -279,9 +279,11 @@ def get_centroids(self): centroid_paths = [self.vector_files[i] for i in closest] centroid_json = [] for c, i in enumerate(centroid_paths): + print([item for item, label in enumerate(fit_model.labels_) if label == c]) centroid_json.append({ 'img': get_filename(i), - 'label': 'Cluster ' + str(c+1) + 'label': 'Cluster ' + str(c+1), + 'members': [item for item, label in enumerate(fit_model.labels_) if label == c] }) return centroid_json From 2c05be1add5422cd1732b71eb3b753aaccf5c963 Mon Sep 17 00:00:00 2001 From: Mace Ojala Date: Thu, 4 Jul 2019 20:49:51 +0200 Subject: [PATCH 2/5] Refactored k-means clustering to its own function --- utils/process_images.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/utils/process_images.py b/utils/process_images.py index 0463db03..7732b6bb 100755 --- a/utils/process_images.py +++ b/utils/process_images.py @@ -263,27 +263,36 @@ def get_image_positions(self, fit_model): ]) return image_positions + def build_clustering(self): + ''' + Use KMeans clustering to find n centroids + ''' + print(' * calculating ' + str(self.n_clusters) + ' clusters') + model = KMeans(n_clusters=self.n_clusters) + X = np.array(self.image_vectors) + fit_model = model.fit(X) + self.clustering = fit_model + def get_centroids(self): ''' Use KMeans clustering to find n centroid images that represent the center of an image cluster ''' - print(' * calculating ' + str(self.n_clusters) + ' clusters') - model = KMeans(n_clusters=self.n_clusters) + self.build_clustering() + centroids =
self.clustering.cluster_centers_ + labels = self.clustering.labels_ X = np.array(self.image_vectors) - fit_model = model.fit(X) - centroids = fit_model.cluster_centers_ # find the points closest to the cluster centroids closest, _ = pairwise_distances_argmin_min(centroids, X) centroid_paths = [self.vector_files[i] for i in closest] centroid_json = [] for c, i in enumerate(centroid_paths): - print([item for item, label in enumerate(fit_model.labels_) if label == c]) + print([item for item, label in enumerate(labels) if label == c]) centroid_json.append({ 'img': get_filename(i), 'label': 'Cluster ' + str(c+1), - 'members': [item for item, label in enumerate(fit_model.labels_) if label == c] + 'members': [item for item, label in enumerate(labels) if label == c] }) return centroid_json From d67ddf8ea9a780b291c76cb356da4c237a33c14c Mon Sep 17 00:00:00 2001 From: Mace Ojala Date: Thu, 4 Jul 2019 20:52:34 +0200 Subject: [PATCH 3/5] Moved clustering to the class constructor, plus removed a dev printing thing --- utils/process_images.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/process_images.py b/utils/process_images.py index 7732b6bb..73559325 100755 --- a/utils/process_images.py +++ b/utils/process_images.py @@ -68,6 +68,7 @@ def __init__(self, image_glob): self.create_image_thumbs() self.create_image_vectors() self.load_image_vectors() + self.build_clustering() self.write_json() self.create_atlas_files() print('Processed output for ' + \ @@ -279,7 +280,6 @@ def get_centroids(self): Use KMeans clustering to find n centroid images that represent the center of an image cluster ''' - self.build_clustering() centroids = self.clustering.cluster_centers_ labels = self.clustering.labels_ X = np.array(self.image_vectors) @@ -288,7 +288,6 @@ def get_centroids(self): centroid_paths = [self.vector_files[i] for i in closest] centroid_json = [] for c, i in enumerate(centroid_paths): - print([item for item, label in enumerate(labels) if label == c]) 
centroid_json.append({ 'img': get_filename(i), 'label': 'Cluster ' + str(c+1), From 4f964fe137df2c5d5e9224015e91f2a0a6d8b379 Mon Sep 17 00:00:00 2001 From: Mace Ojala Date: Thu, 4 Jul 2019 20:59:29 +0200 Subject: [PATCH 4/5] Shifted to versioned, compatibility TensorFlow API, as suggested by the deprecation warnings. --- utils/process_images.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/utils/process_images.py b/utils/process_images.py index 626d6d45..d5168b98 100755 --- a/utils/process_images.py +++ b/utils/process_images.py @@ -151,7 +151,7 @@ def create_image_vectors(self): self.create_tf_graph() print(' * creating image vectors') - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: for image_index, image in enumerate(self.image_files): try: print(' * processing image', image_index+1, 'of', len(self.image_files)) @@ -160,7 +160,7 @@ def create_image_vectors(self): if os.path.exists(out_path) and not self.rewrite_image_vectors: continue # save the penultimate inception tensor/layer of the current image - with tf.gfile.FastGFile(image, 'rb') as f: + with tf.io.gfile.GFile(image, 'rb') as f: data = {'DecodeJpeg/contents:0': f.read()} feature_tensor = sess.graph.get_tensor_by_name('pool_3:0') feature_vector = np.squeeze( sess.run(feature_tensor, data) ) @@ -199,8 +199,8 @@ def create_tf_graph(self): ''' print(' * creating tf graph') graph_path = join(FLAGS.model_dir, 'classify_image_graph_def.pb') - with tf.gfile.FastGFile(graph_path, 'rb') as f: - graph_def = tf.GraphDef() + with tf.io.gfile.GFile(graph_path, 'rb') as f: + graph_def = tf.compat.v1.GraphDef() graph_def.ParseFromString(f.read()) _ = tf.import_graph_def(graph_def, name='') @@ -480,4 +480,4 @@ def main(*args, **kwargs): PixPlot(image_glob) if __name__ == '__main__': - tf.app.run() + tf.compat.v1.app.run() From 15011e13d9c41193ded2da177b2c5a1225f4f207 Mon Sep 17 00:00:00 2001 From: Mace Ojala Date: Thu, 4 Jul 2019 21:25:18 +0200 Subject: [PATCH 5/5] Add 
the cluster information to each of the images in the JSON output file. At this point, I hope this won't confuse the UI, which reads the file for rendering. --- utils/process_images.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/utils/process_images.py b/utils/process_images.py index 59caa31b..37fa8e2b 100755 --- a/utils/process_images.py +++ b/utils/process_images.py @@ -254,19 +254,21 @@ def get_image_positions(self, fit_model): thumb_path = join(self.output_dir, 'thumbs', '32px', img) with Image.open(thumb_path) as image: width, height = image.size - # Add the image name, x offset, y offset + cluster = int(self.clustering.labels_[c]) + 1 # Because PixPlot.get_centroids() names them from 1 onwards + # Add the image name, x offset, y offset, cluster image_positions.append([ os.path.splitext(os.path.basename(img))[0], int(i[0] * 100), int(i[1] * 100), width, - height + height, + cluster ]) return image_positions def build_clustering(self): ''' - Use KMeans clustering to find n centroids + Use KMeans clustering to find n centroids. ''' print(' * calculating ' + str(self.n_clusters) + ' clusters') model = KMeans(n_clusters=self.n_clusters) @@ -277,8 +279,7 @@ def build_clustering(self): def get_centroids(self): ''' - Use KMeans clustering to find n centroid images - that represent the center of an image cluster + Find n centroid images that represent the center of an image cluster ''' centroids = self.clustering.cluster_centers_ labels = self.clustering.labels_