goberoi · iborko · Jan 18, 2018 · Jan 18, 2018 · Jan 19, 2018 · Jan 22, 2018
diff --git a/cloudy_vision.py b/cloudy_vision.py
@@ -4,8 +4,6 @@
 import json
 import numpy as np
 import os
-import pprint
-import shutil
 import time
 import re
 import vendors.google
@@ -15,8 +13,9 @@
 import vendors.cloudsight_
 import vendors.rekognition
 
-
 SETTINGS = None
+
+
 def settings(name):
     """Fetch a settings parameter."""
 
@@ -26,18 +25,18 @@ def settings(name):
 
         # Change this dict to suit your taste.
         SETTINGS = {
-            'api_keys_filepath' : './api_keys.json',
-            'input_images_dir' : 'input_images',
-            'output_dir' : 'output',
-            'static_dir' : 'static',
-            'output_image_height' : 200,
-            'vendors' : {
-                'google' : vendors.google,
-                'msft' : vendors.microsoft,
-                'clarifai' : vendors.clarifai_,
-                'ibm' : vendors.ibm,
-                'cloudsight' : vendors.cloudsight_,
-                'rekognition' : vendors.rekognition,
+            'api_keys_filepath': './api_keys.json',
+            'input_images_dir': 'input_images',
+            'output_dir': 'output',
+            'static_dir': 'static',
+            'output_image_height': 200,
+            'vendors': {
+                'google': vendors.google,
+                'msft': vendors.microsoft,
+                'clarifai': vendors.clarifai_,
+                'ibm': vendors.ibm,
+                'cloudsight': vendors.cloudsight_,
+                'rekognition': vendors.rekognition,
             },
             'resize': True,
             'statistics': [
@@ -159,34 +158,40 @@ def process_all_images():
 
         # Create an output object for the image
         image_result = {
-            'input_image_filepath' : filepath,
-            'output_image_filepath' : filename,
-            'vendors' : [],
-            'image_tags' : image_tags,
+            'input_image_filepath': filepath,
+            'output_image_filepath': filename,
+            'vendors': [],
+            'image_tags': image_tags,
         }
         image_results.append(image_result)
 
         # If there's no output file, then resize or copy the input file over
         output_image_filepath = os.path.join(settings('output_dir'), filename)
         if not(os.path.isfile(output_image_filepath)):
-            log_status(filepath, "", "writing output image in %s" % output_image_filepath)
+            log_status(
+                filepath, "",
+                "writing output image in %s" % output_image_filepath)
             if settings('resize'):
                 resize_and_save(filepath, output_image_filepath)
             else:
                 copyfile(filepath, output_image_filepath)
 
         # Walk through all vendor APIs to call.
-        for vendor_name, vendor_module in sorted(settings('vendors').iteritems(), reverse=True):
+        for vendor_name, vendor_module in sorted(
+                settings('vendors').items(), reverse=True):
 
             # Figure out filename to store and retrive cached JSON results.
             output_json_filename = filename + "." + vendor_name + ".json"
-            output_json_path = os.path.join(settings('output_dir'), output_json_filename)
+            output_json_path = os.path.join(
+                settings('output_dir'), output_json_filename)
 
             # Check if the call is already cached.
             if os.path.isfile(output_json_path):
 
-                # If so, read the result from the .json file stored in the output dir.
-                log_status(filepath, vendor_name, "skipping API call, already cached")
+                # If so, read the result from the .json file stored
+                # in the output dir.
+                log_status(filepath, vendor_name,
+                           "skipping API call, already cached")
                 with open(output_json_path, 'r') as infile:
                     api_result = json.loads(infile.read())
 
@@ -195,24 +200,30 @@ def process_all_images():
                 # If not, make the API call for this particular vendor.
                 log_status(filepath, vendor_name, "calling API")
                 api_call_start = time.time()
-                api_result = vendor_module.call_vision_api(filepath, settings('api_keys'))
+                api_result = vendor_module.call_vision_api(
+                    filepath, settings('api_keys'))
                 api_result['response_time'] = time.time() - api_call_start
 
                 # And cache the result in a .json file
-                log_status(filepath, vendor_name, "success, storing result in %s" % output_json_path)
+                log_status(filepath, vendor_name,
+                           "success, storing result in %s" % output_json_path)
                 with open(output_json_path, 'w') as outfile:
-                    api_result_str = json.dumps(api_result, sort_keys=True, indent=4, separators=(',', ': '))
+                    api_result_str = json.dumps(
+                        api_result, sort_keys=True, indent=4,
+                        separators=(',', ': '))
                     outfile.write(api_result_str)
 
                 # Sleep so we avoid hitting throttling limits
                 time.sleep(1)
 
             # Parse the JSON result we fetched (via API call or from cache)
-            standardized_result = vendor_module.get_standardized_result(api_result)
+            standardized_result = vendor_module.get_standardized_result(
+                api_result)
 
             # Sort tags if found
             if 'tags' in standardized_result:
-                standardized_result['tags'].sort(key=lambda tup: tup[1], reverse=True)
+                standardized_result['tags'].sort(
+                    key=lambda tup: tup[1], reverse=True)
 
             # If expected tags are provided, calculate accuracy
             tags_count = 0
@@ -222,28 +233,32 @@ def process_all_images():
                 tags_count = len(standardized_result['tags'])
 
                 if settings('tagged_images'):
-                    matching_tags = find_matching_tags(image_tags, standardized_result)
+                    matching_tags = find_matching_tags(
+                        image_tags, standardized_result)
 
                     if len(matching_tags) > 0:
-                        matching_confidence = sum([t[1] for t in matching_tags]) / len(matching_tags)
+                        matching_confidence = sum(
+                            [t[1] for t in matching_tags]) / len(matching_tags)
 
             image_result['vendors'].append({
-                'api_result' : api_result,
-                'vendor_name' : vendor_name,
-                'standardized_result' : standardized_result,
-                'output_json_filename' : output_json_filename,
-                'response_time' : api_result['response_time'],
-                'tags_count' : tags_count,
-                'matching_tags' : matching_tags,
-                'matching_tags_count' : len(matching_tags),
-                'matching_confidence' : matching_confidence,
+                'api_result': api_result,
+                'vendor_name': vendor_name,
+                'standardized_result': standardized_result,
+                'output_json_filename': output_json_filename,
+                'response_time': api_result['response_time'],
+                'tags_count': tags_count,
+                'matching_tags': matching_tags,
+                'matching_tags_count': len(matching_tags),
+                'matching_confidence': matching_confidence,
             })
 
     # Compute global statistics for each vendor
     vendor_stats = vendor_statistics(image_results)
 
-    # Sort image_results output by filename (so that future runs produce comparable output)
-    image_results.sort(key=lambda image_result: image_result['output_image_filepath'])
+    # Sort image_results output by filename
+    # (so that future runs produce comparable output)
+    image_results.sort(
+        key=lambda image_result: image_result['output_image_filepath'])
 
     # Render HTML file with all results.
     output_html = render_from_template(
@@ -257,7 +272,7 @@ def process_all_images():
     # Write HTML output.
     output_html_filepath = os.path.join(settings('output_dir'), 'output.html')
     with open(output_html_filepath, 'w') as output_html_file:
-        output_html_file.write(output_html.encode('utf-8'))
+        output_html_file.write(output_html)
 
 
 if __name__ == "__main__":

diff --git a/static/template.html b/static/template.html
@@ -76,7 +76,7 @@ <h2 class="image_name">desired_tags:
                 </span>
             </td>
           </tr>
-          {% for feature_name, feature_results in vendor['standardized_result'].iteritems() %}
+          {% for feature_name, feature_results in vendor['standardized_result'].items() %}
           <tr>
               <td class="result_name">
                   {{ vendor['vendor_name'] }}_{{ feature_name }}
@@ -135,7 +135,7 @@ <h4>Stats</h4>
                 On time taken, and number of tags returned. Note that Cloudsight returns captions, not a list of tags, so those counts appear as zero.
             </p>
             <table class="u-full-width">
-                {% for vendor, stats in vendor_stats.iteritems() %}
+                {% for vendor, stats in vendor_stats.items() %}
                 {% if loop.first %}
                 <tr class="raw_json">
                     <td class="result_name">Vendor</td>

diff --git a/vendors/clarifai_.py b/vendors/clarifai_.py
@@ -3,7 +3,7 @@
 
 
 def call_vision_api(image_filename, api_keys):
-    app = ClarifaiApp()
+    app = ClarifaiApp(api_key=api_keys['clarifai']['api_key'])
     model = app.models.get('general-v1.3')
     image = ClImage(file_obj=open(image_filename, 'rb'))
     result = model.predict([image])
@@ -23,6 +23,6 @@ def get_standardized_result(api_result):
         tag_names.append(concept['name'])
         tag_scores.append(concept['value'])
 
-    output['tags'] = zip(tag_names, tag_scores)
+    output['tags'] = list(zip(tag_names, tag_scores))
 
     return output
diff --git a/vendors/cloudsight_.py b/vendors/cloudsight_.py
@@ -1,8 +1,8 @@
 import cloudsight
 
+
 def call_vision_api(image_filename, api_keys):
     api_key = api_keys['cloudsight']['api_key']
-    api_secret = api_keys['cloudsight']['api_secret']
 
     # Via example found here:
     # https://github.com/cloudsight/cloudsight-python
@@ -14,19 +14,20 @@ def call_vision_api(image_filename, api_keys):
         response = api.image_request(image_file, image_filename)
 
     response = api.wait(response['token'], timeout=60)
-    
+
     return response
 
 
 def get_standardized_result(api_result):
     output = {
-        'captions' : [],
+        'captions': [],
     }
 
     if api_result['status'] == 'completed':
         output['captions'].append((api_result["name"], None))
     elif api_result['status'] == 'skipped':
-        output['captions'].append(("error_skipped_because_" + api_result["reason"], None))
+        output['captions'].append(
+            ("error_skipped_because_" + api_result["reason"], None))
     else:
         output['captions'].append(("error_" + api_result["status"], None))
 

diff --git a/vendors/google.py b/vendors/google.py
@@ -2,48 +2,51 @@
 import json
 import requests
 
+
 def _convert_image_to_base64(image_filename):
     with open(image_filename, 'rb') as image_file:
         encoded_string = base64.b64encode(image_file.read()).decode()
 
     return encoded_string
 
+
 def call_vision_api(image_filename, api_keys):
     api_key = api_keys['google']
-    post_url = "https://vision.googleapis.com/v1/images:annotate?key=" + api_key
+    post_url = "https://vision.googleapis.com/v1/images:annotate?key="\
+        + api_key
 
     base64_image = _convert_image_to_base64(image_filename)
 
     post_payload = {
-      "requests": [
-        {
-          "image": {
-            "content" : base64_image
-          },
-          "features": [
-            {
-              "type": "LABEL_DETECTION",
-              "maxResults": 10
-            },
-            {
-              "type": "FACE_DETECTION",
-              "maxResults": 10
-            },
-            {
-              "type": "LANDMARK_DETECTION",
-              "maxResults": 10
-            },
-            {
-              "type": "LOGO_DETECTION",
-              "maxResults": 10
-            },
+        "requests": [
             {
-              "type": "SAFE_SEARCH_DETECTION",
-              "maxResults": 10
-            },
-          ]
-        }
-      ]
+                "image": {
+                    "content": base64_image
+                },
+                "features": [
+                    {
+                        "type": "LABEL_DETECTION",
+                        "maxResults": 10
+                    },
+                    {
+                        "type": "FACE_DETECTION",
+                        "maxResults": 10
+                    },
+                    {
+                        "type": "LANDMARK_DETECTION",
+                        "maxResults": 10
+                    },
+                    {
+                        "type": "LOGO_DETECTION",
+                        "maxResults": 10
+                    },
+                    {
+                        "type": "SAFE_SEARCH_DETECTION",
+                        "maxResults": 10
+                    },
+                ]
+            }
+        ]
     }
 
     result = requests.post(post_url, json=post_payload)
@@ -55,7 +58,7 @@ def call_vision_api(image_filename, api_keys):
 # See this function in microsoft.py for docs.
 def get_standardized_result(api_result):
     output = {
-        'tags' : [],
+        'tags': [],
     }
 
     api_result = api_result['responses'][0]
@@ -69,6 +72,7 @@ def get_standardized_result(api_result):
     if 'logoAnnotations' in api_result:
         output['logo_tags'] = []
         for annotation in api_result['logoAnnotations']:
-            output['logo_tags'].append((annotation['description'], annotation['score']))
+            output['logo_tags'].append(
+                (annotation['description'], annotation['score']))
 
     return output
diff --git a/vendors/ibm.py b/vendors/ibm.py
@@ -1,5 +1,6 @@
 from watson_developer_cloud import VisualRecognitionV3
 
+
 def call_vision_api(image_filename, api_keys):
     api_key = api_keys['ibm']
 
@@ -15,7 +16,7 @@ def call_vision_api(image_filename, api_keys):
 
 def get_standardized_result(api_result):
     output = {
-        'tags' : [],
+        'tags': [],
     }
 
     api_result = api_result["images"][0]