Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 58 additions & 43 deletions cloudy_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import json
import numpy as np
import os
import pprint
import shutil
import time
import re
import vendors.google
Expand All @@ -15,8 +13,9 @@
import vendors.cloudsight_
import vendors.rekognition


SETTINGS = None


def settings(name):
"""Fetch a settings parameter."""

Expand All @@ -26,18 +25,18 @@ def settings(name):

# Change this dict to suit your taste.
SETTINGS = {
'api_keys_filepath' : './api_keys.json',
'input_images_dir' : 'input_images',
'output_dir' : 'output',
'static_dir' : 'static',
'output_image_height' : 200,
'vendors' : {
'google' : vendors.google,
'msft' : vendors.microsoft,
'clarifai' : vendors.clarifai_,
'ibm' : vendors.ibm,
'cloudsight' : vendors.cloudsight_,
'rekognition' : vendors.rekognition,
'api_keys_filepath': './api_keys.json',
'input_images_dir': 'input_images',
'output_dir': 'output',
'static_dir': 'static',
'output_image_height': 200,
'vendors': {
'google': vendors.google,
'msft': vendors.microsoft,
'clarifai': vendors.clarifai_,
'ibm': vendors.ibm,
'cloudsight': vendors.cloudsight_,
'rekognition': vendors.rekognition,
},
'resize': True,
'statistics': [
Expand Down Expand Up @@ -159,34 +158,40 @@ def process_all_images():

# Create an output object for the image
image_result = {
'input_image_filepath' : filepath,
'output_image_filepath' : filename,
'vendors' : [],
'image_tags' : image_tags,
'input_image_filepath': filepath,
'output_image_filepath': filename,
'vendors': [],
'image_tags': image_tags,
}
image_results.append(image_result)

# If there's no output file, then resize or copy the input file over
output_image_filepath = os.path.join(settings('output_dir'), filename)
if not(os.path.isfile(output_image_filepath)):
log_status(filepath, "", "writing output image in %s" % output_image_filepath)
log_status(
filepath, "",
"writing output image in %s" % output_image_filepath)
if settings('resize'):
resize_and_save(filepath, output_image_filepath)
else:
copyfile(filepath, output_image_filepath)

# Walk through all vendor APIs to call.
for vendor_name, vendor_module in sorted(settings('vendors').iteritems(), reverse=True):
for vendor_name, vendor_module in sorted(
settings('vendors').items(), reverse=True):

# Figure out filename to store and retrieve cached JSON results.
output_json_filename = filename + "." + vendor_name + ".json"
output_json_path = os.path.join(settings('output_dir'), output_json_filename)
output_json_path = os.path.join(
settings('output_dir'), output_json_filename)

# Check if the call is already cached.
if os.path.isfile(output_json_path):

# If so, read the result from the .json file stored in the output dir.
log_status(filepath, vendor_name, "skipping API call, already cached")
# If so, read the result from the .json file stored
# in the output dir.
log_status(filepath, vendor_name,
"skipping API call, already cached")
with open(output_json_path, 'r') as infile:
api_result = json.loads(infile.read())

Expand All @@ -195,24 +200,30 @@ def process_all_images():
# If not, make the API call for this particular vendor.
log_status(filepath, vendor_name, "calling API")
api_call_start = time.time()
api_result = vendor_module.call_vision_api(filepath, settings('api_keys'))
api_result = vendor_module.call_vision_api(
filepath, settings('api_keys'))
api_result['response_time'] = time.time() - api_call_start

# And cache the result in a .json file
log_status(filepath, vendor_name, "success, storing result in %s" % output_json_path)
log_status(filepath, vendor_name,
"success, storing result in %s" % output_json_path)
with open(output_json_path, 'w') as outfile:
api_result_str = json.dumps(api_result, sort_keys=True, indent=4, separators=(',', ': '))
api_result_str = json.dumps(
api_result, sort_keys=True, indent=4,
separators=(',', ': '))
outfile.write(api_result_str)

# Sleep so we avoid hitting throttling limits
time.sleep(1)

# Parse the JSON result we fetched (via API call or from cache)
standardized_result = vendor_module.get_standardized_result(api_result)
standardized_result = vendor_module.get_standardized_result(
api_result)

# Sort tags if found
if 'tags' in standardized_result:
standardized_result['tags'].sort(key=lambda tup: tup[1], reverse=True)
standardized_result['tags'].sort(
key=lambda tup: tup[1], reverse=True)

# If expected tags are provided, calculate accuracy
tags_count = 0
Expand All @@ -222,28 +233,32 @@ def process_all_images():
tags_count = len(standardized_result['tags'])

if settings('tagged_images'):
matching_tags = find_matching_tags(image_tags, standardized_result)
matching_tags = find_matching_tags(
image_tags, standardized_result)

if len(matching_tags) > 0:
matching_confidence = sum([t[1] for t in matching_tags]) / len(matching_tags)
matching_confidence = sum(
[t[1] for t in matching_tags]) / len(matching_tags)

image_result['vendors'].append({
'api_result' : api_result,
'vendor_name' : vendor_name,
'standardized_result' : standardized_result,
'output_json_filename' : output_json_filename,
'response_time' : api_result['response_time'],
'tags_count' : tags_count,
'matching_tags' : matching_tags,
'matching_tags_count' : len(matching_tags),
'matching_confidence' : matching_confidence,
'api_result': api_result,
'vendor_name': vendor_name,
'standardized_result': standardized_result,
'output_json_filename': output_json_filename,
'response_time': api_result['response_time'],
'tags_count': tags_count,
'matching_tags': matching_tags,
'matching_tags_count': len(matching_tags),
'matching_confidence': matching_confidence,
})

# Compute global statistics for each vendor
vendor_stats = vendor_statistics(image_results)

# Sort image_results output by filename (so that future runs produce comparable output)
image_results.sort(key=lambda image_result: image_result['output_image_filepath'])
# Sort image_results output by filename
# (so that future runs produce comparable output)
image_results.sort(
key=lambda image_result: image_result['output_image_filepath'])

# Render HTML file with all results.
output_html = render_from_template(
Expand All @@ -257,7 +272,7 @@ def process_all_images():
# Write HTML output.
output_html_filepath = os.path.join(settings('output_dir'), 'output.html')
with open(output_html_filepath, 'w') as output_html_file:
output_html_file.write(output_html.encode('utf-8'))
output_html_file.write(output_html)


if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions static/template.html
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ <h2 class="image_name">desired_tags:
</span>
</td>
</tr>
{% for feature_name, feature_results in vendor['standardized_result'].iteritems() %}
{% for feature_name, feature_results in vendor['standardized_result'].items() %}
<tr>
<td class="result_name">
{{ vendor['vendor_name'] }}_{{ feature_name }}
Expand Down Expand Up @@ -135,7 +135,7 @@ <h4>Stats</h4>
On time taken, and number of tags returned. Note that Cloudsight returns captions, not a list of tags, so those counts appear as zero.
</p>
<table class="u-full-width">
{% for vendor, stats in vendor_stats.iteritems() %}
{% for vendor, stats in vendor_stats.items() %}
{% if loop.first %}
<tr class="raw_json">
<td class="result_name">Vendor</td>
Expand Down
4 changes: 2 additions & 2 deletions vendors/clarifai_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


def call_vision_api(image_filename, api_keys):
app = ClarifaiApp()
app = ClarifaiApp(api_key=api_keys['clarifai']['api_key'])
model = app.models.get('general-v1.3')
image = ClImage(file_obj=open(image_filename, 'rb'))
result = model.predict([image])
Expand All @@ -23,6 +23,6 @@ def get_standardized_result(api_result):
tag_names.append(concept['name'])
tag_scores.append(concept['value'])

output['tags'] = zip(tag_names, tag_scores)
output['tags'] = list(zip(tag_names, tag_scores))

return output
9 changes: 5 additions & 4 deletions vendors/cloudsight_.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import cloudsight


def call_vision_api(image_filename, api_keys):
api_key = api_keys['cloudsight']['api_key']
api_secret = api_keys['cloudsight']['api_secret']

# Via example found here:
# https://github.com/cloudsight/cloudsight-python
Expand All @@ -14,19 +14,20 @@ def call_vision_api(image_filename, api_keys):
response = api.image_request(image_file, image_filename)

response = api.wait(response['token'], timeout=60)

return response


def get_standardized_result(api_result):
output = {
'captions' : [],
'captions': [],
}

if api_result['status'] == 'completed':
output['captions'].append((api_result["name"], None))
elif api_result['status'] == 'skipped':
output['captions'].append(("error_skipped_because_" + api_result["reason"], None))
output['captions'].append(
("error_skipped_because_" + api_result["reason"], None))
else:
output['captions'].append(("error_" + api_result["status"], None))

Expand Down
66 changes: 35 additions & 31 deletions vendors/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,48 +2,51 @@
import json
import requests


def _convert_image_to_base64(image_filename):
    """Return the contents of ``image_filename`` as a base64 text string.

    The file is read in binary mode, base64-encoded, and the encoded
    bytes are decoded to ``str`` so the value can be embedded directly
    in a JSON request payload.
    """
    with open(image_filename, 'rb') as image_file:
        raw_bytes = image_file.read()

    # The base64 alphabet is pure ASCII, so an explicit ASCII decode is
    # equivalent to the default and makes the intent clear.
    return base64.b64encode(raw_bytes).decode('ascii')


def call_vision_api(image_filename, api_keys):
api_key = api_keys['google']
post_url = "https://vision.googleapis.com/v1/images:annotate?key=" + api_key
post_url = "https://vision.googleapis.com/v1/images:annotate?key="\
+ api_key

base64_image = _convert_image_to_base64(image_filename)

post_payload = {
"requests": [
{
"image": {
"content" : base64_image
},
"features": [
{
"type": "LABEL_DETECTION",
"maxResults": 10
},
{
"type": "FACE_DETECTION",
"maxResults": 10
},
{
"type": "LANDMARK_DETECTION",
"maxResults": 10
},
{
"type": "LOGO_DETECTION",
"maxResults": 10
},
"requests": [
{
"type": "SAFE_SEARCH_DETECTION",
"maxResults": 10
},
]
}
]
"image": {
"content": base64_image
},
"features": [
{
"type": "LABEL_DETECTION",
"maxResults": 10
},
{
"type": "FACE_DETECTION",
"maxResults": 10
},
{
"type": "LANDMARK_DETECTION",
"maxResults": 10
},
{
"type": "LOGO_DETECTION",
"maxResults": 10
},
{
"type": "SAFE_SEARCH_DETECTION",
"maxResults": 10
},
]
}
]
}

result = requests.post(post_url, json=post_payload)
Expand All @@ -55,7 +58,7 @@ def call_vision_api(image_filename, api_keys):
# See this function in microsoft.py for docs.
def get_standardized_result(api_result):
output = {
'tags' : [],
'tags': [],
}

api_result = api_result['responses'][0]
Expand All @@ -69,6 +72,7 @@ def get_standardized_result(api_result):
if 'logoAnnotations' in api_result:
output['logo_tags'] = []
for annotation in api_result['logoAnnotations']:
output['logo_tags'].append((annotation['description'], annotation['score']))
output['logo_tags'].append(
(annotation['description'], annotation['score']))

return output
3 changes: 2 additions & 1 deletion vendors/ibm.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from watson_developer_cloud import VisualRecognitionV3


def call_vision_api(image_filename, api_keys):
api_key = api_keys['ibm']

Expand All @@ -15,7 +16,7 @@ def call_vision_api(image_filename, api_keys):

def get_standardized_result(api_result):
output = {
'tags' : [],
'tags': [],
}

api_result = api_result["images"][0]
Expand Down
Loading