From 4cad8026a74b36b8ed19d7ed0d11b6a2b253d04a Mon Sep 17 00:00:00 2001
From: davidjtran
Date: Fri, 30 Mar 2018 01:57:59 -0400
Subject: [PATCH] implementing direct presentation to user - check if link is on homepage

---
Review notes (this section is ignored when the patch is applied):
- Line breaks and indentation were restored from a whitespace-collapsed copy;
  4-space indentation is assumed for the existing files' context lines.
- directpresentation.py was revised: the homepage url is derived from the
  scheme and host instead of a fixed suffix list, the Chrome driver is always
  quit, and 'possible_eulas' holds hrefs instead of Tag objects.
- The index hash of the new file predates these revisions.

 api/models/heuristics/__init__.py           |  2 +-
 api/models/heuristics/directpresentation.py | 71 +++++++++++++++++++++
 api/models/procedural.py                    |  4 +-
 3 files changed, 74 insertions(+), 3 deletions(-)
 create mode 100644 api/models/heuristics/directpresentation.py

diff --git a/api/models/heuristics/__init__.py b/api/models/heuristics/__init__.py
index 0512131..806be7a 100644
--- a/api/models/heuristics/__init__.py
+++ b/api/models/heuristics/__init__.py
@@ -1 +1 @@
-__all__ = ["documentlength", "easeofnavigation", "mobileaccessibility", "mobilereadability", "plainlanguage", "typeconventions"]
\ No newline at end of file
+__all__ = ["documentlength", "easeofnavigation", "mobileaccessibility", "mobilereadability", "plainlanguage", "typeconventions", "directpresentation"]
\ No newline at end of file
diff --git a/api/models/heuristics/directpresentation.py b/api/models/heuristics/directpresentation.py
new file mode 100644
index 0000000..f9b1803
--- /dev/null
+++ b/api/models/heuristics/directpresentation.py
@@ -0,0 +1,71 @@
+from models.heuristic import Heuristic
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.chrome.options import Options
+from boilerpipe.extract import Extractor
+from multiprocessing import RLock
+from bs4 import BeautifulSoup, SoupStrainer
+import re
+
+# Highest score this heuristic can award
+_MAX_SCORE = 4
+
+# Link texts that identify a EULA, tried from most to least specific
+_LINK_PATTERNS = [
+    re.compile(r'end\W*user\W*license', re.I),
+    re.compile(r'terms', re.I),
+]
+
+# Optional scheme plus everything up to the first '/', '?' or '#'
+_HOMEPAGE_PATTERN = re.compile(r'(?:[a-z][a-z0-9+.-]*://)?[^/?#]+', re.I)
+
+# Procedural 2q
+# Ensure link to EULA can be found on home page
+class DirectPresentation(Heuristic):
+    def score(self, eula):
+        """Check whether the homepage of eula.url links to the EULA.
+
+        Returns a dict that always has 'score' and 'max':
+        - score -1 with 'reason' when there is no usable url
+        - score 4 with 'eula_found' (one href) or 'possible_eulas'
+          (list of hrefs) when matching links are on the homepage
+        - score 0 with 'reason' when no matching link is found
+        """
+        if eula.url is None:
+            return {'score': -1, 'max': _MAX_SCORE, 'reason': 'no url'}
+
+        # Keep only the scheme and host. Matching on a fixed list of
+        # suffixes such as ".com" would miss every other domain and can
+        # match inside a host name such as "www.community.org".
+        homepage = _HOMEPAGE_PATTERN.match(eula.url)
+        if homepage is None:
+            return {'score': -1, 'max': _MAX_SCORE, 'reason': 'no url'}
+        url = homepage.group(0)
+
+        chrome_options = Options()
+        chrome_options.add_argument("--hide-scrollbars")
+        chrome_options.set_headless()
+
+        # Start chrome driver, and set window to initial width and height
+        driver = webdriver.Chrome(chrome_options=chrome_options)
+        try:
+            driver.set_window_size(1920, 1080)
+
+            # Grab desktop view
+            driver.get(url)
+            html = driver.page_source
+        finally:
+            # Always close the browser so Chrome processes are not leaked
+            driver.quit()
+
+        soup = BeautifulSoup(html, 'html.parser')
+        for pattern in _LINK_PATTERNS:
+            links = soup.findAll('a', href=True, text=pattern)
+            if len(links) == 1:
+                return {'score': _MAX_SCORE, 'max': _MAX_SCORE, 'eula_found': links[0]['href']}
+            if len(links) > 1:
+                # Report hrefs, like 'eula_found', rather than Tag objects
+                hrefs = [link['href'] for link in links]
+                return {'score': _MAX_SCORE, 'max': _MAX_SCORE, 'possible_eulas': hrefs}
+
+        return {'score': 0, 'max': _MAX_SCORE, 'reason': 'Could not find link to EULA on homepage'}
diff --git a/api/models/procedural.py b/api/models/procedural.py
index d2892fe..82a9506 100644
--- a/api/models/procedural.py
+++ b/api/models/procedural.py
@@ -9,7 +9,7 @@ class Procedural(Category):
 
     def evaluate(self, eula, thread_semaphore, ret_vars):
         # List of heuristics to evaluate, and the relative weighting of each
-        heuristics_to_eval = [mobileaccessibility.MobileAccessibility, mobilereadability.MobileReadability]
-        heuristic_weights = {'mobileaccessibility': 2, 'mobilereadability': 5}
+        heuristics_to_eval = [mobileaccessibility.MobileAccessibility, mobilereadability.MobileReadability, directpresentation.DirectPresentation]
+        heuristic_weights = {'mobileaccessibility': 2, 'mobilereadability': 5, "directpresentation" : 3}
 
         ret_vars['procedural'] = self.parallel_evaluate(eula, heuristics_to_eval, heuristic_weights, thread_semaphore)