From 7a2df96193664c2d9abb9744f9b952491b875216 Mon Sep 17 00:00:00 2001 From: Andrey Chervonyuk Date: Fri, 8 Jan 2016 23:26:31 +0200 Subject: [PATCH] Homework 10 --- org/geekhub/ConnectionUtils.java | 16 +++++++++++++--- org/geekhub/ImageCrawler.java | 15 ++++++++++++--- org/geekhub/ImageTask.java | 12 +++++++++++- org/geekhub/Page.java | 4 ++-- 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/org/geekhub/ConnectionUtils.java b/org/geekhub/ConnectionUtils.java index f67536f..288f65a 100644 --- a/org/geekhub/ConnectionUtils.java +++ b/org/geekhub/ConnectionUtils.java @@ -1,6 +1,6 @@ package org.geekhub; -import java.io.IOException; +import java.io.*; import java.net.URL; /** @@ -16,7 +16,17 @@ public class ConnectionUtils { * @throws IOException */ public static byte[] getData(URL url) throws IOException { - //implement me - return null; + ByteArrayOutputStream data = new ByteArrayOutputStream(); + + try (BufferedInputStream inputStream = new BufferedInputStream(url.openStream())) { + byte[] buffer = new byte[1024]; + int bytesRead; + + while ((bytesRead = inputStream.read(buffer)) != -1) { + data.write(buffer, 0, bytesRead); + } + } + + return data.toByteArray(); } } diff --git a/org/geekhub/ImageCrawler.java b/org/geekhub/ImageCrawler.java index 8cad33b..db34e85 100644 --- a/org/geekhub/ImageCrawler.java +++ b/org/geekhub/ImageCrawler.java @@ -5,6 +5,7 @@ import java.net.URL; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.regex.Pattern; /** * ImageCrawler downloads all images to specified folder from specified resource. @@ -19,6 +20,8 @@ public class ImageCrawler { private ExecutorService executorService = Executors.newFixedThreadPool(NUMBER_OF_THREADS); private String folder; + Pattern imagePattern = Pattern.compile("([^\\s]+(\\.(?i)(jpg|png|gif|bmp))$)"); + public ImageCrawler(String folder) throws MalformedURLException { this.folder = folder; } @@ -29,7 +32,14 @@ public ImageCrawler(String folder) throws MalformedURLException { * @throws IOException */ public void downloadImages(String urlToPage) throws IOException { - //implement me + URL url = new URL(urlToPage); + + if (isImageURL(url)) { + executorService.submit(new ImageTask(url, folder)); + } else { + new Page(url).getImageLinks().stream() + .forEach((URL link) -> executorService.submit(new ImageTask(link, folder))); + } } /** @@ -41,8 +51,7 @@ public void stop() { //detects is current url is an image. Checking for popular extensions should be enough private boolean isImageURL(URL url) { - //implement me - return false; + return imagePattern.matcher(url.toString()).matches(); } diff --git a/org/geekhub/ImageTask.java b/org/geekhub/ImageTask.java index de0a340..2dfdd5c 100644 --- a/org/geekhub/ImageTask.java +++ b/org/geekhub/ImageTask.java @@ -1,6 +1,10 @@ package org.geekhub; +import java.io.IOException; import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; /** * Represents worker that downloads image from URL to specified folder.
@@ -20,7 +24,13 @@ public ImageTask(URL url, String folder) { */ @Override public void run() { - //implement me + Path path = Paths.get(folder + "/" + buildFileName(url)); + + try { + Files.write(path, ConnectionUtils.getData(url)); + } catch (IOException e) { + e.printStackTrace(); + } } //converts URL to unique file name diff --git a/org/geekhub/Page.java b/org/geekhub/Page.java index f9915de..d2dba3f 100644 --- a/org/geekhub/Page.java +++ b/org/geekhub/Page.java @@ -12,8 +12,8 @@ * Allows high-level access to page elements. */ public class Page { - Pattern linkPattern = Pattern.compile("]*?\\s)*?href=\"(.*?)\".*?>"); - Pattern imageLinkPattern = Pattern.compile("|)"); + Pattern linkPattern = Pattern.compile("]*?\\s)*?href=\"(https?.*?)\".*?>"); + Pattern imageLinkPattern = Pattern.compile("|)"); private String content; private URL url;