From 305bd96fa10dca321666ee2b3a1d8d95ad181881 Mon Sep 17 00:00:00 2001 From: "Nicholas.Morhun" Date: Sat, 23 Jan 2016 13:21:08 +0200 Subject: [PATCH 1/3] Connection Utils implemented --- .gitignore | 2 ++ org/geekhub/ConnectionUtils.java | 22 --------------- src/org/geekhub/ConnectionUtils.java | 33 ++++++++++++++++++++++ {org => src/org}/geekhub/ImageCrawler.java | 2 +- {org => src/org}/geekhub/ImageTask.java | 5 +++- {org => src/org}/geekhub/Main.java | 2 +- {org => src/org}/geekhub/Page.java | 3 +- 7 files changed, 43 insertions(+), 26 deletions(-) create mode 100644 .gitignore delete mode 100644 org/geekhub/ConnectionUtils.java create mode 100644 src/org/geekhub/ConnectionUtils.java rename {org => src/org}/geekhub/ImageCrawler.java (97%) rename {org => src/org}/geekhub/ImageTask.java (92%) rename {org => src/org}/geekhub/Main.java (91%) rename {org => src/org}/geekhub/Page.java (98%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fa2b886 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/out +/.idea diff --git a/org/geekhub/ConnectionUtils.java b/org/geekhub/ConnectionUtils.java deleted file mode 100644 index f67536f..0000000 --- a/org/geekhub/ConnectionUtils.java +++ /dev/null @@ -1,22 +0,0 @@ -package org.geekhub; - -import java.io.IOException; -import java.net.URL; - -/** - * Utils class that contains useful method to interact with URLConnection - */ -public class ConnectionUtils { - - /** - * Downloads content for specified URL and returns it as a byte array. - * Should be used for small files only. Don't use it to download big files it's dangerous. - * @param url - * @return - * @throws IOException - */ - public static byte[] getData(URL url) throws IOException { - //implement me - return null; - } -} diff --git a/src/org/geekhub/ConnectionUtils.java b/src/org/geekhub/ConnectionUtils.java new file mode 100644 index 0000000..9fbea2e --- /dev/null +++ b/src/org/geekhub/ConnectionUtils.java @@ -0,0 +1,33 @@ +package org.geekhub; + +import java.io.BufferedInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.net.URL; +import java.net.URLConnection; + +/** + * Utils class that contains useful method to interact with URLConnection + */ +public class ConnectionUtils { + + /** + * Downloads content for specified URL and returns it as a byte array. + * Should be used for small files only. Don't use it to download big files it's dangerous. + * @param url url of web page to download + * @return web page source as byte array + * @throws IOException + */ + public static byte[] getData(URL url) throws IOException { + URLConnection connection = url.openConnection(); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + try (BufferedInputStream in = new BufferedInputStream(connection.getInputStream())) { + int b; + while ((b = in.read()) != -1) { + outputStream.write(b); + } + } + return outputStream.toByteArray(); + } + +} diff --git a/org/geekhub/ImageCrawler.java b/src/org/geekhub/ImageCrawler.java similarity index 97% rename from org/geekhub/ImageCrawler.java rename to src/org/geekhub/ImageCrawler.java index 8cad33b..f325c4c 100644 --- a/org/geekhub/ImageCrawler.java +++ b/src/org/geekhub/ImageCrawler.java @@ -25,7 +25,7 @@ public ImageCrawler(String folder) throws MalformedURLException { /** * Call this method to start download images from specified URL. - * @param urlToPage + * @param urlToPage url to web page * @throws IOException */ public void downloadImages(String urlToPage) throws IOException { diff --git a/org/geekhub/ImageTask.java b/src/org/geekhub/ImageTask.java similarity index 92% rename from org/geekhub/ImageTask.java rename to src/org/geekhub/ImageTask.java index de0a340..10e1f95 100644 --- a/org/geekhub/ImageTask.java +++ b/src/org/geekhub/ImageTask.java @@ -23,8 +23,11 @@ public void run() { //implement me } - //converts URL to unique file name + /** + * Converts URL to unique file name + */ private String buildFileName(URL url) { return url.toString().replaceAll("[^a-zA-Z0-9-_\\.]", "_"); } + } diff --git a/org/geekhub/Main.java b/src/org/geekhub/Main.java similarity index 91% rename from org/geekhub/Main.java rename to src/org/geekhub/Main.java index 7cff652..1d233a9 100644 --- a/org/geekhub/Main.java +++ b/src/org/geekhub/Main.java @@ -5,7 +5,7 @@ public class Main { - public static final String FOLDER_TO_DOWNLOAD = "d:/images/"; + public static final String FOLDER_TO_DOWNLOAD = "/tmp/images/"; public static void main(String[] args) throws IOException { ImageCrawler imageCrawler = new ImageCrawler(FOLDER_TO_DOWNLOAD); diff --git a/org/geekhub/Page.java b/src/org/geekhub/Page.java similarity index 98% rename from org/geekhub/Page.java rename to src/org/geekhub/Page.java index f9915de..f19d20b 100644 --- a/org/geekhub/Page.java +++ b/src/org/geekhub/Page.java @@ -20,7 +20,7 @@ public class Page { /** * Be careful, constructor downloads content, it could be slow. - * @param url + * @param url url to web page * @throws IOException */ public Page(URL url) throws IOException { @@ -53,4 +53,5 @@ private Collection extractMatches(Matcher matcher) throws MalformedURLExcep } return links; } + } From 6d2c520cd6b0edc5b16502f0db886b71bdbe4b60 Mon Sep 17 00:00:00 2001 From: "Nicholas.Morhun" Date: Sat, 23 Jan 2016 14:04:13 +0200 Subject: [PATCH 2/3] ImageTask is implemented --- .gitignore | 3 ++- src/org/geekhub/ImageTask.java | 13 ++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index fa2b886..49e1311 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /out -/.idea +.idea +*.iml diff --git a/src/org/geekhub/ImageTask.java b/src/org/geekhub/ImageTask.java index 10e1f95..c97b249 100644 --- a/src/org/geekhub/ImageTask.java +++ b/src/org/geekhub/ImageTask.java @@ -1,5 +1,6 @@ package org.geekhub; +import java.io.*; import java.net.URL; /** @@ -20,7 +21,17 @@ public ImageTask(URL url, String folder) { */ @Override public void run() { - //implement me + try ( + BufferedInputStream inputStream = new BufferedInputStream(url.openStream()); + BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(folder + buildFileName(url))) + ) { + int b; + while ((b = inputStream.read()) != -1) { + outputStream.write(b); + } + } catch (IOException e) { + System.out.println("Error while loading data from " + url); + } } /** From ada7eb7d931e4c4f3c5d6684e425d8a8229894ff Mon Sep 17 00:00:00 2001 From: "Nicholas.Morhun" Date: Sat, 23 Jan 2016 15:50:38 +0200 Subject: [PATCH 3/3] Task is done --- src/org/geekhub/ImageCrawler.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/org/geekhub/ImageCrawler.java b/src/org/geekhub/ImageCrawler.java index f325c4c..04e0c7b 100644 --- a/src/org/geekhub/ImageCrawler.java +++ b/src/org/geekhub/ImageCrawler.java @@ -3,6 +3,7 @@ import java.io.*; import java.net.MalformedURLException; import java.net.URL; +import java.util.Collection; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -29,7 +30,10 @@ public ImageCrawler(String folder) throws MalformedURLException { * @throws IOException */ public void downloadImages(String urlToPage) throws IOException { - //implement me + Page page = new Page(new URL(urlToPage)); + page.getImageLinks().stream() + .filter(this::isImageURL) + .forEach(url -> executorService.submit(new ImageTask(url, folder))); } /** @@ -39,12 +43,11 @@ public void stop() { executorService.shutdown(); } - //detects is current url is an image. Checking for popular extensions should be enough private boolean isImageURL(URL url) { - //implement me - return false; + String u = url.toString(); + return u.endsWith(".png") || u.endsWith(".jpg") || u.endsWith(".jpeg") || + u.endsWith(".gif") || u.endsWith(".ico") || u.endsWith(".svg") || + u.endsWith(".bmp"); } - - }