diff --git a/org/geekhub/ConnectionUtils.java b/org/geekhub/ConnectionUtils.java
index f67536f..288f65a 100644
--- a/org/geekhub/ConnectionUtils.java
+++ b/org/geekhub/ConnectionUtils.java
@@ -1,6 +1,6 @@
package org.geekhub;
-import java.io.IOException;
+import java.io.*;
import java.net.URL;
/**
@@ -16,7 +16,17 @@ public class ConnectionUtils {
* @throws IOException
*/
public static byte[] getData(URL url) throws IOException {
- //implement me
- return null;
+     // Stream the resource in fixed-size chunks and collect every byte in memory.
+     final byte[] chunk = new byte[1024];
+     final ByteArrayOutputStream collected = new ByteArrayOutputStream();
+
+     try (BufferedInputStream source = new BufferedInputStream(url.openStream())) {
+         // read() reports -1 once the end of the stream has been reached
+         for (int count = source.read(chunk); count != -1; count = source.read(chunk)) {
+             collected.write(chunk, 0, count);
+         }
+     }
+
+     return collected.toByteArray();
}
}
diff --git a/org/geekhub/ImageCrawler.java b/org/geekhub/ImageCrawler.java
index 8cad33b..db34e85 100644
--- a/org/geekhub/ImageCrawler.java
+++ b/org/geekhub/ImageCrawler.java
@@ -5,6 +5,7 @@
import java.net.URL;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
+import java.util.regex.Pattern;
/**
* ImageCrawler downloads all images to specified folder from specified resource.
@@ -19,6 +20,8 @@ public class ImageCrawler {
private ExecutorService executorService = Executors.newFixedThreadPool(NUMBER_OF_THREADS);
private String folder;
+ // Whitespace-free text ending in ".jpg", ".jpeg", ".png", ".gif" or ".bmp"; (?i) makes the extension case-insensitive.
+ private final Pattern imagePattern = Pattern.compile("([^\\s]+(\\.(?i)(jpe?g|png|gif|bmp))$)");
public ImageCrawler(String folder) throws MalformedURLException {
this.folder = folder;
}
@@ -29,7 +32,14 @@ public ImageCrawler(String folder) throws MalformedURLException {
* @throws IOException
*/
public void downloadImages(String urlToPage) throws IOException {
- //implement me
+     final URL target = new URL(urlToPage);
+     if (isImageURL(target)) {
+         // the address is itself an image: queue a single download and stop
+         executorService.submit(new ImageTask(target, folder));
+         return;
+     }
+     new Page(target).getImageLinks().stream() // otherwise: one task per image link on the page
+             .forEach((URL link) -> executorService.submit(new ImageTask(link, folder)));
}
/**
@@ -41,8 +51,7 @@ public void stop() {
//detects is current url is an image. Checking for popular extensions should be enough
private boolean isImageURL(URL url) {
- //implement me
- return false;
+     return imagePattern.matcher(url.getPath()).matches(); // path only: "?query" / "#ref" must not hide the extension
}
diff --git a/org/geekhub/ImageTask.java b/org/geekhub/ImageTask.java
index de0a340..2dfdd5c 100644
--- a/org/geekhub/ImageTask.java
+++ b/org/geekhub/ImageTask.java
@@ -1,6 +1,10 @@
package org.geekhub;
+import java.io.IOException;
import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
/**
* Represents worker that downloads image from URL to specified folder.
@@ -20,7 +24,13 @@ public ImageTask(URL url, String folder) {
*/
@Override
public void run() {
- //implement me
+     // Paths.get joins the parts itself and skips an empty folder instead of producing "/name".
+     Path path = Paths.get(folder, buildFileName(url));
+     try {
+         Files.write(path, ConnectionUtils.getData(url));
+     } catch (IOException e) {
+         e.printStackTrace(); // best effort: the failure is only reported, never rethrown
+     }
}
//converts URL to unique file name
diff --git a/org/geekhub/Page.java b/org/geekhub/Page.java
index f9915de..d2dba3f 100644
--- a/org/geekhub/Page.java
+++ b/org/geekhub/Page.java
@@ -12,8 +12,8 @@
* Allows high-level access to page elements.
*/
public class Page {
- Pattern linkPattern = Pattern.compile("]*?\\s)*?href=\"(.*?)\".*?>");
- Pattern imageLinkPattern = Pattern.compile("|)");
+ Pattern linkPattern = Pattern.compile("]*?\\s)*?href=\"(https?.*?)\".*?>");
+ Pattern imageLinkPattern = Pattern.compile("|)");
private String content;
private URL url;