diff --git a/org/geekhub/ConnectionUtils.java b/org/geekhub/ConnectionUtils.java
index f67536f..d54ff03 100644
--- a/org/geekhub/ConnectionUtils.java
+++ b/org/geekhub/ConnectionUtils.java
@@ -1,6 +1,9 @@
 package org.geekhub;
 
+import java.io.BufferedInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.URL;
 
 /**
@@ -8,15 +11,29 @@
  */
 public class ConnectionUtils {
 
+    public static final int BUFFER_SIZE = 16 * 1024;
+
     /**
      * Downloads content for specified URL and returns it as a byte array.
      * Should be used for small files only. Don't use it to download big files it's dangerous.
+     *
      * @param url
      * @return
      * @throws IOException
      */
     public static byte[] getData(URL url) throws IOException {
-        //implement me
-        return null;
+
+        ByteArrayOutputStream outputStream = new ByteArrayOutputStream(BUFFER_SIZE);
+
+        try (InputStream is = new BufferedInputStream(url.openConnection().getInputStream(), BUFFER_SIZE)) {
+            int currentByte;
+            while ((currentByte = is.read()) != -1) {
+                outputStream.write(currentByte);
+            }
+        }
+
+        return outputStream.toByteArray();
+
     }
+
 }
diff --git a/org/geekhub/ImageCrawler.java b/org/geekhub/ImageCrawler.java
index 8cad33b..0dea225 100644
--- a/org/geekhub/ImageCrawler.java
+++ b/org/geekhub/ImageCrawler.java
@@ -1,10 +1,11 @@
 package org.geekhub;
 
-import java.io.*;
+import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
+import java.util.regex.Pattern;
 
 /**
  * ImageCrawler downloads all images to specified folder from specified resource.
@@ -15,7 +16,7 @@ public class ImageCrawler {
 
     //number of threads to download images simultaneously
     public static final int NUMBER_OF_THREADS = 10;
-
+    Pattern imagePattern = Pattern.compile(".+\\.(jpg|jpeg|png|bmp|tiff)");
     private ExecutorService executorService = Executors.newFixedThreadPool(NUMBER_OF_THREADS);
     private String folder;
 
@@ -25,11 +26,19 @@ public ImageCrawler(String folder) throws MalformedURLException {
 
     /**
      * Call this method to start download images from specified URL.
+     *
      * @param urlToPage
      * @throws IOException
      */
     public void downloadImages(String urlToPage) throws IOException {
-        //implement me
+        URL url = new URL(urlToPage);
+        if (isImageURL(url)) {
+            executorService.submit(new ImageTask(url, folder));
+        } else {
+            Page page = new Page(url);
+            page.getImageLinks().stream().forEach(link -> executorService.submit(new ImageTask(link, folder)));
+        }
+
     }
 
     /**
@@ -41,10 +50,7 @@ public void stop() {
 
     //detects is current url is an image. Checking for popular extensions should be enough
     private boolean isImageURL(URL url) {
-        //implement me
-        return false;
+        return imagePattern.matcher(url.toString()).matches();
     }
 
-
-
 }
diff --git a/org/geekhub/ImageTask.java b/org/geekhub/ImageTask.java
index de0a340..599af16 100644
--- a/org/geekhub/ImageTask.java
+++ b/org/geekhub/ImageTask.java
@@ -1,6 +1,11 @@
 package org.geekhub;
 
+import java.io.IOException;
 import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.InvalidPathException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 
 /**
  * Represents worker that downloads image from URL to specified folder.
@@ -20,7 +25,12 @@ public ImageTask(URL url, String folder) {
      */
     @Override
     public void run() {
-        //implement me
+        try {
+            Path path = Paths.get(folder, buildFileName(url));
+            Files.write(path, ConnectionUtils.getData(url));
+        } catch (IOException | InvalidPathException e) {
+            e.printStackTrace();
+        }
     }
 
     //converts URL to unique file name
diff --git a/org/geekhub/Main.java b/org/geekhub/Main.java
index 7cff652..3d1b735 100644
--- a/org/geekhub/Main.java
+++ b/org/geekhub/Main.java
@@ -15,7 +15,7 @@ public static void main(String[] args) throws IOException {
 
         Scanner scanner = new Scanner(System.in);
         String command;
-        while(!"exit".equals(command = scanner.next())) {
+        while (!"exit".equals(command = scanner.next())) {
             imageCrawler.downloadImages(command);
             System.out.println("...and another url:");
         }
diff --git a/org/geekhub/Page.java b/org/geekhub/Page.java
index f9915de..c82481c 100644
--- a/org/geekhub/Page.java
+++ b/org/geekhub/Page.java
@@ -3,7 +3,9 @@
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.*;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -20,6 +22,7 @@ public class Page {
 
     /**
      * Be careful, constructor downloads content, it could be slow.
+     *
      * @param url
      * @throws IOException
      */
@@ -30,6 +33,7 @@ public Page(URL url) throws IOException {
 
     /**
      * Extracts all links from the page like bla. Method does not cache content. Each time new list will be returned.
+     *
      * @return list of URLs from that page.
      * @throws MalformedURLException
      */
@@ -39,6 +43,7 @@ public Collection getLinks() throws MalformedURLException {
 
     /**
      * Extracts all links to images from the page like . Method does not cache content. Each time new list will be returned.
+     *
      * @return list of URLs to images from that page.
      * @throws MalformedURLException
      */
@@ -48,7 +53,7 @@ public Collection getImageLinks() throws MalformedURLException {
 
     private Collection extractMatches(Matcher matcher) throws MalformedURLException {
         Set links = new HashSet<>();
-        while(matcher.find()) {
+        while (matcher.find()) {
             links.add(new URL(url, matcher.group(1)));
         }
         return links;