diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2ca22b2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +### Example user template template +### Example user template + +# IntelliJ project files +.idea +*.iml +out +gen +# Created by .ignore support plugin (hsz.mobi) diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/org/geekhub/ConnectionUtils.java b/org/geekhub/ConnectionUtils.java index f67536f..1cb989e 100644 --- a/org/geekhub/ConnectionUtils.java +++ b/org/geekhub/ConnectionUtils.java @@ -1,8 +1,11 @@ package org.geekhub; +import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.InputStream; import java.net.URL; + /** * Utils class that contains useful method to interact with URLConnection */ @@ -11,12 +14,24 @@ public class ConnectionUtils { /** * Downloads content for specified URL and returns it as a byte array. * Should be used for small files only. Don't use it to download big files it's dangerous. + * * @param url * @return * @throws IOException */ - public static byte[] getData(URL url) throws IOException { - //implement me - return null; + public static byte[] getData(URL url) { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + int nRead; + byte[] data = new byte[16384]; + + try (InputStream inStream = url.openConnection().getInputStream()) { + while ((nRead = inStream.read(data, 0, data.length)) != -1) { + buffer.write(data, 0, nRead); + } + } catch (IOException e) { + e.printStackTrace(); + } + + return buffer.toByteArray(); } } diff --git a/org/geekhub/ImageCrawler.java b/org/geekhub/ImageCrawler.java index 8cad33b..7935eaa 100644 --- a/org/geekhub/ImageCrawler.java +++ b/org/geekhub/ImageCrawler.java @@ -12,10 +12,8 @@ * To shutdown the service you should call stop() method */ public class ImageCrawler { - //number of threads to download images simultaneously public static final int NUMBER_OF_THREADS = 10; - private ExecutorService executorService = Executors.newFixedThreadPool(NUMBER_OF_THREADS); private String folder; @@ -25,11 +23,18 @@ public ImageCrawler(String folder) throws MalformedURLException { /** * Call this method to start download images from specified URL. + * * @param urlToPage * @throws IOException */ public void downloadImages(String urlToPage) throws IOException { - //implement me + Page page = new Page(new URL(urlToPage)); + + for (URL url : page.getImageLinks()) + if (isImageURL(url)) { + ImageTask imageTask = new ImageTask(url, folder); + executorService.execute(imageTask); + } } /** @@ -41,10 +46,15 @@ public void stop() { //detects is current url is an image. Checking for popular extensions should be enough private boolean isImageURL(URL url) { - //implement me + String[] fileExtensions = {"jpg", "png"}; + String fileName = url.toString(); + String fileExtension = fileName.substring(fileName.lastIndexOf(".") + 1, fileName.length()); + for (String extension : fileExtensions) { + if (extension.equalsIgnoreCase(fileExtension)) + return true; + } return false; } - } diff --git a/org/geekhub/ImageTask.java b/org/geekhub/ImageTask.java index de0a340..b58589e 100644 --- a/org/geekhub/ImageTask.java +++ b/org/geekhub/ImageTask.java @@ -1,6 +1,11 @@ package org.geekhub; +import java.io.IOException; +import java.net.MalformedURLException; import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; /** * Represents worker that downloads image from URL to specified folder.
@@ -20,11 +25,19 @@ public ImageTask(URL url, String folder) { */ @Override public void run() { - //implement me + Path folderPath = Paths.get(folder + buildFileName(url)); + try { + Files.write(folderPath,ConnectionUtils.getData(url)); + } catch (IOException e) { + e.printStackTrace(); + } } //converts URL to unique file name private String buildFileName(URL url) { return url.toString().replaceAll("[^a-zA-Z0-9-_\\.]", "_"); } + + + } diff --git a/org/geekhub/Main.java b/org/geekhub/Main.java index 7cff652..0eb7c8a 100644 --- a/org/geekhub/Main.java +++ b/org/geekhub/Main.java @@ -5,7 +5,7 @@ public class Main { - public static final String FOLDER_TO_DOWNLOAD = "d:/images/"; + public static final String FOLDER_TO_DOWNLOAD = "/home/sergei/picture/"; public static void main(String[] args) throws IOException { ImageCrawler imageCrawler = new ImageCrawler(FOLDER_TO_DOWNLOAD); @@ -16,9 +16,12 @@ public static void main(String[] args) throws IOException { Scanner scanner = new Scanner(System.in); String command; while(!"exit".equals(command = scanner.next())) { + System.out.println(command); imageCrawler.downloadImages(command); + System.out.println("...and another url:"); } + System.out.println("exit go"); imageCrawler.stop(); } }