Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/out
.idea
*.iml
22 changes: 0 additions & 22 deletions org/geekhub/ConnectionUtils.java

This file was deleted.

33 changes: 33 additions & 0 deletions src/org/geekhub/ConnectionUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.geekhub;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;

/**
 * Utility class with helper methods for working with {@link URLConnection}.
 */
public class ConnectionUtils {

    /** Number of bytes requested from the stream per read while copying into memory. */
    private static final int CHUNK_SIZE = 8192;

    /**
     * Downloads the content behind the specified URL and returns it as a byte array.
     * The whole response is accumulated in memory, so this should be used for small
     * resources only; downloading big files this way is dangerous.
     *
     * @param url url of the web page (or other resource) to download
     * @return everything read from the URL's input stream, as a byte array
     * @throws IOException if the connection cannot be opened or reading from it fails
     */
    public static byte[] getData(URL url) throws IOException {
        URLConnection connection = url.openConnection();
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        try (BufferedInputStream in = new BufferedInputStream(connection.getInputStream())) {
            // Copy in chunks rather than one byte per call: same result, far fewer calls.
            byte[] chunk = new byte[CHUNK_SIZE];
            int read;
            while ((read = in.read(chunk)) != -1) {
                outputStream.write(chunk, 0, read);
            }
        }
        return outputStream.toByteArray();
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collection;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

Expand All @@ -25,11 +26,14 @@ public ImageCrawler(String folder) throws MalformedURLException {

/**
 * Call this method to start downloading images from the specified URL.
 * A {@code Page} is built for the URL, and every link it reports that passes
 * {@code isImageURL} is submitted to the executor service as a separate {@code ImageTask}.
 *
 * @param urlToPage url to web page
 * @throws IOException if the URL string is malformed or the page cannot be created
 */
public void downloadImages(String urlToPage) throws IOException {
    URL pageUrl = new URL(urlToPage);
    Page page = new Page(pageUrl);
    for (URL imageLink : page.getImageLinks()) {
        if (isImageURL(imageLink)) {
            executorService.submit(new ImageTask(imageLink, folder));
        }
    }
}

/**
Expand All @@ -39,12 +43,11 @@ public void stop() {
executorService.shutdown();
}

/**
 * Detects whether the given URL points to an image by checking for popular extensions.
 * The check ignores letter case and looks at both the full URL text and its path part,
 * so links such as {@code .../photo.JPG} or {@code .../photo.png?size=large} are
 * recognised in addition to plain {@code .../photo.png}.
 *
 * @param url link to inspect
 * @return {@code true} if the URL ends with one of the known image extensions
 */
private boolean isImageURL(URL url) {
    String[] extensions = {".png", ".jpg", ".jpeg", ".gif", ".ico", ".svg", ".bmp"};
    // Locale.ROOT keeps lower-casing independent of the machine's default locale.
    String full = url.toString().toLowerCase(java.util.Locale.ROOT);
    String path = url.getPath().toLowerCase(java.util.Locale.ROOT);
    for (String extension : extensions) {
        if (full.endsWith(extension) || path.endsWith(extension)) {
            return true;
        }
    }
    return false;
}



}
18 changes: 16 additions & 2 deletions org/geekhub/ImageTask.java → src/org/geekhub/ImageTask.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.geekhub;

import java.io.*;
import java.net.URL;

/**
Expand All @@ -20,11 +21,24 @@ public ImageTask(URL url, String folder) {
*/
@Override
public void run() {
    // Streams the content behind url into a file under folder; both streams are
    // closed by try-with-resources whether or not the copy succeeds.
    try (
        BufferedInputStream inputStream = new BufferedInputStream(url.openStream());
        BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(folder + buildFileName(url)))
    ) {
        // Copy in chunks instead of one byte per call.
        byte[] chunk = new byte[8192];
        int read;
        while ((read = inputStream.read(chunk)) != -1) {
            outputStream.write(chunk, 0, read);
        }
    } catch (IOException e) {
        // Best effort: a failed download is only reported, but the cause is included
        // so problems such as a missing target folder can be diagnosed.
        System.out.println("Error while loading data from " + url + ": " + e);
    }
}

/**
 * Converts a URL to a file name by replacing every character of its text that is
 * not an ASCII letter, a digit, '-', '_' or '.' with an underscore.
 * NOTE(review): different URLs can map to the same name, so uniqueness is not guaranteed.
 *
 * @param url link whose text is turned into a file name
 * @return the sanitised file name
 */
private String buildFileName(URL url) {
    String address = url.toString();
    String sanitised = address.replaceAll("[^a-zA-Z0-9-_\\.]", "_");
    return sanitised;
}

}
2 changes: 1 addition & 1 deletion org/geekhub/Main.java → src/org/geekhub/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

public class Main {

public static final String FOLDER_TO_DOWNLOAD = "d:/images/";
public static final String FOLDER_TO_DOWNLOAD = "/tmp/images/";

public static void main(String[] args) throws IOException {
ImageCrawler imageCrawler = new ImageCrawler(FOLDER_TO_DOWNLOAD);
Expand Down
3 changes: 2 additions & 1 deletion org/geekhub/Page.java → src/org/geekhub/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public class Page {

/**
* Be careful, constructor downloads content, it could be slow.
* @param url
* @param url url to web page
* @throws IOException
*/
public Page(URL url) throws IOException {
Expand Down Expand Up @@ -53,4 +53,5 @@ private Collection<URL> extractMatches(Matcher matcher) throws MalformedURLExcep
}
return links;
}

}