Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions org/geekhub/ConnectionUtils.java
Original file line number Diff line number Diff line change
@@ -1,22 +1,39 @@
package org.geekhub;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

/**
 * Utility class with helper methods for reading data through a {@link java.net.URLConnection}.
 */
public class ConnectionUtils {

    /** Size, in bytes, of the chunk used when copying the response and of the initial result buffer. */
    public static final int BUFFER_SIZE = 16 * 1024;

    /**
     * Downloads the content behind the specified URL and returns it as a byte array.
     * The whole content is accumulated in memory, so this should be used for small
     * resources only; do not use it to download big files.
     *
     * @param url the URL to read from
     * @return all bytes read from the URL's input stream; an empty array if the stream is empty
     * @throws IOException if the connection cannot be opened or reading fails
     */
    public static byte[] getData(URL url) throws IOException {
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream(BUFFER_SIZE);

        // try-with-resources guarantees the connection stream is closed even if reading fails
        try (InputStream is = url.openConnection().getInputStream()) {
            // Copy in chunks rather than one byte at a time; the chunk array itself
            // acts as the buffer, so no extra BufferedInputStream wrapper is needed.
            byte[] chunk = new byte[BUFFER_SIZE];
            int bytesRead;
            while ((bytesRead = is.read(chunk)) != -1) {
                outputStream.write(chunk, 0, bytesRead);
            }
        }

        return outputStream.toByteArray();
    }

}
20 changes: 13 additions & 7 deletions org/geekhub/ImageCrawler.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
package org.geekhub;

import java.io.*;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Pattern;

/**
* ImageCrawler downloads all images to specified folder from specified resource.
Expand All @@ -15,7 +16,7 @@ public class ImageCrawler {

//number of threads to download images simultaneously
public static final int NUMBER_OF_THREADS = 10;

Pattern imagePattern = Pattern.compile(".+\\.(jpg|jpeg|png|bmp|tiff)");
private ExecutorService executorService = Executors.newFixedThreadPool(NUMBER_OF_THREADS);
private String folder;

Expand All @@ -25,11 +26,19 @@ public ImageCrawler(String folder) throws MalformedURLException {

/**
 * Schedules image downloads for the given address.
 * <p>
 * When the address itself is recognised as an image (see {@code isImageURL}), a single
 * {@code ImageTask} is submitted for it. Otherwise the address is loaded as a {@code Page}
 * and one {@code ImageTask} is submitted for every image link the page reports.
 *
 * @param urlToPage address of an image or of a page containing images
 * @throws IOException if the address is not a valid URL or the page cannot be loaded
 */
public void downloadImages(String urlToPage) throws IOException {
    final URL target = new URL(urlToPage);

    if (!isImageURL(target)) {
        // Not a direct image link: load the page and queue every image it references.
        final Page page = new Page(target);
        for (URL imageLink : page.getImageLinks()) {
            executorService.submit(new ImageTask(imageLink, folder));
        }
        return;
    }

    executorService.submit(new ImageTask(target, folder));
}

/**
Expand All @@ -41,10 +50,7 @@ public void stop() {

// Detects whether the given URL points to an image by matching its full string form
// against imagePattern. Checking for popular extensions should be enough.
private boolean isImageURL(URL url) {
    return imagePattern.matcher(url.toString()).matches();
}



}
12 changes: 11 additions & 1 deletion org/geekhub/ImageTask.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
package org.geekhub;

import java.io.IOException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
* Represents worker that downloads image from URL to specified folder.<br/>
Expand All @@ -20,7 +25,12 @@ public ImageTask(URL url, String folder) {
*/
@Override
public void run() {
    try {
        // Resolve the destination first so an invalid path is reported before any download starts.
        final String fileName = buildFileName(url);
        final Path destination = Paths.get(folder, fileName);
        final byte[] content = ConnectionUtils.getData(url);
        Files.write(destination, content);
    } catch (IOException | InvalidPathException failure) {
        // A failed download must not kill the worker thread; report it and move on.
        failure.printStackTrace();
    }
}

//converts URL to unique file name
Expand Down
2 changes: 1 addition & 1 deletion org/geekhub/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static void main(String[] args) throws IOException {

Scanner scanner = new Scanner(System.in);
String command;
while(!"exit".equals(command = scanner.next())) {
while (!"exit".equals(command = scanner.next())) {
imageCrawler.downloadImages(command);
System.out.println("...and another url:");
}
Expand Down
9 changes: 7 additions & 2 deletions org/geekhub/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand All @@ -20,6 +22,7 @@ public class Page {

/**
* Be careful: the constructor downloads the page content, which can be slow.
*
* @param url
* @throws IOException
*/
Expand All @@ -30,6 +33,7 @@ public Page(URL url) throws IOException {

/**
* Extracts all links from the page, like <a href={link}>bla</a>. The result is not cached; a new collection is returned on each call.
*
* @return list of URLs from that page.
* @throws MalformedURLException
*/
Expand All @@ -39,6 +43,7 @@ public Collection<URL> getLinks() throws MalformedURLException {

/**
* Extracts all links to images from the page, like <img src={link}/>. The result is not cached; a new collection is returned on each call.
*
* @return list of URLs to images from that page.
* @throws MalformedURLException
*/
Expand All @@ -48,7 +53,7 @@ public Collection<URL> getImageLinks() throws MalformedURLException {

private Collection<URL> extractMatches(Matcher matcher) throws MalformedURLException {
Set<URL> links = new HashSet<>();
while(matcher.find()) {
while (matcher.find()) {
links.add(new URL(url, matcher.group(1)));
}
return links;
Expand Down