Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions org/geekhub/ConnectionUtils.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.geekhub;

import java.io.IOException;
import java.io.*;
import java.net.URL;

/**
Expand All @@ -16,7 +16,17 @@ public class ConnectionUtils {
* @throws IOException
*/
public static byte[] getData(URL url) throws IOException {
    // Both streams are managed by try-with-resources so they are closed even when a read
    // fails midway. The caller receives the fully buffered content, never null.
    try (InputStream in = url.openStream();
         ByteArrayOutputStream out = new ByteArrayOutputStream()) {
        byte[] chunk = new byte[8192];
        int read;

        // read() returns -1 once the remote side has no more data.
        while ((read = in.read(chunk)) != -1) {
            out.write(chunk, 0, read);
        }

        return out.toByteArray();
    }
}
}
15 changes: 12 additions & 3 deletions org/geekhub/ImageCrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.net.URL;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Pattern;

/**
* ImageCrawler downloads all images to specified folder from specified resource.
Expand All @@ -19,6 +20,8 @@ public class ImageCrawler {
// Pool that runs the submitted download tasks; sized by NUMBER_OF_THREADS (declared outside this view).
private ExecutorService executorService = Executors.newFixedThreadPool(NUMBER_OF_THREADS);
// Destination folder; handed to every ImageTask this crawler creates.
private String folder;

// Matches a whitespace-free string that ends in .jpg, .png, .gif or .bmp.
// The inline (?i) flag makes the extension comparison case-insensitive.
Pattern imagePattern = Pattern.compile("([^\\s]+(\\.(?i)(jpg|png|gif|bmp))$)");

/**
 * Creates a crawler that hands the given folder to every download task it submits.
 *
 * @param folder destination folder for downloaded images; stored as-is, without validation
 * @throws MalformedURLException declared in the signature, although nothing in this body throws it
 */
public ImageCrawler(String folder) throws MalformedURLException {
this.folder = folder;
}
Expand All @@ -29,7 +32,14 @@ public ImageCrawler(String folder) throws MalformedURLException {
* @throws IOException
*/
public void downloadImages(String urlToPage) throws IOException {
    final URL target = new URL(urlToPage);

    // A URL that already points at an image is queued directly.
    if (isImageURL(target)) {
        executorService.submit(new ImageTask(target, folder));
        return;
    }

    // Otherwise the URL is treated as a page: queue one download task per image link it reports.
    new Page(target)
            .getImageLinks()
            .stream()
            .forEach((URL imageLink) -> executorService.submit(new ImageTask(imageLink, folder)));
}

/**
Expand All @@ -41,8 +51,7 @@ public void stop() {

//detects whether the current URL points to an image. Checking for popular extensions should be enough
private boolean isImageURL(URL url) {
    // Match only the path component. A query string or fragment after the file name
    // (e.g. ".../photo.jpg?size=large") would otherwise sit between the extension and the
    // pattern's end-of-input anchor and make a real image URL look like a page.
    return imagePattern.matcher(url.getPath()).matches();
}


Expand Down
12 changes: 11 additions & 1 deletion org/geekhub/ImageTask.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package org.geekhub;

import java.io.IOException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
* Represents worker that downloads image from URL to specified folder.<br/>
Expand All @@ -20,7 +24,13 @@ public ImageTask(URL url, String folder) {
*/
@Override
public void run() {
    Path target = Paths.get(folder + "/" + buildFileName(url));

    try {
        // Files.write does not create missing parent directories, so make sure the
        // destination folder exists before the first image is written into it.
        Path parent = target.getParent();
        if (parent != null) {
            Files.createDirectories(parent);
        }

        Files.write(target, ConnectionUtils.getData(url));
    } catch (IOException e) {
        // run() cannot propagate checked exceptions; report which download failed
        // before printing the trace so the failure can be tied to a specific URL.
        System.err.println("Failed to download " + url + " to " + target);
        e.printStackTrace();
    }
}

//converts URL to unique file name
Expand Down
4 changes: 2 additions & 2 deletions org/geekhub/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
* Allows high-level access to page elements.
*/
public class Page {
// NOTE(review): linkPattern and imageLinkPattern are each declared twice below. This looks like
// the removed and the added side of a diff shown together; confirm which pair is current, since
// only one declaration of each name can remain.
// Captures the href value of an <a> tag, whatever the value is.
Pattern linkPattern = Pattern.compile("<a\\s(?:[^\\s>]*?\\s)*?href=\"(.*?)\".*?>");
// Captures the src value of an <img> tag that is closed by "/>" or "</img>".
Pattern imageLinkPattern = Pattern.compile("<img.*?src=\"(.*?)\".*?(/>|</img>)");
// Captures the href value of an <a> tag only when the value begins with "http" or "https".
Pattern linkPattern = Pattern.compile("<a\\s(?:[^\\s>]*?\\s)*?href=\"(https?.*?)\".*?>");
// Captures the src value of an <img> tag; the trailing group accepts "/>" or the empty string.
Pattern imageLinkPattern = Pattern.compile("<img.*?src=\"(.*?)\".*?(/>|)");

private String content;
private URL url;
Expand Down