diff --git a/.gitignore b/.gitignore
index be2baa1..fffebb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,8 @@
+.idea
+.vcs
+
+*.csv
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/README.md b/README.md
index e557119..40aae9d 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,66 @@
-vgchartzfull is a python script based on BeautifulSoup.
-It creates a dataset based on data from 
-http://www.vgchartz.com/gamedb/
+# vgchartzfull - A crawler to download data from Global Videogame Sales
 
-The dataset is saved as vgsales.csv.
+vgchartz-full-crawler.py is a Python 3 crawler script based on BeautifulSoup.
+It creates a CSV dataset covering more than 57,000 games, based on data from the [VGChartz site](http://www.vgchartz.com/gamedb/).  
+
+## Output
+
+The dataset is saved to the file specified in cfg/resources.json (by default "dataset/vgsales.csv").
+
+## Install & execution
+
+You will need to install the dependencies listed in **requirements.txt**.
 
-You will need to have BeautifulSoup added.
 It can be installed by pip.
 
-sudo pip install BeautifulSoup
+```bash
+
+  # Install dependencies
+  $> pip install -r requirements.txt
+  
+  # Run
+  $> python vgchartz-full-crawler.py
+  
+
+```
+
+## Dictionary
+
+The dataset is composed of the following fields; the data is collected with this [methodology](https://www.vgchartz.com/methodology.php).
+
+| Field | Description              |
+|-------|--------------------------|
+| Rank  | Ranking of overall sales |
+| Name | The game's name |
+| Genre | Genre of the game |
+| Platform | Platform of the game's release (e.g. PC, PS4, etc.) |
+| Developer | Developer of the game | 
+| Publisher | Publisher of the game |
+| Vgchartz_Score | Score on the VGChartz site |
+| Critic_Score | Score given by critics |
+| User_Score | Score given by VGChartz site users |
+| Total_Shipped | Total worldwide shipments (in millions) | 
+| Total_Sales | Total worldwide sales (in millions) |
+| NA_Sales | Sales in North America (in millions) |
+| PAL_Sales | Sales in Europe / PAL region (in millions) |
+| JP_Sales | Sales in Japan (in millions) |
+| Other_Sales | Sales in the rest of the world (in millions) |
+| Release_Date | Date of the game's release |
+| Last_Update | Last update of this record |
+
+## TODO
+
+- [ ] Remap the columns according to the values selected in resources.json
+- [ ] Add some unit testing
+- [ ] Dockerize (w/ alpine-python) to ease use and avoid installations
+- [ ] Publish at Docker hub
+
+## Links
+
+* [vgchartz.com](https://www.vgchartz.com)
+* [Original Crawler](https://github.com/GregorUT/vgchartzScrape)
+* [Kaggle Dataset](https://www.kaggle.com/gregorut/videogamesales)
+
+## Acknowledgements
 
-Thanks to Chris Albon.
-http://chrisalbon.com/python/beautiful_soup_scrape_table.html
+Thanks to [Chris Albon](http://chrisalbon.com/python/beautiful_soup_scrape_table.html) 
diff --git a/cfg/resources.json b/cfg/resources.json
new file mode 100644
index 0000000..4e65ceb
--- /dev/null
+++ b/cfg/resources.json
@@ -0,0 +1,44 @@
+{
+  "application_log_filename": "log/app.log",
+  "output_filename": "dataset/vgsales.csv",
+  "separator": ",",
+  "encoding": "utf-8",
+  "start_page": 1,
+  "end_page": 2,
+  "include_genre": false,
+  "base_page_url": "https://www.vgchartz.com/gamedb/?page=",
+  "query_parameters": {
+    "results": 100,
+    "region": "All",
+    "boxart": "Both",
+    "banner": "Both",
+    "ownership": "Both",
+    "showmultiplat": "No",
+    "order": "Sales",
+    "showtotalsales": 1,
+    "showpublisher": 1,
+    "showvgchartzscore": 1,
+    "shownasales": 1,
+    "showdeveloper": 1,
+    "showcriticscore": 1,
+    "showpalsales": 1,
+    "showreleasedate": 1,
+    "showuserscore": 1,
+    "showjapansales": 1,
+    "showlastupdate": 1,
+    "showothersales": 1,
+    "showshipped": 1,
+    "keyword": null,
+    "console": null,
+    "developer": null,
+    "publisher": null,
+    "goty_year": null,
+    "genre": null
+  },
+  "minimum_sleep_time": 6,
+  "maximum_sleep_time": 15,
+  "minimum_major_version": 1,
+  "maximum_major_version": 56,
+  "minimum_minor_version": 1,
+  "maximum_minor_version": 10
+}
diff --git a/dataset/.gitkeep b/dataset/.gitkeep
new file mode 100644
index 0000000..fe91d07
--- /dev/null
+++ b/dataset/.gitkeep
@@ -0,0 +1 @@
+Git doesn't like empty folders
\ No newline at end of file
diff --git a/log/.gitkeep b/log/.gitkeep
new file mode 100644
index 0000000..fe91d07
--- /dev/null
+++ b/log/.gitkeep
@@ -0,0 +1 @@
+Git doesn't like empty folders
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3311a7a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+beautifulsoup4==4.8.2
+bs4==0.0.1
+numpy==1.18.2
+pandas==1.0.3
+python-dateutil==2.8.1
+pytz==2019.3
+six==1.14.0
+soupsieve==2.0
\ No newline at end of file
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..60ece4b
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+# Install the crawler's dependencies and run it; abort early when Python 3 or pip is missing.
+python -c 'import sys; sys.exit(sys.version_info < (3, 6))' >/dev/null 2>&1 || { echo >&2 "I require python@3 utility but it's not installed. ¯\_(ツ)_/¯ Aborting."; exit 1; }
+pip --version >/dev/null 2>&1 || { echo >&2 "I require pip utility but it's not installed. ¯\_(ツ)_/¯ Aborting."; exit 1; }
+
+clear
+# printf interprets "\n"; bash's builtin echo would print it literally without -e
+printf '\nInstalling deps... \n'
+pip install -r requirements.txt || { echo >&2 "Dependency installation failed. Aborting."; exit 1; }
+
+printf '\nStart crawling... (remember a crawler is the friend nobody likes)\n'
+python vgchartz-full-crawler.py
+
diff --git a/vgchartz-full-crawler.py b/vgchartz-full-crawler.py
new file mode 100644
index 0000000..7c6c30c
--- /dev/null
+++ b/vgchartz-full-crawler.py
@@ -0,0 +1,339 @@
+from bs4 import BeautifulSoup, element
+from random import randint, choice
+import urllib
+import urllib.request
+import pandas as pd
+import numpy as np
+import logging
+import sys
+import time
+import json
+
+def create_random_header():
+    """
+    Build a request header with a randomly composed User-Agent, so that
+    successive requests look less like one automated client.
+    :return dict with 'User-Agent' as key and a "<browser>/<major>.<minor> (<os>)" string as value
+    """
+    logging.info("create_random_header >>>")
+
+    # Version bounds come from the module-level `properties` loaded from resources.json
+    major = randint(properties['minimum_major_version'],
+                    properties['maximum_major_version'])
+    minor = randint(properties['minimum_minor_version'],
+                    properties['maximum_minor_version'])
+    browser = choice(["Mozilla", "Chrome"])
+    operating_system = choice(["Windows NT 6.1; Win64; x64", "X11; Linux x86_64"])
+
+    version = f"{major}.{minor}"
+    user_agent = f"{browser}/{version} ({operating_system})"
+    header = {'User-Agent': user_agent}
+    logging.debug("Current user_agent: {}".format(header))
+    logging.info("create_random_header <<<")
+    return header
+
+def generate_remaining_url(*, query_parameters):
+    """
+    Build the URL-encoded query-string tail that follows the page number.
+    :param query_parameters: dict from resources.json; None values become empty
+    :return: String such as "&results=100&region=All&keyword=" ("" if no params)
+    """
+    from urllib.parse import urlencode
+    logging.info("generate_remaining_url >>>")
+    pairs = [(k, "" if v is None else v) for k, v in query_parameters.items()]
+    reply = "&" + urlencode(pairs) if pairs else ""
+    logging.debug(f"Url Generated: {base_url}N{reply}")
+    logging.info("generate_remaining_url <<<")
+    return reply
+
+def get_page(*, url):
+    """
+    Perform a GET request to the given URL and return results.
+    Sleep a random time afterwards; combined with the random header this
+    helps avoiding HTTP 429 errors.
+    :param url: webpage URL
+    :return: HTML page's body (bytes)
+    """
+    logging.info("get_page >>>")
+    logging.debug("Current URL: {}".format(url))
+    request = urllib.request.Request(url, headers=create_random_header())
+    with urllib.request.urlopen(request) as response:  # always close the connection
+        result = response.read()
+    time.sleep(randint(properties['minimum_sleep_time'], properties['maximum_sleep_time']))
+    logging.info("get_page <<<")
+    return result
+
+
+def get_genre(*, game_url):
+    """
+    Return the game genre retrieved from the given url
+    (It involves another http request)
+    :param game_url: URL of the game's detail page
+    :return: Genre of the input game, or "" when the page has no genre entry
+    """
+    logging.info("get_genre >>>")
+    logging.debug("Page to download: {}".format(game_url))
+    site_raw = get_page(url=game_url)
+    sub_soup = BeautifulSoup(site_raw, "html.parser")
+
+    # The info box is inconsistent among games, so we scan all of its
+    # h2 headings for the one titled "Genre"; the genre name is the
+    # node right after that heading. A missing box or heading yields ""
+    # instead of crashing the whole crawl.
+    info_box = sub_soup.find("div", {"id": "gameGenInfoBox"})
+    h2s = info_box.find_all('h2') if info_box is not None else []
+    genre_tag = next((h2 for h2 in h2s if h2.string == 'Genre'), None)
+
+    genre_value = ""
+    if genre_tag is not None and genre_tag.next_sibling is not None:
+        genre_value = genre_tag.next_sibling.string or ""
+
+    logging.debug("Game genre: {}".format(genre_value))
+    logging.info("get_genre <<<")
+    return genre_value
+
+def parse_number(*, number_string):
+    """
+    Parse a table cell into a float; a trailing "m" means millions ("1.5m" -> 1500000.0).
+    :param number_string: cell text, or None when the cell holds no single string
+    :return: the parsed float, or np.nan for "N/A", empty or None input
+    """
+    logging.info("parse_number >>>")
+    text = (number_string or "").strip()
+    if not text or text.startswith("N/A"):
+        reply = np.nan
+    elif text.endswith("m"):
+        reply = float(text[:-1]) * 1000000
+    else:
+        reply = float(text)
+    logging.info("parse_number <<<")
+    return reply
+
+def parse_date(*, date_string):
+    """
+    Convert a scraped date cell into a pandas timestamp.
+    :param date_string: cell text such as '18th Feb 20'; None is treated like 'N/A'
+    :return: A pandas Timestamp, or the string 'N/A' when no date is available
+    """
+    logging.info("parse_date >>>")
+    text = (date_string or "").strip()
+    if not text or text.startswith('N/A'):
+        date_formatted = 'N/A'
+    else:
+        # i.e. text = '18th Feb 20'
+        date_formatted = pd.to_datetime(text)
+    logging.debug("Date parsed: {}".format(date_formatted))
+    logging.info("parse_date <<<")
+    return date_formatted
+
+def add_current_game_data(*,
+                          current_rank,
+                          current_game_name,
+                          current_game_genre,
+                          current_platform,
+                          current_publisher,
+                          current_developer,
+                          current_vgchartz_score,
+                          current_critic_score,
+                          current_user_score,
+                          current_total_shipped,
+                          current_total_sales,
+                          current_sales_na,
+                          current_sales_pal,
+                          current_sales_jp,
+                          current_sales_ot,
+                          current_release_date,
+                          current_last_update):
+    """
+    Append one game's values to the module-level column lists (publisher/developer stripped, None -> "")
+    """
+    logging.info("add_current_game_data >>>")
+    game_name.append(current_game_name)
+    rank.append(current_rank)
+    platform.append(current_platform)
+    genre.append(current_game_genre)
+    publisher.append((current_publisher or "").strip())
+    developer.append((current_developer or "").strip())
+    vgchartz_score.append(current_vgchartz_score)
+    critic_score.append(current_critic_score)
+    user_score.append(current_user_score)
+    total_shipped.append(current_total_shipped)
+    total_sales.append(current_total_sales)
+    sales_na.append(current_sales_na)
+    sales_pal.append(current_sales_pal)
+    sales_jp.append(current_sales_jp)
+    sales_ot.append(current_sales_ot)
+    release_date.append(current_release_date)
+    last_update.append(current_last_update)
+    logging.info("add_current_game_data <<<")
+
+
+def download_data(*, start_page, end_page, include_genre):
+    """
+    Download games data from vgchartz: every page in the inclusive range [start_page, end_page] is fetched
+    :param start_page: first results page to download
+    :param end_page: last results page to download (included)
+    :param include_genre: when true, fetch each game's own page to read its genre
+    :return: None; rows are appended to the module-level lists via add_current_game_data
+    """
+    logging.info("download_data >>>")
+    downloaded_games = 0  # Running count of parsed rows, logged at the end
+    for page in range(start_page, end_page + 1):
+        page_url = "{}{}{}".format(base_url, str(page), remaining_url)
+        current_page = get_page(url=page_url)
+        soup = BeautifulSoup(current_page, features="html.parser")
+        logging.info("Downloaded page {}".format(page))
+
+        # We locate the game through search <a> tags with game urls in the main table
+        game_tags = list(filter(
+            # .get() because anchors without an href attribute would raise KeyError
+            lambda x: x.attrs.get('href', '').startswith('https://www.vgchartz.com/game/'),
+            # discard the first 10 matches because those links are in the navigation bar
+            soup.find_all("a")
+        ))[10:]
+
+        for tag in game_tags:
+            # get_text() also works when the link wraps nested tags (.string would be None)
+            current_game_name = " ".join(tag.get_text().split())
+            data = tag.parent.parent.find_all("td")
+
+            logging.debug("Downloaded game: {}. Name: {}".format(downloaded_games + 1, current_game_name))
+
+            # Get the rest of the attributes: walk up the DOM tree to the row's cells in the results table
+            current_rank = np.int32(data[0].string)
+            current_platform = data[3].find('img').attrs['alt']
+            current_publisher = data[4].string
+            current_developer = data[5].string
+            current_vgchartz_score = parse_number(number_string=data[6].string)
+            current_critic_score = parse_number(number_string=data[7].string)
+            current_user_score = parse_number(number_string=data[8].string)
+            current_total_shipped = parse_number(number_string=data[9].string)
+            current_total_sales = parse_number(number_string=data[10].string)
+            current_sales_na = parse_number(number_string=data[11].string)
+            current_sales_pal = parse_number(number_string=data[12].string)
+            current_sales_jp = parse_number(number_string=data[13].string)
+            current_sales_ot = parse_number(number_string=data[14].string)
+            current_release_date = parse_date(date_string=data[15].string)
+            current_last_update = parse_date(date_string=data[16].string)
+
+            # The genre requires another HTTP Request, so it's made at the end
+            game_url = tag.attrs['href']
+            current_game_genre = ""
+            if include_genre:
+                current_game_genre = get_genre(game_url=game_url)
+
+            add_current_game_data(
+                current_rank=current_rank,
+                current_game_name=current_game_name,
+                current_game_genre=current_game_genre,
+                current_platform=current_platform,
+                current_publisher=current_publisher,
+                current_developer=current_developer,
+                current_vgchartz_score=current_vgchartz_score,
+                current_critic_score=current_critic_score,
+                current_user_score=current_user_score,
+                current_total_shipped=current_total_shipped,
+                current_total_sales=current_total_sales,
+                current_sales_na=current_sales_na,
+                current_sales_pal=current_sales_pal,
+                current_sales_jp=current_sales_jp,
+                current_sales_ot=current_sales_ot,
+                current_release_date=current_release_date,
+                current_last_update=current_last_update)
+
+            downloaded_games += 1
+
+    logging.info("Number of downloaded resources: {}".format(downloaded_games))
+    logging.info("download_data <<<")
+
+
+def save_games_data(*, filename, separator, enc):
+    """
+    Dump every collected column into a CSV file.
+    :param filename: path of the output file
+    :param separator: field delimiter
+    :param enc: text encoding of the file
+    """
+    logging.info("save_games_data >>>")
+    # Column label -> module-level buffer, listed in the order written to the CSV
+    ordered_columns = [
+        ('Rank', rank),
+        ('Name', game_name),
+        ('Genre', genre),
+        ('Platform', platform),
+        ('Publisher', publisher),
+        ('Developer', developer),
+        ('Vgchartz_Score', vgchartz_score),
+        ('Critic_Score', critic_score),
+        ('User_Score', user_score),
+        ('Total_Shipped', total_shipped),
+        ('Total_Sales', total_sales),
+        ('NA_Sales', sales_na),
+        ('PAL_Sales', sales_pal),
+        ('JP_Sales', sales_jp),
+        ('Other_Sales', sales_ot),
+        ('Release_Date', release_date),
+        ('Last_Update', last_update),
+    ]
+
+    frame = pd.DataFrame(dict(ordered_columns))
+    logging.debug("Dataframe column name: {}".format(frame.columns))
+    # Select the columns explicitly so the CSV layout is spelled out in one place
+    header = [label for label, _ in ordered_columns]
+    frame = frame[header]
+
+    frame.to_csv(filename, sep=separator, encoding=enc, index=False)
+    logging.info("save_games_data <<<")
+
+if __name__ == "__main__":
+
+    # Buffers: one module-level list per CSV column, filled by add_current_game_data
+    rank = []
+    game_name = []
+    genre = []
+    platform = []
+    publisher, developer = [], []
+    critic_score, user_score, vgchartz_score = [], [], []
+    total_shipped = []
+    total_sales, sales_na, sales_pal, sales_jp, sales_ot = [], [], [], [], []
+    release_date, last_update = [], []
+
+    # Settings read as a module-level global by the functions above
+
+    with open("cfg/resources.json", encoding="utf-8") as file:
+        properties = json.load(file)
+
+    logging.root.handlers = []
+    logging.basicConfig(format='%(asctime)s|%(name)s|%(levelname)s| %(message)s',
+                        level=logging.DEBUG,
+                        filename=properties["application_log_filename"])
+
+    # set up logging to console
+    console = logging.StreamHandler()
+    console.setLevel(logging.DEBUG)
+
+    # same layout as the file log, with an explicit day-first timestamp
+    formatter = logging.Formatter(fmt='%(asctime)s|%(name)s|%(levelname)s| %(message)s',
+                                  datefmt="%d-%m-%Y %H:%M:%S")
+    console.setFormatter(formatter)
+    logging.getLogger("").addHandler(console)
+
+    try:
+        logging.info('Application started')
+        base_url = properties['base_page_url']
+        remaining_url=generate_remaining_url(query_parameters=properties['query_parameters'])
+
+        download_data(
+            start_page=properties['start_page'],
+            end_page=properties['end_page'],
+            include_genre=properties['include_genre'])
+
+        save_games_data(
+            filename=properties['output_filename'],
+            separator=properties['separator'],
+            enc=properties['encoding'])
+
+    except Exception:
+        # Log the traceback (file + console) and exit non-zero so run.sh sees the failure
+        logging.exception("Unexpected error")
+        sys.exit(1)
diff --git a/vgchartzfull.py b/vgchartzfull.py
deleted file mode 100644
index b1d75a4..0000000
--- a/vgchartzfull.py
+++ /dev/null
@@ -1,130 +0,0 @@
-from bs4 import BeautifulSoup, element
-import urllib
-import pandas as pd
-import numpy as np
-
-pages = 19
-rec_count = 0
-rank = []
-gname = []
-platform = []
-year = []
-genre = []
-critic_score = []
-user_score = []
-publisher = []
-developer = []
-sales_na = []
-sales_pal = []
-sales_jp = []
-sales_ot = []
-sales_gl = []
-
-urlhead = 'http://www.vgchartz.com/gamedb/?page='
-urltail = '&console=&region=All&developer=&publisher=&genre=&boxart=Both&ownership=Both'
-urltail += '&results=1000&order=Sales&showtotalsales=0&showtotalsales=1&showpublisher=0'
-urltail += '&showpublisher=1&showvgchartzscore=0&shownasales=1&showdeveloper=1&showcriticscore=1'
-urltail += '&showpalsales=0&showpalsales=1&showreleasedate=1&showuserscore=1&showjapansales=1'
-urltail += '&showlastupdate=0&showothersales=1&showgenre=1&sort=GL'
-
-for page in range(1, pages):
-    surl = urlhead + str(page) + urltail
-    r = urllib.request.urlopen(surl).read()
-    soup = BeautifulSoup(r)
-    print(f"Page: {page}")
-
-    # vgchartz website is really weird so we have to search for
-    # <a> tags with game urls
-    game_tags = list(filter(
-        lambda x: x.attrs['href'].startswith('http://www.vgchartz.com/game/'),
-        # discard the first 10 elements because those
-        # links are in the navigation bar
-        soup.find_all("a")
-    ))[10:]
-
-    for tag in game_tags:
-
-        # add name to list
-        gname.append(" ".join(tag.string.split()))
-        print(f"{rec_count + 1} Fetch data for game {gname[-1]}")
-
-        # get different attributes
-        # traverse up the DOM tree
-        data = tag.parent.parent.find_all("td")
-        rank.append(np.int32(data[0].string))
-        platform.append(data[3].find('img').attrs['alt'])
-        publisher.append(data[4].string)
-        developer.append(data[5].string)
-        critic_score.append(
-            float(data[6].string) if
-            not data[6].string.startswith("N/A") else np.nan)
-        user_score.append(
-            float(data[7].string) if
-            not data[7].string.startswith("N/A") else np.nan)
-        sales_na.append(
-            float(data[9].string[:-1]) if
-            not data[9].string.startswith("N/A") else np.nan)
-        sales_pal.append(
-            float(data[10].string[:-1]) if
-            not data[10].string.startswith("N/A") else np.nan)
-        sales_jp.append(
-            float(data[11].string[:-1]) if
-            not data[11].string.startswith("N/A") else np.nan)
-        sales_ot.append(
-            float(data[12].string[:-1]) if
-            not data[12].string.startswith("N/A") else np.nan)
-        sales_gl.append(
-            float(data[8].string[:-1]) if
-            not data[8].string.startswith("N/A") else np.nan)
-        release_year = data[13].string.split()[-1]
-        # different format for year
-        if release_year.startswith('N/A'):
-            year.append('N/A')
-        else:
-            if int(release_year) >= 80:
-                year_to_add = np.int32("19" + release_year)
-            else:
-                year_to_add = np.int32("20" + release_year)
-            year.append(year_to_add)
-
-        # go to every individual website to get genre info
-        url_to_game = tag.attrs['href']
-        site_raw = urllib.request.urlopen(url_to_game).read()
-        sub_soup = BeautifulSoup(site_raw, "html.parser")
-        # again, the info box is inconsistent among games so we
-        # have to find all the h2 and traverse from that to the genre name
-        h2s = sub_soup.find("div", {"id": "gameGenInfoBox"}).find_all('h2')
-        # make a temporary tag here to search for the one that contains
-        # the word "Genre"
-        temp_tag = element.Tag
-        for h2 in h2s:
-            if h2.string == 'Genre':
-                temp_tag = h2
-        genre.append(temp_tag.next_sibling.string)
-
-        rec_count += 1
-
-columns = {
-    'Rank': rank,
-    'Name': gname,
-    'Platform': platform,
-    'Year': year,
-    'Genre': genre,
-    'Critic_Score': critic_score,
-    'User_Score': user_score,
-    'Publisher': publisher,
-    'Developer': developer,
-    'NA_Sales': sales_na,
-    'PAL_Sales': sales_pal,
-    'JP_Sales': sales_jp,
-    'Other_Sales': sales_ot,
-    'Global_Sales': sales_gl
-}
-print(rec_count)
-df = pd.DataFrame(columns)
-print(df.columns)
-df = df[[
-    'Rank', 'Name', 'Platform', 'Year', 'Genre',
-    'Publisher', 'Developer', 'Critic_Score', 'User_Score',
-    'NA_Sales', 'PAL_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']]
-df.to_csv("vgsales.csv", sep=",", encoding='utf-8', index=False)