From e97b367083ef2b4ce74c1c88cbf2c44a2f964f32 Mon Sep 17 00:00:00 2001
From: WENKAI TAN <52226880+WENKAITAN@users.noreply.github.com>
Date: Thu, 9 Sep 2021 12:48:41 -0400
Subject: [PATCH 1/6] Add files via upload
---
Exercise.ipynb | 1306 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 1306 insertions(+)
create mode 100644 Exercise.ipynb
diff --git a/Exercise.ipynb b/Exercise.ipynb
new file mode 100644
index 00000000..49ecff9a
--- /dev/null
+++ b/Exercise.ipynb
@@ -0,0 +1,1306 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Data wrangling with Pandas exercise\n",
+ "* For this exercise we will be using the `listings.csv` data file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "zACK is the best teacher!!!\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"zACK is the best teacher!!!\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Load in the data file using `pd.read_csv()`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " host_id | \n",
+ " host_name | \n",
+ " neighbourhood_group | \n",
+ " neighbourhood | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " room_type | \n",
+ " price | \n",
+ " minimum_nights | \n",
+ " number_of_reviews | \n",
+ " last_review | \n",
+ " reviews_per_month | \n",
+ " calculated_host_listings_count | \n",
+ " availability_365 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2595 | \n",
+ " Skylit Midtown Castle | \n",
+ " 2845 | \n",
+ " Jennifer | \n",
+ " Manhattan | \n",
+ " Midtown | \n",
+ " 40.75362 | \n",
+ " -73.98377 | \n",
+ " Entire home/apt | \n",
+ " 175 | \n",
+ " 3 | \n",
+ " 48 | \n",
+ " 2019-11-04 | \n",
+ " 0.37 | \n",
+ " 2 | \n",
+ " 365 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3831 | \n",
+ " Whole flr w/private bdrm, bath & kitchen(pls r... | \n",
+ " 4869 | \n",
+ " LisaRoxanne | \n",
+ " Brooklyn | \n",
+ " Clinton Hill | \n",
+ " 40.68514 | \n",
+ " -73.95976 | \n",
+ " Entire home/apt | \n",
+ " 75 | \n",
+ " 1 | \n",
+ " 340 | \n",
+ " 2020-08-01 | \n",
+ " 4.75 | \n",
+ " 1 | \n",
+ " 265 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5121 | \n",
+ " BlissArtsSpace! | \n",
+ " 7356 | \n",
+ " Garon | \n",
+ " Brooklyn | \n",
+ " Bedford-Stuyvesant | \n",
+ " 40.68688 | \n",
+ " -73.95596 | \n",
+ " Private room | \n",
+ " 60 | \n",
+ " 29 | \n",
+ " 50 | \n",
+ " 2019-12-02 | \n",
+ " 0.37 | \n",
+ " 1 | \n",
+ " 365 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 5136 | \n",
+ " Spacious Brooklyn Duplex, Patio + Garden | \n",
+ " 7378 | \n",
+ " Rebecca | \n",
+ " Brooklyn | \n",
+ " Sunset Park | \n",
+ " 40.66120 | \n",
+ " -73.99423 | \n",
+ " Entire home/apt | \n",
+ " 175 | \n",
+ " 14 | \n",
+ " 1 | \n",
+ " 2014-01-02 | \n",
+ " 0.01 | \n",
+ " 1 | \n",
+ " 295 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5178 | \n",
+ " Large Furnished Room Near B'way | \n",
+ " 8967 | \n",
+ " Shunichi | \n",
+ " Manhattan | \n",
+ " Hell's Kitchen | \n",
+ " 40.76489 | \n",
+ " -73.98493 | \n",
+ " Private room | \n",
+ " 65 | \n",
+ " 2 | \n",
+ " 473 | \n",
+ " 2020-03-15 | \n",
+ " 3.44 | \n",
+ " 1 | \n",
+ " 340 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name host_id \\\n",
+ "0 2595 Skylit Midtown Castle 2845 \n",
+ "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n",
+ "2 5121 BlissArtsSpace! 7356 \n",
+ "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n",
+ "4 5178 Large Furnished Room Near B'way 8967 \n",
+ "\n",
+ " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
+ "0 Jennifer Manhattan Midtown 40.75362 -73.98377 \n",
+ "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n",
+ "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 -73.95596 \n",
+ "3 Rebecca Brooklyn Sunset Park 40.66120 -73.99423 \n",
+ "4 Shunichi Manhattan Hell's Kitchen 40.76489 -73.98493 \n",
+ "\n",
+ " room_type price minimum_nights number_of_reviews last_review \\\n",
+ "0 Entire home/apt 175 3 48 2019-11-04 \n",
+ "1 Entire home/apt 75 1 340 2020-08-01 \n",
+ "2 Private room 60 29 50 2019-12-02 \n",
+ "3 Entire home/apt 175 14 1 2014-01-02 \n",
+ "4 Private room 65 2 473 2020-03-15 \n",
+ "\n",
+ " reviews_per_month calculated_host_listings_count availability_365 \n",
+ "0 0.37 2 365 \n",
+ "1 4.75 1 265 \n",
+ "2 0.37 1 365 \n",
+ "3 0.01 1 295 \n",
+ "4 3.44 1 340 "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load data here\n",
+ "df = pd.read_csv(\"./data/listings.csv\", sep=\",\")\n",
+ "\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(46527, 16)"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Exercise 2 - Filtering\n",
+ "\n",
+ "Return the following subsets of the dataframe.\n",
+ "\n",
+ "1. How many listings are there with a price less than 100? \n",
+ "\n",
+ "\n",
+ "2. Find how many listings there are in just Brooklyn.\n",
+ "\n",
+ "\n",
+ "3. Find how many listings there are in Brooklyn with a price less than 100.\n",
+ "\n",
+ "\n",
+ "4. Using `.isin()` select anyone that has the host name of Michael, David, John, and Daniel.\n",
+ "\n",
+ "\n",
+ "5. Create a new column called `adjusted_price` that has $100 added to every listing in Williamsburg. The prices for all other listings should be the same as they were before. \n",
+ "\n",
+ "\n",
+ "6. What % of the rooms are private, and what % of the rooms are shared. \n",
+ " * Hint, use `.value_counts()`\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(22778, 16)"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 1. How many listings are there with a price less than 100? \n",
+ "price_lessthan_100 = df[\"price\"] < 100\n",
+ "\n",
+ "df_less_than_100 = df[price_lessthan_100]\n",
+ "df_less_than_100.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(18632, 16)"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 2. Make a new DataFrame of listings in Brooklyn named `df_bk` \n",
+ "# and find how many listings in just Brooklyn.\n",
+ "\n",
+ "list_in_brk = df[\"neighbourhood_group\"] == \"Brooklyn\"\n",
+ "df_bk = df[list_in_brk ]\n",
+ "df_bk.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(10473, 16)"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 3. Find how many listings there are in Brooklyn with a price less than 100.\n",
+ "list_in_bk_and_less_than_100 = df_bk[\"price\"] < 100 \n",
+ "\n",
+ "df_bk_less_100 = df_bk[list_in_bk_and_less_than_100]\n",
+ "\n",
+ "df_bk_less_100.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " host_id | \n",
+ " host_name | \n",
+ " neighbourhood_group | \n",
+ " neighbourhood | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " room_type | \n",
+ " price | \n",
+ " minimum_nights | \n",
+ " number_of_reviews | \n",
+ " last_review | \n",
+ " reviews_per_month | \n",
+ " calculated_host_listings_count | \n",
+ " availability_365 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 52 | \n",
+ " 16595 | \n",
+ " LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q... | \n",
+ " 64522 | \n",
+ " Daniel | \n",
+ " Brooklyn | \n",
+ " Williamsburg | \n",
+ " 40.70933 | \n",
+ " -73.96792 | \n",
+ " Entire home/apt | \n",
+ " 271 | \n",
+ " 1 | \n",
+ " 172 | \n",
+ " 2020-07-14 | \n",
+ " 1.44 | \n",
+ " 1 | \n",
+ " 365 | \n",
+ "
\n",
+ " \n",
+ " | 201 | \n",
+ " 61747 | \n",
+ " Cozy, Brooklyn, Prospect Park Studio | \n",
+ " 299370 | \n",
+ " David | \n",
+ " Brooklyn | \n",
+ " Prospect-Lefferts Gardens | \n",
+ " 40.65979 | \n",
+ " -73.96180 | \n",
+ " Entire home/apt | \n",
+ " 91 | \n",
+ " 14 | \n",
+ " 97 | \n",
+ " 2018-01-31 | \n",
+ " 0.83 | \n",
+ " 1 | \n",
+ " 44 | \n",
+ "
\n",
+ " \n",
+ " | 209 | \n",
+ " 62903 | \n",
+ " Beautiful modern studio apartment in heart of NYC | \n",
+ " 306605 | \n",
+ " Daniel | \n",
+ " Manhattan | \n",
+ " Chelsea | \n",
+ " 40.74238 | \n",
+ " -73.99567 | \n",
+ " Entire home/apt | \n",
+ " 205 | \n",
+ " 15 | \n",
+ " 68 | \n",
+ " 2019-12-14 | \n",
+ " 0.67 | \n",
+ " 2 | \n",
+ " 89 | \n",
+ "
\n",
+ " \n",
+ " | 220 | \n",
+ " 64015 | \n",
+ " Prime East Village 1 Bedroom | \n",
+ " 146944 | \n",
+ " David | \n",
+ " Manhattan | \n",
+ " East Village | \n",
+ " 40.72807 | \n",
+ " -73.98594 | \n",
+ " Entire home/apt | \n",
+ " 200 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 260 | \n",
+ " 74073 | \n",
+ " Food & Music Dream Apartment in Williamsburg | \n",
+ " 211877 | \n",
+ " Daniel | \n",
+ " Brooklyn | \n",
+ " Williamsburg | \n",
+ " 40.71113 | \n",
+ " -73.96054 | \n",
+ " Entire home/apt | \n",
+ " 187 | \n",
+ " 30 | \n",
+ " 90 | \n",
+ " 2020-07-31 | \n",
+ " 0.81 | \n",
+ " 1 | \n",
+ " 261 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 46362 | \n",
+ " 44639591 | \n",
+ " Central & Stylish 1 Bedroom Apt - Heart of Che... | \n",
+ " 286136716 | \n",
+ " John | \n",
+ " Manhattan | \n",
+ " Chelsea | \n",
+ " 40.74568 | \n",
+ " -73.99694 | \n",
+ " Entire home/apt | \n",
+ " 110 | \n",
+ " 30 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 3 | \n",
+ " 110 | \n",
+ "
\n",
+ " \n",
+ " | 46396 | \n",
+ " 44661297 | \n",
+ " Flushing Sunshine home #101 | \n",
+ " 361579037 | \n",
+ " Daniel | \n",
+ " Queens | \n",
+ " Flushing | \n",
+ " 40.74603 | \n",
+ " -73.82837 | \n",
+ " Private room | \n",
+ " 52 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " 360 | \n",
+ "
\n",
+ " \n",
+ " | 46403 | \n",
+ " 44662157 | \n",
+ " Flushing Sunshine home #102 | \n",
+ " 361579037 | \n",
+ " Daniel | \n",
+ " Queens | \n",
+ " Flushing | \n",
+ " 40.74441 | \n",
+ " -73.82829 | \n",
+ " Private room | \n",
+ " 55 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 2020-08-16 | \n",
+ " 3.00 | \n",
+ " 2 | \n",
+ " 365 | \n",
+ "
\n",
+ " \n",
+ " | 46455 | \n",
+ " 44697211 | \n",
+ " David’s Queen Sized Room | \n",
+ " 343477029 | \n",
+ " David | \n",
+ " Queens | \n",
+ " Far Rockaway | \n",
+ " 40.59460 | \n",
+ " -73.75875 | \n",
+ " Private room | \n",
+ " 95 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 365 | \n",
+ "
\n",
+ " \n",
+ " | 46508 | \n",
+ " 44797527 | \n",
+ " Long-Term: Furnished Apt in Nolita w/ Amenities | \n",
+ " 19448640 | \n",
+ " David | \n",
+ " Manhattan | \n",
+ " Nolita | \n",
+ " 40.72289 | \n",
+ " -73.99400 | \n",
+ " Entire home/apt | \n",
+ " 140 | \n",
+ " 30 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 282 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1258 rows × 16 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name host_id \\\n",
+ "52 16595 LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q... 64522 \n",
+ "201 61747 Cozy, Brooklyn, Prospect Park Studio 299370 \n",
+ "209 62903 Beautiful modern studio apartment in heart of NYC 306605 \n",
+ "220 64015 Prime East Village 1 Bedroom 146944 \n",
+ "260 74073 Food & Music Dream Apartment in Williamsburg 211877 \n",
+ "... ... ... ... \n",
+ "46362 44639591 Central & Stylish 1 Bedroom Apt - Heart of Che... 286136716 \n",
+ "46396 44661297 Flushing Sunshine home #101 361579037 \n",
+ "46403 44662157 Flushing Sunshine home #102 361579037 \n",
+ "46455 44697211 David’s Queen Sized Room 343477029 \n",
+ "46508 44797527 Long-Term: Furnished Apt in Nolita w/ Amenities 19448640 \n",
+ "\n",
+ " host_name neighbourhood_group neighbourhood latitude \\\n",
+ "52 Daniel Brooklyn Williamsburg 40.70933 \n",
+ "201 David Brooklyn Prospect-Lefferts Gardens 40.65979 \n",
+ "209 Daniel Manhattan Chelsea 40.74238 \n",
+ "220 David Manhattan East Village 40.72807 \n",
+ "260 Daniel Brooklyn Williamsburg 40.71113 \n",
+ "... ... ... ... ... \n",
+ "46362 John Manhattan Chelsea 40.74568 \n",
+ "46396 Daniel Queens Flushing 40.74603 \n",
+ "46403 Daniel Queens Flushing 40.74441 \n",
+ "46455 David Queens Far Rockaway 40.59460 \n",
+ "46508 David Manhattan Nolita 40.72289 \n",
+ "\n",
+ " longitude room_type price minimum_nights number_of_reviews \\\n",
+ "52 -73.96792 Entire home/apt 271 1 172 \n",
+ "201 -73.96180 Entire home/apt 91 14 97 \n",
+ "209 -73.99567 Entire home/apt 205 15 68 \n",
+ "220 -73.98594 Entire home/apt 200 3 0 \n",
+ "260 -73.96054 Entire home/apt 187 30 90 \n",
+ "... ... ... ... ... ... \n",
+ "46362 -73.99694 Entire home/apt 110 30 0 \n",
+ "46396 -73.82837 Private room 52 1 0 \n",
+ "46403 -73.82829 Private room 55 1 3 \n",
+ "46455 -73.75875 Private room 95 1 0 \n",
+ "46508 -73.99400 Entire home/apt 140 30 0 \n",
+ "\n",
+ " last_review reviews_per_month calculated_host_listings_count \\\n",
+ "52 2020-07-14 1.44 1 \n",
+ "201 2018-01-31 0.83 1 \n",
+ "209 2019-12-14 0.67 2 \n",
+ "220 NaN NaN 1 \n",
+ "260 2020-07-31 0.81 1 \n",
+ "... ... ... ... \n",
+ "46362 NaN NaN 3 \n",
+ "46396 NaN NaN 2 \n",
+ "46403 2020-08-16 3.00 2 \n",
+ "46455 NaN NaN 1 \n",
+ "46508 NaN NaN 1 \n",
+ "\n",
+ " availability_365 \n",
+ "52 365 \n",
+ "201 44 \n",
+ "209 89 \n",
+ "220 0 \n",
+ "260 261 \n",
+ "... ... \n",
+ "46362 110 \n",
+ "46396 360 \n",
+ "46403 365 \n",
+ "46455 365 \n",
+ "46508 282 \n",
+ "\n",
+ "[1258 rows x 16 columns]"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 4. Using `.isin()` select anyone that has the host name of Michael, David, John, and Daniel.\n",
+ "\n",
+ "names = [\"Michael\", \"David\", \"John\", \"Daniel\"]\n",
+ "select_host = df[\"host_name\"].isin(names)\n",
+ "df_select_host = df[select_host]\n",
+ "df_select_host"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " host_id | \n",
+ " host_name | \n",
+ " neighbourhood_group | \n",
+ " neighbourhood | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " room_type | \n",
+ " price | \n",
+ " minimum_nights | \n",
+ " number_of_reviews | \n",
+ " last_review | \n",
+ " reviews_per_month | \n",
+ " calculated_host_listings_count | \n",
+ " availability_365 | \n",
+ " adjusted_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2595 | \n",
+ " Skylit Midtown Castle | \n",
+ " 2845 | \n",
+ " Jennifer | \n",
+ " Manhattan | \n",
+ " Midtown | \n",
+ " 40.75362 | \n",
+ " -73.98377 | \n",
+ " Entire home/apt | \n",
+ " 175 | \n",
+ " 3 | \n",
+ " 48 | \n",
+ " 2019-11-04 | \n",
+ " 0.37 | \n",
+ " 2 | \n",
+ " 365 | \n",
+ " 275 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3831 | \n",
+ " Whole flr w/private bdrm, bath & kitchen(pls r... | \n",
+ " 4869 | \n",
+ " LisaRoxanne | \n",
+ " Brooklyn | \n",
+ " Clinton Hill | \n",
+ " 40.68514 | \n",
+ " -73.95976 | \n",
+ " Entire home/apt | \n",
+ " 75 | \n",
+ " 1 | \n",
+ " 340 | \n",
+ " 2020-08-01 | \n",
+ " 4.75 | \n",
+ " 1 | \n",
+ " 265 | \n",
+ " 175 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5121 | \n",
+ " BlissArtsSpace! | \n",
+ " 7356 | \n",
+ " Garon | \n",
+ " Brooklyn | \n",
+ " Bedford-Stuyvesant | \n",
+ " 40.68688 | \n",
+ " -73.95596 | \n",
+ " Private room | \n",
+ " 60 | \n",
+ " 29 | \n",
+ " 50 | \n",
+ " 2019-12-02 | \n",
+ " 0.37 | \n",
+ " 1 | \n",
+ " 365 | \n",
+ " 160 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 5136 | \n",
+ " Spacious Brooklyn Duplex, Patio + Garden | \n",
+ " 7378 | \n",
+ " Rebecca | \n",
+ " Brooklyn | \n",
+ " Sunset Park | \n",
+ " 40.66120 | \n",
+ " -73.99423 | \n",
+ " Entire home/apt | \n",
+ " 175 | \n",
+ " 14 | \n",
+ " 1 | \n",
+ " 2014-01-02 | \n",
+ " 0.01 | \n",
+ " 1 | \n",
+ " 295 | \n",
+ " 275 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5178 | \n",
+ " Large Furnished Room Near B'way | \n",
+ " 8967 | \n",
+ " Shunichi | \n",
+ " Manhattan | \n",
+ " Hell's Kitchen | \n",
+ " 40.76489 | \n",
+ " -73.98493 | \n",
+ " Private room | \n",
+ " 65 | \n",
+ " 2 | \n",
+ " 473 | \n",
+ " 2020-03-15 | \n",
+ " 3.44 | \n",
+ " 1 | \n",
+ " 340 | \n",
+ " 165 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 46522 | \n",
+ " 44807522 | \n",
+ " Designer Gramercy Studio Townhouse by UNSQ | \n",
+ " 12941925 | \n",
+ " Brian | \n",
+ " Manhattan | \n",
+ " Gramercy | \n",
+ " 40.73433 | \n",
+ " -73.98383 | \n",
+ " Entire home/apt | \n",
+ " 145 | \n",
+ " 7 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 164 | \n",
+ " 245 | \n",
+ "
\n",
+ " \n",
+ " | 46523 | \n",
+ " 44807786 | \n",
+ " Cozy & comfy apt in the heart of Inwood Manhattan | \n",
+ " 284790520 | \n",
+ " Salar | \n",
+ " Manhattan | \n",
+ " Washington Heights | \n",
+ " 40.85820 | \n",
+ " -73.92733 | \n",
+ " Entire home/apt | \n",
+ " 87 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " 85 | \n",
+ " 187 | \n",
+ "
\n",
+ " \n",
+ " | 46524 | \n",
+ " 44811717 | \n",
+ " Comfortable safe environment 24hr security camera | \n",
+ " 362453686 | \n",
+ " Nicole | \n",
+ " Brooklyn | \n",
+ " East Flatbush | \n",
+ " 40.65399 | \n",
+ " -73.93287 | \n",
+ " Private room | \n",
+ " 59 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 90 | \n",
+ " 159 | \n",
+ "
\n",
+ " \n",
+ " | 46525 | \n",
+ " 44814944 | \n",
+ " Upper West Side studio 86th Street | \n",
+ " 4039777 | \n",
+ " Fernando | \n",
+ " Manhattan | \n",
+ " Upper West Side | \n",
+ " 40.78731 | \n",
+ " -73.97029 | \n",
+ " Entire home/apt | \n",
+ " 80 | \n",
+ " 30 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 113 | \n",
+ " 180 | \n",
+ "
\n",
+ " \n",
+ " | 46526 | \n",
+ " 44818009 | \n",
+ " 5MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN | \n",
+ " 48098268 | \n",
+ " Marina | \n",
+ " Brooklyn | \n",
+ " Gravesend | \n",
+ " 40.59945 | \n",
+ " -73.98209 | \n",
+ " Private room | \n",
+ " 66 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 38 | \n",
+ " 166 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
46527 rows × 17 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name host_id \\\n",
+ "0 2595 Skylit Midtown Castle 2845 \n",
+ "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n",
+ "2 5121 BlissArtsSpace! 7356 \n",
+ "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n",
+ "4 5178 Large Furnished Room Near B'way 8967 \n",
+ "... ... ... ... \n",
+ "46522 44807522 Designer Gramercy Studio Townhouse by UNSQ 12941925 \n",
+ "46523 44807786 Cozy & comfy apt in the heart of Inwood Manhattan 284790520 \n",
+ "46524 44811717 Comfortable safe environment 24hr security camera 362453686 \n",
+ "46525 44814944 Upper West Side studio 86th Street 4039777 \n",
+ "46526 44818009 5MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN 48098268 \n",
+ "\n",
+ " host_name neighbourhood_group neighbourhood latitude \\\n",
+ "0 Jennifer Manhattan Midtown 40.75362 \n",
+ "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 \n",
+ "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 \n",
+ "3 Rebecca Brooklyn Sunset Park 40.66120 \n",
+ "4 Shunichi Manhattan Hell's Kitchen 40.76489 \n",
+ "... ... ... ... ... \n",
+ "46522 Brian Manhattan Gramercy 40.73433 \n",
+ "46523 Salar Manhattan Washington Heights 40.85820 \n",
+ "46524 Nicole Brooklyn East Flatbush 40.65399 \n",
+ "46525 Fernando Manhattan Upper West Side 40.78731 \n",
+ "46526 Marina Brooklyn Gravesend 40.59945 \n",
+ "\n",
+ " longitude room_type price minimum_nights number_of_reviews \\\n",
+ "0 -73.98377 Entire home/apt 175 3 48 \n",
+ "1 -73.95976 Entire home/apt 75 1 340 \n",
+ "2 -73.95596 Private room 60 29 50 \n",
+ "3 -73.99423 Entire home/apt 175 14 1 \n",
+ "4 -73.98493 Private room 65 2 473 \n",
+ "... ... ... ... ... ... \n",
+ "46522 -73.98383 Entire home/apt 145 7 0 \n",
+ "46523 -73.92733 Entire home/apt 87 6 0 \n",
+ "46524 -73.93287 Private room 59 3 0 \n",
+ "46525 -73.97029 Entire home/apt 80 30 0 \n",
+ "46526 -73.98209 Private room 66 1 0 \n",
+ "\n",
+ " last_review reviews_per_month calculated_host_listings_count \\\n",
+ "0 2019-11-04 0.37 2 \n",
+ "1 2020-08-01 4.75 1 \n",
+ "2 2019-12-02 0.37 1 \n",
+ "3 2014-01-02 0.01 1 \n",
+ "4 2020-03-15 3.44 1 \n",
+ "... ... ... ... \n",
+ "46522 NaN NaN 1 \n",
+ "46523 NaN NaN 2 \n",
+ "46524 NaN NaN 1 \n",
+ "46525 NaN NaN 1 \n",
+ "46526 NaN NaN 1 \n",
+ "\n",
+ " availability_365 adjusted_price \n",
+ "0 365 275 \n",
+ "1 265 175 \n",
+ "2 365 160 \n",
+ "3 295 275 \n",
+ "4 340 165 \n",
+ "... ... ... \n",
+ "46522 164 245 \n",
+ "46523 85 187 \n",
+ "46524 90 159 \n",
+ "46525 113 180 \n",
+ "46526 38 166 \n",
+ "\n",
+ "[46527 rows x 17 columns]"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 5. Create a new column called `adjusted_price` that has $100 added to every listing in Williamsburg. \n",
+ "# The prices for all other listings should be the same as they were before (np.where keeps the original price for them). \n",
+ "df[\"adjusted_price\"] = np.where(df[\"neighbourhood\"] == \"Williamsburg\", df[\"price\"] + 100, df[\"price\"])\n",
+ "df\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "room_type\n",
+ "Entire home/apt 23998\n",
+ "Private room 21144\n",
+ "Shared room 987\n",
+ "Hotel room 398\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 6. What % of the rooms are private, and what % of the rooms are shared. \n",
+ "\n",
+ "room_types = df.value_counts(\"room_type\")\n",
+ "room_types\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "46527"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "total_rooms = room_types.sum()\n",
+ "total_rooms "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "percenatge_private_room: 0.45444580566122894\n",
+ "percenatge_shared_room: 0.0212134889419047\n"
+ ]
+ }
+ ],
+ "source": [
+ "private_room = room_types[\"Private room\"]\n",
+ "shared_room = room_types[\"Shared room\"]\n",
+ "percenatge_private_room = private_room / total_rooms\n",
+ "percenatge_shared_room = shared_room / total_rooms\n",
+ "\n",
+ "print(\"percenatge_private_room: \", percenatge_private_room)\n",
+ "print(\"percenatge_shared_room: \", percenatge_shared_room )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Exercise 3 - Grouping\n",
+ "\n",
+ "1. Using `groupby`, count how many listings are in each neighbourhood_group.\n",
+ "\n",
+ "\n",
+ "2. Using `groupby`, find the mean price for each of the neighbourhood_groups. \n",
+ "\n",
+ "\n",
+ "3. Using `groupby` and `.agg()`, find the min and max price for each of the neighbourhood_groups. \n",
+ "\n",
+ "\n",
+ "4. Using `groupby`, find the median price for each room type in each neighbourhood_group.\n",
+ "\n",
+ "\n",
+ "5. Using `groupby` and `.agg()`, find the count, min, max, mean, median, and std of the prices for each room type in each neighbourhood_group."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1. Using `groupby`, count how many listings are in each neighbourhood_group.\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 2. Using `groupby`, find the mean price for each of the neighbourhood_groups. \n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 3. Using `groupby` and `.agg()`, find the min and max price for each of the neighbourhood_groups. \n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 4. Using `groupby`, find the mean price for each room type in each neighbourhood_group.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 5. Using `groupby` and `.agg()`, find the count, min, max, mean, median, and std of the prices \n",
+ "# for each room type in each neighbourhood_group.\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Join and file saving.\n",
+ "1. Load the `prices.csv` and the `n_listings.csv`\n",
+ "\n",
+ "\n",
+ "2. Do a join that keeps all the records for each table.\n",
+ " * Neighbourhood groups should include ['Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island',\n",
+ " 'LongIsland']\n",
+ " \n",
+ " \n",
+ "3. Save your joined csv as `joined.csv`\n",
+ "\n",
+ "\n",
+ "4. Load your saved table and see if it looks the same or different than the DataFrame you used to create it. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1. Load the `prices.csv` and the `n_listings.csv`\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 2. Do a join that keeps all the records for each table.\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Use the grammys.csv data for the next section of questions.\n",
+ "\n",
+ "1. Who won Album of the Year in 2016?\n",
+ "\n",
+ "\n",
+ "2. Who won Best Rap Album in 2009?\n",
+ "\n",
+ "\n",
+ "3. How many awards was Kendrick Lamar nominated for, and how many did he win...?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1. Who won Album of the Year in 2016?\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 2. Who won Best Rap Album in 2009?\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 3. How many awards was Kendrick Lamar nominated for, and how many did he win...?\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
From 9c81be305d1a2c9c0557787316ed0a2ee9e88a40 Mon Sep 17 00:00:00 2001
From: WENKAITAN
Date: Thu, 16 Sep 2021 12:47:01 -0400
Subject: [PATCH 2/6] comment
---
Week-01-Pandas/Exercise.ipynb | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/Week-01-Pandas/Exercise.ipynb b/Week-01-Pandas/Exercise.ipynb
index ba0ed20f..94e1277d 100644
--- a/Week-01-Pandas/Exercise.ipynb
+++ b/Week-01-Pandas/Exercise.ipynb
@@ -8,6 +8,15 @@
"* For this exercise we will be using the `listings.csv` data file."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"zACK is the best teacher!!!\")"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 23,
From 6493cb4feba8131bb7fa2146d6e46e952b7cd832 Mon Sep 17 00:00:00 2001
From: WENKAITAN
Date: Thu, 16 Sep 2021 13:00:09 -0400
Subject: [PATCH 3/6] hw
---
Week-01-Pandas/Exercise.ipynb | 1026 ++++++++++++++++++++++++++++++++-
1 file changed, 1002 insertions(+), 24 deletions(-)
diff --git a/Week-01-Pandas/Exercise.ipynb b/Week-01-Pandas/Exercise.ipynb
index 94e1277d..49ecff9a 100644
--- a/Week-01-Pandas/Exercise.ipynb
+++ b/Week-01-Pandas/Exercise.ipynb
@@ -10,16 +10,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "zACK is the best teacher!!!\n"
+ ]
+ }
+ ],
"source": [
"print(\"zACK is the best teacher!!!\")"
]
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -36,12 +44,208 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " host_id | \n",
+ " host_name | \n",
+ " neighbourhood_group | \n",
+ " neighbourhood | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " room_type | \n",
+ " price | \n",
+ " minimum_nights | \n",
+ " number_of_reviews | \n",
+ " last_review | \n",
+ " reviews_per_month | \n",
+ " calculated_host_listings_count | \n",
+ " availability_365 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2595 | \n",
+ " Skylit Midtown Castle | \n",
+ " 2845 | \n",
+ " Jennifer | \n",
+ " Manhattan | \n",
+ " Midtown | \n",
+ " 40.75362 | \n",
+ " -73.98377 | \n",
+ " Entire home/apt | \n",
+ " 175 | \n",
+ " 3 | \n",
+ " 48 | \n",
+ " 2019-11-04 | \n",
+ " 0.37 | \n",
+ " 2 | \n",
+ " 365 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3831 | \n",
+ " Whole flr w/private bdrm, bath & kitchen(pls r... | \n",
+ " 4869 | \n",
+ " LisaRoxanne | \n",
+ " Brooklyn | \n",
+ " Clinton Hill | \n",
+ " 40.68514 | \n",
+ " -73.95976 | \n",
+ " Entire home/apt | \n",
+ " 75 | \n",
+ " 1 | \n",
+ " 340 | \n",
+ " 2020-08-01 | \n",
+ " 4.75 | \n",
+ " 1 | \n",
+ " 265 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5121 | \n",
+ " BlissArtsSpace! | \n",
+ " 7356 | \n",
+ " Garon | \n",
+ " Brooklyn | \n",
+ " Bedford-Stuyvesant | \n",
+ " 40.68688 | \n",
+ " -73.95596 | \n",
+ " Private room | \n",
+ " 60 | \n",
+ " 29 | \n",
+ " 50 | \n",
+ " 2019-12-02 | \n",
+ " 0.37 | \n",
+ " 1 | \n",
+ " 365 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 5136 | \n",
+ " Spacious Brooklyn Duplex, Patio + Garden | \n",
+ " 7378 | \n",
+ " Rebecca | \n",
+ " Brooklyn | \n",
+ " Sunset Park | \n",
+ " 40.66120 | \n",
+ " -73.99423 | \n",
+ " Entire home/apt | \n",
+ " 175 | \n",
+ " 14 | \n",
+ " 1 | \n",
+ " 2014-01-02 | \n",
+ " 0.01 | \n",
+ " 1 | \n",
+ " 295 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5178 | \n",
+ " Large Furnished Room Near B'way | \n",
+ " 8967 | \n",
+ " Shunichi | \n",
+ " Manhattan | \n",
+ " Hell's Kitchen | \n",
+ " 40.76489 | \n",
+ " -73.98493 | \n",
+ " Private room | \n",
+ " 65 | \n",
+ " 2 | \n",
+ " 473 | \n",
+ " 2020-03-15 | \n",
+ " 3.44 | \n",
+ " 1 | \n",
+ " 340 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name host_id \\\n",
+ "0 2595 Skylit Midtown Castle 2845 \n",
+ "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n",
+ "2 5121 BlissArtsSpace! 7356 \n",
+ "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n",
+ "4 5178 Large Furnished Room Near B'way 8967 \n",
+ "\n",
+ " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
+ "0 Jennifer Manhattan Midtown 40.75362 -73.98377 \n",
+ "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n",
+ "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 -73.95596 \n",
+ "3 Rebecca Brooklyn Sunset Park 40.66120 -73.99423 \n",
+ "4 Shunichi Manhattan Hell's Kitchen 40.76489 -73.98493 \n",
+ "\n",
+ " room_type price minimum_nights number_of_reviews last_review \\\n",
+ "0 Entire home/apt 175 3 48 2019-11-04 \n",
+ "1 Entire home/apt 75 1 340 2020-08-01 \n",
+ "2 Private room 60 29 50 2019-12-02 \n",
+ "3 Entire home/apt 175 14 1 2014-01-02 \n",
+ "4 Private room 65 2 473 2020-03-15 \n",
+ "\n",
+ " reviews_per_month calculated_host_listings_count availability_365 \n",
+ "0 0.37 2 365 \n",
+ "1 4.75 1 265 \n",
+ "2 0.37 1 365 \n",
+ "3 0.01 1 295 \n",
+ "4 3.44 1 340 "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Load data here\n",
- "\n"
+ "df = pd.read_csv(\"./data/listings.csv\", sep=\",\")\n",
+ "\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(46527, 16)"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
]
},
{
@@ -73,69 +277,843 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 19,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(22778, 16)"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# 1. How many listings are there with a price less than 100? \n",
+ "price_lessthan_100 = df[\"price\"] < 100\n",
"\n",
- "\n"
+ "df_less_than_100 = df[price_lessthan_100]\n",
+ "df_less_than_100.shape"
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 25,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(18632, 16)"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# 2. Make a new DataFrame of listings in Brooklyn named `df_bk` \n",
"# and find how many listings in just Brooklyn.\n",
- "\n"
+ "\n",
+ "list_in_brk = df[\"neighbourhood_group\"] == \"Brooklyn\"\n",
+ "df_bk = df[list_in_brk ]\n",
+ "df_bk.shape"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 24,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(10473, 16)"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# 3. Find how many listings there are in Brooklyn with a price less than 100.\n",
+ "list_in_bk_and_less_than_100 = df_bk[\"price\"] < 100 \n",
"\n",
- "\n"
+ "df_bk_less_100 = df_bk[list_in_bk_and_less_than_100]\n",
+ "\n",
+ "df_bk_less_100.shape"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 28,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " host_id | \n",
+ " host_name | \n",
+ " neighbourhood_group | \n",
+ " neighbourhood | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " room_type | \n",
+ " price | \n",
+ " minimum_nights | \n",
+ " number_of_reviews | \n",
+ " last_review | \n",
+ " reviews_per_month | \n",
+ " calculated_host_listings_count | \n",
+ " availability_365 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 52 | \n",
+ " 16595 | \n",
+ " LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q... | \n",
+ " 64522 | \n",
+ " Daniel | \n",
+ " Brooklyn | \n",
+ " Williamsburg | \n",
+ " 40.70933 | \n",
+ " -73.96792 | \n",
+ " Entire home/apt | \n",
+ " 271 | \n",
+ " 1 | \n",
+ " 172 | \n",
+ " 2020-07-14 | \n",
+ " 1.44 | \n",
+ " 1 | \n",
+ " 365 | \n",
+ "
\n",
+ " \n",
+ " | 201 | \n",
+ " 61747 | \n",
+ " Cozy, Brooklyn, Prospect Park Studio | \n",
+ " 299370 | \n",
+ " David | \n",
+ " Brooklyn | \n",
+ " Prospect-Lefferts Gardens | \n",
+ " 40.65979 | \n",
+ " -73.96180 | \n",
+ " Entire home/apt | \n",
+ " 91 | \n",
+ " 14 | \n",
+ " 97 | \n",
+ " 2018-01-31 | \n",
+ " 0.83 | \n",
+ " 1 | \n",
+ " 44 | \n",
+ "
\n",
+ " \n",
+ " | 209 | \n",
+ " 62903 | \n",
+ " Beautiful modern studio apartment in heart of NYC | \n",
+ " 306605 | \n",
+ " Daniel | \n",
+ " Manhattan | \n",
+ " Chelsea | \n",
+ " 40.74238 | \n",
+ " -73.99567 | \n",
+ " Entire home/apt | \n",
+ " 205 | \n",
+ " 15 | \n",
+ " 68 | \n",
+ " 2019-12-14 | \n",
+ " 0.67 | \n",
+ " 2 | \n",
+ " 89 | \n",
+ "
\n",
+ " \n",
+ " | 220 | \n",
+ " 64015 | \n",
+ " Prime East Village 1 Bedroom | \n",
+ " 146944 | \n",
+ " David | \n",
+ " Manhattan | \n",
+ " East Village | \n",
+ " 40.72807 | \n",
+ " -73.98594 | \n",
+ " Entire home/apt | \n",
+ " 200 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 260 | \n",
+ " 74073 | \n",
+ " Food & Music Dream Apartment in Williamsburg | \n",
+ " 211877 | \n",
+ " Daniel | \n",
+ " Brooklyn | \n",
+ " Williamsburg | \n",
+ " 40.71113 | \n",
+ " -73.96054 | \n",
+ " Entire home/apt | \n",
+ " 187 | \n",
+ " 30 | \n",
+ " 90 | \n",
+ " 2020-07-31 | \n",
+ " 0.81 | \n",
+ " 1 | \n",
+ " 261 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 46362 | \n",
+ " 44639591 | \n",
+ " Central & Stylish 1 Bedroom Apt - Heart of Che... | \n",
+ " 286136716 | \n",
+ " John | \n",
+ " Manhattan | \n",
+ " Chelsea | \n",
+ " 40.74568 | \n",
+ " -73.99694 | \n",
+ " Entire home/apt | \n",
+ " 110 | \n",
+ " 30 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 3 | \n",
+ " 110 | \n",
+ "
\n",
+ " \n",
+ " | 46396 | \n",
+ " 44661297 | \n",
+ " Flushing Sunshine home #101 | \n",
+ " 361579037 | \n",
+ " Daniel | \n",
+ " Queens | \n",
+ " Flushing | \n",
+ " 40.74603 | \n",
+ " -73.82837 | \n",
+ " Private room | \n",
+ " 52 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " 360 | \n",
+ "
\n",
+ " \n",
+ " | 46403 | \n",
+ " 44662157 | \n",
+ " Flushing Sunshine home #102 | \n",
+ " 361579037 | \n",
+ " Daniel | \n",
+ " Queens | \n",
+ " Flushing | \n",
+ " 40.74441 | \n",
+ " -73.82829 | \n",
+ " Private room | \n",
+ " 55 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 2020-08-16 | \n",
+ " 3.00 | \n",
+ " 2 | \n",
+ " 365 | \n",
+ "
\n",
+ " \n",
+ " | 46455 | \n",
+ " 44697211 | \n",
+ " David’s Queen Sized Room | \n",
+ " 343477029 | \n",
+ " David | \n",
+ " Queens | \n",
+ " Far Rockaway | \n",
+ " 40.59460 | \n",
+ " -73.75875 | \n",
+ " Private room | \n",
+ " 95 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 365 | \n",
+ "
\n",
+ " \n",
+ " | 46508 | \n",
+ " 44797527 | \n",
+ " Long-Term: Furnished Apt in Nolita w/ Amenities | \n",
+ " 19448640 | \n",
+ " David | \n",
+ " Manhattan | \n",
+ " Nolita | \n",
+ " 40.72289 | \n",
+ " -73.99400 | \n",
+ " Entire home/apt | \n",
+ " 140 | \n",
+ " 30 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 282 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1258 rows × 16 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name host_id \\\n",
+ "52 16595 LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q... 64522 \n",
+ "201 61747 Cozy, Brooklyn, Prospect Park Studio 299370 \n",
+ "209 62903 Beautiful modern studio apartment in heart of NYC 306605 \n",
+ "220 64015 Prime East Village 1 Bedroom 146944 \n",
+ "260 74073 Food & Music Dream Apartment in Williamsburg 211877 \n",
+ "... ... ... ... \n",
+ "46362 44639591 Central & Stylish 1 Bedroom Apt - Heart of Che... 286136716 \n",
+ "46396 44661297 Flushing Sunshine home #101 361579037 \n",
+ "46403 44662157 Flushing Sunshine home #102 361579037 \n",
+ "46455 44697211 David’s Queen Sized Room 343477029 \n",
+ "46508 44797527 Long-Term: Furnished Apt in Nolita w/ Amenities 19448640 \n",
+ "\n",
+ " host_name neighbourhood_group neighbourhood latitude \\\n",
+ "52 Daniel Brooklyn Williamsburg 40.70933 \n",
+ "201 David Brooklyn Prospect-Lefferts Gardens 40.65979 \n",
+ "209 Daniel Manhattan Chelsea 40.74238 \n",
+ "220 David Manhattan East Village 40.72807 \n",
+ "260 Daniel Brooklyn Williamsburg 40.71113 \n",
+ "... ... ... ... ... \n",
+ "46362 John Manhattan Chelsea 40.74568 \n",
+ "46396 Daniel Queens Flushing 40.74603 \n",
+ "46403 Daniel Queens Flushing 40.74441 \n",
+ "46455 David Queens Far Rockaway 40.59460 \n",
+ "46508 David Manhattan Nolita 40.72289 \n",
+ "\n",
+ " longitude room_type price minimum_nights number_of_reviews \\\n",
+ "52 -73.96792 Entire home/apt 271 1 172 \n",
+ "201 -73.96180 Entire home/apt 91 14 97 \n",
+ "209 -73.99567 Entire home/apt 205 15 68 \n",
+ "220 -73.98594 Entire home/apt 200 3 0 \n",
+ "260 -73.96054 Entire home/apt 187 30 90 \n",
+ "... ... ... ... ... ... \n",
+ "46362 -73.99694 Entire home/apt 110 30 0 \n",
+ "46396 -73.82837 Private room 52 1 0 \n",
+ "46403 -73.82829 Private room 55 1 3 \n",
+ "46455 -73.75875 Private room 95 1 0 \n",
+ "46508 -73.99400 Entire home/apt 140 30 0 \n",
+ "\n",
+ " last_review reviews_per_month calculated_host_listings_count \\\n",
+ "52 2020-07-14 1.44 1 \n",
+ "201 2018-01-31 0.83 1 \n",
+ "209 2019-12-14 0.67 2 \n",
+ "220 NaN NaN 1 \n",
+ "260 2020-07-31 0.81 1 \n",
+ "... ... ... ... \n",
+ "46362 NaN NaN 3 \n",
+ "46396 NaN NaN 2 \n",
+ "46403 2020-08-16 3.00 2 \n",
+ "46455 NaN NaN 1 \n",
+ "46508 NaN NaN 1 \n",
+ "\n",
+ " availability_365 \n",
+ "52 365 \n",
+ "201 44 \n",
+ "209 89 \n",
+ "220 0 \n",
+ "260 261 \n",
+ "... ... \n",
+ "46362 110 \n",
+ "46396 360 \n",
+ "46403 365 \n",
+ "46455 365 \n",
+ "46508 282 \n",
+ "\n",
+ "[1258 rows x 16 columns]"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# 4. Using `.isin()` select anyone that has the host name of Michael, David, John, and Daniel.\n",
"\n",
- "\n"
+ "names = [\"Michael\", \"David\", \"John\", \"Daniel\"]\n",
+ "select_host = df[\"host_name\"].isin(names)\n",
+ "df_select_host = df[select_host]\n",
+ "df_select_host"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 29,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " host_id | \n",
+ " host_name | \n",
+ " neighbourhood_group | \n",
+ " neighbourhood | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " room_type | \n",
+ " price | \n",
+ " minimum_nights | \n",
+ " number_of_reviews | \n",
+ " last_review | \n",
+ " reviews_per_month | \n",
+ " calculated_host_listings_count | \n",
+ " availability_365 | \n",
+ " adjusted_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2595 | \n",
+ " Skylit Midtown Castle | \n",
+ " 2845 | \n",
+ " Jennifer | \n",
+ " Manhattan | \n",
+ " Midtown | \n",
+ " 40.75362 | \n",
+ " -73.98377 | \n",
+ " Entire home/apt | \n",
+ " 175 | \n",
+ " 3 | \n",
+ " 48 | \n",
+ " 2019-11-04 | \n",
+ " 0.37 | \n",
+ " 2 | \n",
+ " 365 | \n",
+ " 275 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3831 | \n",
+ " Whole flr w/private bdrm, bath & kitchen(pls r... | \n",
+ " 4869 | \n",
+ " LisaRoxanne | \n",
+ " Brooklyn | \n",
+ " Clinton Hill | \n",
+ " 40.68514 | \n",
+ " -73.95976 | \n",
+ " Entire home/apt | \n",
+ " 75 | \n",
+ " 1 | \n",
+ " 340 | \n",
+ " 2020-08-01 | \n",
+ " 4.75 | \n",
+ " 1 | \n",
+ " 265 | \n",
+ " 175 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5121 | \n",
+ " BlissArtsSpace! | \n",
+ " 7356 | \n",
+ " Garon | \n",
+ " Brooklyn | \n",
+ " Bedford-Stuyvesant | \n",
+ " 40.68688 | \n",
+ " -73.95596 | \n",
+ " Private room | \n",
+ " 60 | \n",
+ " 29 | \n",
+ " 50 | \n",
+ " 2019-12-02 | \n",
+ " 0.37 | \n",
+ " 1 | \n",
+ " 365 | \n",
+ " 160 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 5136 | \n",
+ " Spacious Brooklyn Duplex, Patio + Garden | \n",
+ " 7378 | \n",
+ " Rebecca | \n",
+ " Brooklyn | \n",
+ " Sunset Park | \n",
+ " 40.66120 | \n",
+ " -73.99423 | \n",
+ " Entire home/apt | \n",
+ " 175 | \n",
+ " 14 | \n",
+ " 1 | \n",
+ " 2014-01-02 | \n",
+ " 0.01 | \n",
+ " 1 | \n",
+ " 295 | \n",
+ " 275 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5178 | \n",
+ " Large Furnished Room Near B'way | \n",
+ " 8967 | \n",
+ " Shunichi | \n",
+ " Manhattan | \n",
+ " Hell's Kitchen | \n",
+ " 40.76489 | \n",
+ " -73.98493 | \n",
+ " Private room | \n",
+ " 65 | \n",
+ " 2 | \n",
+ " 473 | \n",
+ " 2020-03-15 | \n",
+ " 3.44 | \n",
+ " 1 | \n",
+ " 340 | \n",
+ " 165 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 46522 | \n",
+ " 44807522 | \n",
+ " Designer Gramercy Studio Townhouse by UNSQ | \n",
+ " 12941925 | \n",
+ " Brian | \n",
+ " Manhattan | \n",
+ " Gramercy | \n",
+ " 40.73433 | \n",
+ " -73.98383 | \n",
+ " Entire home/apt | \n",
+ " 145 | \n",
+ " 7 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 164 | \n",
+ " 245 | \n",
+ "
\n",
+ " \n",
+ " | 46523 | \n",
+ " 44807786 | \n",
+ " Cozy & comfy apt in the heart of Inwood Manhattan | \n",
+ " 284790520 | \n",
+ " Salar | \n",
+ " Manhattan | \n",
+ " Washington Heights | \n",
+ " 40.85820 | \n",
+ " -73.92733 | \n",
+ " Entire home/apt | \n",
+ " 87 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " 85 | \n",
+ " 187 | \n",
+ "
\n",
+ " \n",
+ " | 46524 | \n",
+ " 44811717 | \n",
+ " Comfortable safe environment 24hr security camera | \n",
+ " 362453686 | \n",
+ " Nicole | \n",
+ " Brooklyn | \n",
+ " East Flatbush | \n",
+ " 40.65399 | \n",
+ " -73.93287 | \n",
+ " Private room | \n",
+ " 59 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 90 | \n",
+ " 159 | \n",
+ "
\n",
+ " \n",
+ " | 46525 | \n",
+ " 44814944 | \n",
+ " Upper West Side studio 86th Street | \n",
+ " 4039777 | \n",
+ " Fernando | \n",
+ " Manhattan | \n",
+ " Upper West Side | \n",
+ " 40.78731 | \n",
+ " -73.97029 | \n",
+ " Entire home/apt | \n",
+ " 80 | \n",
+ " 30 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 113 | \n",
+ " 180 | \n",
+ "
\n",
+ " \n",
+ " | 46526 | \n",
+ " 44818009 | \n",
+ " 5MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN | \n",
+ " 48098268 | \n",
+ " Marina | \n",
+ " Brooklyn | \n",
+ " Gravesend | \n",
+ " 40.59945 | \n",
+ " -73.98209 | \n",
+ " Private room | \n",
+ " 66 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " 38 | \n",
+ " 166 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
46527 rows × 17 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name host_id \\\n",
+ "0 2595 Skylit Midtown Castle 2845 \n",
+ "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n",
+ "2 5121 BlissArtsSpace! 7356 \n",
+ "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n",
+ "4 5178 Large Furnished Room Near B'way 8967 \n",
+ "... ... ... ... \n",
+ "46522 44807522 Designer Gramercy Studio Townhouse by UNSQ 12941925 \n",
+ "46523 44807786 Cozy & comfy apt in the heart of Inwood Manhattan 284790520 \n",
+ "46524 44811717 Comfortable safe environment 24hr security camera 362453686 \n",
+ "46525 44814944 Upper West Side studio 86th Street 4039777 \n",
+ "46526 44818009 5MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN 48098268 \n",
+ "\n",
+ " host_name neighbourhood_group neighbourhood latitude \\\n",
+ "0 Jennifer Manhattan Midtown 40.75362 \n",
+ "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 \n",
+ "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 \n",
+ "3 Rebecca Brooklyn Sunset Park 40.66120 \n",
+ "4 Shunichi Manhattan Hell's Kitchen 40.76489 \n",
+ "... ... ... ... ... \n",
+ "46522 Brian Manhattan Gramercy 40.73433 \n",
+ "46523 Salar Manhattan Washington Heights 40.85820 \n",
+ "46524 Nicole Brooklyn East Flatbush 40.65399 \n",
+ "46525 Fernando Manhattan Upper West Side 40.78731 \n",
+ "46526 Marina Brooklyn Gravesend 40.59945 \n",
+ "\n",
+ " longitude room_type price minimum_nights number_of_reviews \\\n",
+ "0 -73.98377 Entire home/apt 175 3 48 \n",
+ "1 -73.95976 Entire home/apt 75 1 340 \n",
+ "2 -73.95596 Private room 60 29 50 \n",
+ "3 -73.99423 Entire home/apt 175 14 1 \n",
+ "4 -73.98493 Private room 65 2 473 \n",
+ "... ... ... ... ... ... \n",
+ "46522 -73.98383 Entire home/apt 145 7 0 \n",
+ "46523 -73.92733 Entire home/apt 87 6 0 \n",
+ "46524 -73.93287 Private room 59 3 0 \n",
+ "46525 -73.97029 Entire home/apt 80 30 0 \n",
+ "46526 -73.98209 Private room 66 1 0 \n",
+ "\n",
+ " last_review reviews_per_month calculated_host_listings_count \\\n",
+ "0 2019-11-04 0.37 2 \n",
+ "1 2020-08-01 4.75 1 \n",
+ "2 2019-12-02 0.37 1 \n",
+ "3 2014-01-02 0.01 1 \n",
+ "4 2020-03-15 3.44 1 \n",
+ "... ... ... ... \n",
+ "46522 NaN NaN 1 \n",
+ "46523 NaN NaN 2 \n",
+ "46524 NaN NaN 1 \n",
+ "46525 NaN NaN 1 \n",
+ "46526 NaN NaN 1 \n",
+ "\n",
+ " availability_365 adjusted_price \n",
+ "0 365 275 \n",
+ "1 265 175 \n",
+ "2 365 160 \n",
+ "3 295 275 \n",
+ "4 340 165 \n",
+ "... ... ... \n",
+ "46522 164 245 \n",
+ "46523 85 187 \n",
+ "46524 90 159 \n",
+ "46525 113 180 \n",
+ "46526 38 166 \n",
+ "\n",
+ "[46527 rows x 17 columns]"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# 5. Create a new column called `adjusted_price` that has $100 added to every listing in Williamsburg. \n",
"# The prices for all other listings should be the same as the were before. \n",
- "\n"
+ "df[\"adjusted_price\"] = df[\"price\"].where(df[\"neighbourhood\"] != \"Williamsburg\", df[\"price\"] + 100)  # +$100 for Williamsburg only; all other rows keep price\n",
+ "df\n"
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 47,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "room_type\n",
+ "Entire home/apt 23998\n",
+ "Private room 21144\n",
+ "Shared room 987\n",
+ "Hotel room 398\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# 6. What % of the rooms are private, and what % of the rooms are shared. \n",
"\n",
+ "room_types = df.value_counts(\"room_type\")\n",
+ "room_types\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "46527"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "total_rooms = room_types.sum()\n",
+ "total_rooms "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "percentage_private_room: 45.44%\n",
+ "percentage_shared_room: 2.12%\n"
+ ]
+ }
+ ],
+ "source": [
+ "private_room = room_types[\"Private room\"]\n",
+ "shared_room = room_types[\"Shared room\"]\n",
+ "percenatge_private_room = private_room / total_rooms\n",
+ "percenatge_shared_room = shared_room / total_rooms\n",
"\n",
- "\n"
+ "print(f\"percentage_private_room: {percenatge_private_room:.2%}\")\n",
+ "print(f\"percentage_shared_room: {percenatge_shared_room:.2%}\")"
]
},
{
From 18dda68156e68f1d906fdc1a023e5c08a56401cf Mon Sep 17 00:00:00 2001
From: WENKAITAN
Date: Thu, 16 Sep 2021 13:02:17 -0400
Subject: [PATCH 4/6] delete duplicate file
---
.DS_Store | Bin 0 -> 6148 bytes
Exercise.ipynb | 1306 ------------------------------------------------
2 files changed, 1306 deletions(-)
create mode 100644 .DS_Store
delete mode 100644 Exercise.ipynb
diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
GIT binary patch
literal 6148
zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3
zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ
zLs35+`xjp>T0\n",
- "\n",
- "\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " name | \n",
- " host_id | \n",
- " host_name | \n",
- " neighbourhood_group | \n",
- " neighbourhood | \n",
- " latitude | \n",
- " longitude | \n",
- " room_type | \n",
- " price | \n",
- " minimum_nights | \n",
- " number_of_reviews | \n",
- " last_review | \n",
- " reviews_per_month | \n",
- " calculated_host_listings_count | \n",
- " availability_365 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 2595 | \n",
- " Skylit Midtown Castle | \n",
- " 2845 | \n",
- " Jennifer | \n",
- " Manhattan | \n",
- " Midtown | \n",
- " 40.75362 | \n",
- " -73.98377 | \n",
- " Entire home/apt | \n",
- " 175 | \n",
- " 3 | \n",
- " 48 | \n",
- " 2019-11-04 | \n",
- " 0.37 | \n",
- " 2 | \n",
- " 365 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 3831 | \n",
- " Whole flr w/private bdrm, bath & kitchen(pls r... | \n",
- " 4869 | \n",
- " LisaRoxanne | \n",
- " Brooklyn | \n",
- " Clinton Hill | \n",
- " 40.68514 | \n",
- " -73.95976 | \n",
- " Entire home/apt | \n",
- " 75 | \n",
- " 1 | \n",
- " 340 | \n",
- " 2020-08-01 | \n",
- " 4.75 | \n",
- " 1 | \n",
- " 265 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 5121 | \n",
- " BlissArtsSpace! | \n",
- " 7356 | \n",
- " Garon | \n",
- " Brooklyn | \n",
- " Bedford-Stuyvesant | \n",
- " 40.68688 | \n",
- " -73.95596 | \n",
- " Private room | \n",
- " 60 | \n",
- " 29 | \n",
- " 50 | \n",
- " 2019-12-02 | \n",
- " 0.37 | \n",
- " 1 | \n",
- " 365 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 5136 | \n",
- " Spacious Brooklyn Duplex, Patio + Garden | \n",
- " 7378 | \n",
- " Rebecca | \n",
- " Brooklyn | \n",
- " Sunset Park | \n",
- " 40.66120 | \n",
- " -73.99423 | \n",
- " Entire home/apt | \n",
- " 175 | \n",
- " 14 | \n",
- " 1 | \n",
- " 2014-01-02 | \n",
- " 0.01 | \n",
- " 1 | \n",
- " 295 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 5178 | \n",
- " Large Furnished Room Near B'way | \n",
- " 8967 | \n",
- " Shunichi | \n",
- " Manhattan | \n",
- " Hell's Kitchen | \n",
- " 40.76489 | \n",
- " -73.98493 | \n",
- " Private room | \n",
- " 65 | \n",
- " 2 | \n",
- " 473 | \n",
- " 2020-03-15 | \n",
- " 3.44 | \n",
- " 1 | \n",
- " 340 | \n",
- "
\n",
- " \n",
- "
\n",
- ""
- ],
- "text/plain": [
- " id name host_id \\\n",
- "0 2595 Skylit Midtown Castle 2845 \n",
- "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n",
- "2 5121 BlissArtsSpace! 7356 \n",
- "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n",
- "4 5178 Large Furnished Room Near B'way 8967 \n",
- "\n",
- " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
- "0 Jennifer Manhattan Midtown 40.75362 -73.98377 \n",
- "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n",
- "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 -73.95596 \n",
- "3 Rebecca Brooklyn Sunset Park 40.66120 -73.99423 \n",
- "4 Shunichi Manhattan Hell's Kitchen 40.76489 -73.98493 \n",
- "\n",
- " room_type price minimum_nights number_of_reviews last_review \\\n",
- "0 Entire home/apt 175 3 48 2019-11-04 \n",
- "1 Entire home/apt 75 1 340 2020-08-01 \n",
- "2 Private room 60 29 50 2019-12-02 \n",
- "3 Entire home/apt 175 14 1 2014-01-02 \n",
- "4 Private room 65 2 473 2020-03-15 \n",
- "\n",
- " reviews_per_month calculated_host_listings_count availability_365 \n",
- "0 0.37 2 365 \n",
- "1 4.75 1 265 \n",
- "2 0.37 1 365 \n",
- "3 0.01 1 295 \n",
- "4 3.44 1 340 "
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Load data here\n",
- "df = pd.read_csv(\"./data/listings.csv\", sep=\",\")\n",
- "\n",
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(46527, 16)"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.shape"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Exercise 2 - Filtering\n",
- "\n",
- "Return the following subsets of the dataframe.\n",
- "\n",
- "1. How many listings are there with a price less than 100? \n",
- "\n",
- "\n",
- "2. Find how many listings there are in just Brooklyn.\n",
- "\n",
- "\n",
- "3. Find how many listings there are in Brooklyn with a price less than 100.\n",
- "\n",
- "\n",
- "4. Using `.isin()` select anyone that has the host name of Michael, David, John, and Daniel.\n",
- "\n",
- "\n",
- "5. Create a new column called `adjusted_price` that has $100 added to every listing in Williamsburg. The prices for all other listings should be the same as the were before. \n",
- "\n",
- "\n",
- "6. What % of the rooms are private, and what % of the rooms are shared. \n",
- " * Hint, use `.value_counts()`\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(22778, 16)"
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 1. How many listings are there with a price less than 100? \n",
- "price_lessthan_100 = df[\"price\"] < 100\n",
- "\n",
- "df_less_than_100 = df[price_lessthan_100]\n",
- "df_less_than_100.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(18632, 16)"
- ]
- },
- "execution_count": 25,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 2. Make a new DataFrame of listings in Brooklyn named `df_bk` \n",
- "# and find how many listings in just Brooklyn.\n",
- "\n",
- "list_in_brk = df[\"neighbourhood_group\"] == \"Brooklyn\"\n",
- "df_bk = df[list_in_brk ]\n",
- "df_bk.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(10473, 16)"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 3. Find how many listings there are in Brooklyn with a price less than 100.\n",
- "list_in_bk_and_less_than_100 = df_bk[\"price\"] < 100 \n",
- "\n",
- "df_bk_less_100 = df_bk[list_in_bk_and_less_than_100]\n",
- "\n",
- "df_bk_less_100.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " name | \n",
- " host_id | \n",
- " host_name | \n",
- " neighbourhood_group | \n",
- " neighbourhood | \n",
- " latitude | \n",
- " longitude | \n",
- " room_type | \n",
- " price | \n",
- " minimum_nights | \n",
- " number_of_reviews | \n",
- " last_review | \n",
- " reviews_per_month | \n",
- " calculated_host_listings_count | \n",
- " availability_365 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 52 | \n",
- " 16595 | \n",
- " LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q... | \n",
- " 64522 | \n",
- " Daniel | \n",
- " Brooklyn | \n",
- " Williamsburg | \n",
- " 40.70933 | \n",
- " -73.96792 | \n",
- " Entire home/apt | \n",
- " 271 | \n",
- " 1 | \n",
- " 172 | \n",
- " 2020-07-14 | \n",
- " 1.44 | \n",
- " 1 | \n",
- " 365 | \n",
- "
\n",
- " \n",
- " | 201 | \n",
- " 61747 | \n",
- " Cozy, Brooklyn, Prospect Park Studio | \n",
- " 299370 | \n",
- " David | \n",
- " Brooklyn | \n",
- " Prospect-Lefferts Gardens | \n",
- " 40.65979 | \n",
- " -73.96180 | \n",
- " Entire home/apt | \n",
- " 91 | \n",
- " 14 | \n",
- " 97 | \n",
- " 2018-01-31 | \n",
- " 0.83 | \n",
- " 1 | \n",
- " 44 | \n",
- "
\n",
- " \n",
- " | 209 | \n",
- " 62903 | \n",
- " Beautiful modern studio apartment in heart of NYC | \n",
- " 306605 | \n",
- " Daniel | \n",
- " Manhattan | \n",
- " Chelsea | \n",
- " 40.74238 | \n",
- " -73.99567 | \n",
- " Entire home/apt | \n",
- " 205 | \n",
- " 15 | \n",
- " 68 | \n",
- " 2019-12-14 | \n",
- " 0.67 | \n",
- " 2 | \n",
- " 89 | \n",
- "
\n",
- " \n",
- " | 220 | \n",
- " 64015 | \n",
- " Prime East Village 1 Bedroom | \n",
- " 146944 | \n",
- " David | \n",
- " Manhattan | \n",
- " East Village | \n",
- " 40.72807 | \n",
- " -73.98594 | \n",
- " Entire home/apt | \n",
- " 200 | \n",
- " 3 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 1 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 260 | \n",
- " 74073 | \n",
- " Food & Music Dream Apartment in Williamsburg | \n",
- " 211877 | \n",
- " Daniel | \n",
- " Brooklyn | \n",
- " Williamsburg | \n",
- " 40.71113 | \n",
- " -73.96054 | \n",
- " Entire home/apt | \n",
- " 187 | \n",
- " 30 | \n",
- " 90 | \n",
- " 2020-07-31 | \n",
- " 0.81 | \n",
- " 1 | \n",
- " 261 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 46362 | \n",
- " 44639591 | \n",
- " Central & Stylish 1 Bedroom Apt - Heart of Che... | \n",
- " 286136716 | \n",
- " John | \n",
- " Manhattan | \n",
- " Chelsea | \n",
- " 40.74568 | \n",
- " -73.99694 | \n",
- " Entire home/apt | \n",
- " 110 | \n",
- " 30 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 3 | \n",
- " 110 | \n",
- "
\n",
- " \n",
- " | 46396 | \n",
- " 44661297 | \n",
- " Flushing Sunshine home #101 | \n",
- " 361579037 | \n",
- " Daniel | \n",
- " Queens | \n",
- " Flushing | \n",
- " 40.74603 | \n",
- " -73.82837 | \n",
- " Private room | \n",
- " 52 | \n",
- " 1 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 2 | \n",
- " 360 | \n",
- "
\n",
- " \n",
- " | 46403 | \n",
- " 44662157 | \n",
- " Flushing Sunshine home #102 | \n",
- " 361579037 | \n",
- " Daniel | \n",
- " Queens | \n",
- " Flushing | \n",
- " 40.74441 | \n",
- " -73.82829 | \n",
- " Private room | \n",
- " 55 | \n",
- " 1 | \n",
- " 3 | \n",
- " 2020-08-16 | \n",
- " 3.00 | \n",
- " 2 | \n",
- " 365 | \n",
- "
\n",
- " \n",
- " | 46455 | \n",
- " 44697211 | \n",
- " David’s Queen Sized Room | \n",
- " 343477029 | \n",
- " David | \n",
- " Queens | \n",
- " Far Rockaway | \n",
- " 40.59460 | \n",
- " -73.75875 | \n",
- " Private room | \n",
- " 95 | \n",
- " 1 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 1 | \n",
- " 365 | \n",
- "
\n",
- " \n",
- " | 46508 | \n",
- " 44797527 | \n",
- " Long-Term: Furnished Apt in Nolita w/ Amenities | \n",
- " 19448640 | \n",
- " David | \n",
- " Manhattan | \n",
- " Nolita | \n",
- " 40.72289 | \n",
- " -73.99400 | \n",
- " Entire home/apt | \n",
- " 140 | \n",
- " 30 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 1 | \n",
- " 282 | \n",
- "
\n",
- " \n",
- "
\n",
- "
1258 rows × 16 columns
\n",
- "
"
- ],
- "text/plain": [
- " id name host_id \\\n",
- "52 16595 LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q... 64522 \n",
- "201 61747 Cozy, Brooklyn, Prospect Park Studio 299370 \n",
- "209 62903 Beautiful modern studio apartment in heart of NYC 306605 \n",
- "220 64015 Prime East Village 1 Bedroom 146944 \n",
- "260 74073 Food & Music Dream Apartment in Williamsburg 211877 \n",
- "... ... ... ... \n",
- "46362 44639591 Central & Stylish 1 Bedroom Apt - Heart of Che... 286136716 \n",
- "46396 44661297 Flushing Sunshine home #101 361579037 \n",
- "46403 44662157 Flushing Sunshine home #102 361579037 \n",
- "46455 44697211 David’s Queen Sized Room 343477029 \n",
- "46508 44797527 Long-Term: Furnished Apt in Nolita w/ Amenities 19448640 \n",
- "\n",
- " host_name neighbourhood_group neighbourhood latitude \\\n",
- "52 Daniel Brooklyn Williamsburg 40.70933 \n",
- "201 David Brooklyn Prospect-Lefferts Gardens 40.65979 \n",
- "209 Daniel Manhattan Chelsea 40.74238 \n",
- "220 David Manhattan East Village 40.72807 \n",
- "260 Daniel Brooklyn Williamsburg 40.71113 \n",
- "... ... ... ... ... \n",
- "46362 John Manhattan Chelsea 40.74568 \n",
- "46396 Daniel Queens Flushing 40.74603 \n",
- "46403 Daniel Queens Flushing 40.74441 \n",
- "46455 David Queens Far Rockaway 40.59460 \n",
- "46508 David Manhattan Nolita 40.72289 \n",
- "\n",
- " longitude room_type price minimum_nights number_of_reviews \\\n",
- "52 -73.96792 Entire home/apt 271 1 172 \n",
- "201 -73.96180 Entire home/apt 91 14 97 \n",
- "209 -73.99567 Entire home/apt 205 15 68 \n",
- "220 -73.98594 Entire home/apt 200 3 0 \n",
- "260 -73.96054 Entire home/apt 187 30 90 \n",
- "... ... ... ... ... ... \n",
- "46362 -73.99694 Entire home/apt 110 30 0 \n",
- "46396 -73.82837 Private room 52 1 0 \n",
- "46403 -73.82829 Private room 55 1 3 \n",
- "46455 -73.75875 Private room 95 1 0 \n",
- "46508 -73.99400 Entire home/apt 140 30 0 \n",
- "\n",
- " last_review reviews_per_month calculated_host_listings_count \\\n",
- "52 2020-07-14 1.44 1 \n",
- "201 2018-01-31 0.83 1 \n",
- "209 2019-12-14 0.67 2 \n",
- "220 NaN NaN 1 \n",
- "260 2020-07-31 0.81 1 \n",
- "... ... ... ... \n",
- "46362 NaN NaN 3 \n",
- "46396 NaN NaN 2 \n",
- "46403 2020-08-16 3.00 2 \n",
- "46455 NaN NaN 1 \n",
- "46508 NaN NaN 1 \n",
- "\n",
- " availability_365 \n",
- "52 365 \n",
- "201 44 \n",
- "209 89 \n",
- "220 0 \n",
- "260 261 \n",
- "... ... \n",
- "46362 110 \n",
- "46396 360 \n",
- "46403 365 \n",
- "46455 365 \n",
- "46508 282 \n",
- "\n",
- "[1258 rows x 16 columns]"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 4. Using `.isin()` select anyone that has the host name of Michael, David, John, and Daniel.\n",
- "\n",
- "names = [\"Michael\", \"David\", \"John\", \"Daniel\"]\n",
- "select_host = df[\"host_name\"].isin(names)\n",
- "df_select_host = df[select_host]\n",
- "df_select_host"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " name | \n",
- " host_id | \n",
- " host_name | \n",
- " neighbourhood_group | \n",
- " neighbourhood | \n",
- " latitude | \n",
- " longitude | \n",
- " room_type | \n",
- " price | \n",
- " minimum_nights | \n",
- " number_of_reviews | \n",
- " last_review | \n",
- " reviews_per_month | \n",
- " calculated_host_listings_count | \n",
- " availability_365 | \n",
- " adjusted_price | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 2595 | \n",
- " Skylit Midtown Castle | \n",
- " 2845 | \n",
- " Jennifer | \n",
- " Manhattan | \n",
- " Midtown | \n",
- " 40.75362 | \n",
- " -73.98377 | \n",
- " Entire home/apt | \n",
- " 175 | \n",
- " 3 | \n",
- " 48 | \n",
- " 2019-11-04 | \n",
- " 0.37 | \n",
- " 2 | \n",
- " 365 | \n",
- " 275 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 3831 | \n",
- " Whole flr w/private bdrm, bath & kitchen(pls r... | \n",
- " 4869 | \n",
- " LisaRoxanne | \n",
- " Brooklyn | \n",
- " Clinton Hill | \n",
- " 40.68514 | \n",
- " -73.95976 | \n",
- " Entire home/apt | \n",
- " 75 | \n",
- " 1 | \n",
- " 340 | \n",
- " 2020-08-01 | \n",
- " 4.75 | \n",
- " 1 | \n",
- " 265 | \n",
- " 175 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 5121 | \n",
- " BlissArtsSpace! | \n",
- " 7356 | \n",
- " Garon | \n",
- " Brooklyn | \n",
- " Bedford-Stuyvesant | \n",
- " 40.68688 | \n",
- " -73.95596 | \n",
- " Private room | \n",
- " 60 | \n",
- " 29 | \n",
- " 50 | \n",
- " 2019-12-02 | \n",
- " 0.37 | \n",
- " 1 | \n",
- " 365 | \n",
- " 160 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 5136 | \n",
- " Spacious Brooklyn Duplex, Patio + Garden | \n",
- " 7378 | \n",
- " Rebecca | \n",
- " Brooklyn | \n",
- " Sunset Park | \n",
- " 40.66120 | \n",
- " -73.99423 | \n",
- " Entire home/apt | \n",
- " 175 | \n",
- " 14 | \n",
- " 1 | \n",
- " 2014-01-02 | \n",
- " 0.01 | \n",
- " 1 | \n",
- " 295 | \n",
- " 275 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 5178 | \n",
- " Large Furnished Room Near B'way | \n",
- " 8967 | \n",
- " Shunichi | \n",
- " Manhattan | \n",
- " Hell's Kitchen | \n",
- " 40.76489 | \n",
- " -73.98493 | \n",
- " Private room | \n",
- " 65 | \n",
- " 2 | \n",
- " 473 | \n",
- " 2020-03-15 | \n",
- " 3.44 | \n",
- " 1 | \n",
- " 340 | \n",
- " 165 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 46522 | \n",
- " 44807522 | \n",
- " Designer Gramercy Studio Townhouse by UNSQ | \n",
- " 12941925 | \n",
- " Brian | \n",
- " Manhattan | \n",
- " Gramercy | \n",
- " 40.73433 | \n",
- " -73.98383 | \n",
- " Entire home/apt | \n",
- " 145 | \n",
- " 7 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 1 | \n",
- " 164 | \n",
- " 245 | \n",
- "
\n",
- " \n",
- " | 46523 | \n",
- " 44807786 | \n",
- " Cozy & comfy apt in the heart of Inwood Manhattan | \n",
- " 284790520 | \n",
- " Salar | \n",
- " Manhattan | \n",
- " Washington Heights | \n",
- " 40.85820 | \n",
- " -73.92733 | \n",
- " Entire home/apt | \n",
- " 87 | \n",
- " 6 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 2 | \n",
- " 85 | \n",
- " 187 | \n",
- "
\n",
- " \n",
- " | 46524 | \n",
- " 44811717 | \n",
- " Comfortable safe environment 24hr security camera | \n",
- " 362453686 | \n",
- " Nicole | \n",
- " Brooklyn | \n",
- " East Flatbush | \n",
- " 40.65399 | \n",
- " -73.93287 | \n",
- " Private room | \n",
- " 59 | \n",
- " 3 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 1 | \n",
- " 90 | \n",
- " 159 | \n",
- "
\n",
- " \n",
- " | 46525 | \n",
- " 44814944 | \n",
- " Upper West Side studio 86th Street | \n",
- " 4039777 | \n",
- " Fernando | \n",
- " Manhattan | \n",
- " Upper West Side | \n",
- " 40.78731 | \n",
- " -73.97029 | \n",
- " Entire home/apt | \n",
- " 80 | \n",
- " 30 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 1 | \n",
- " 113 | \n",
- " 180 | \n",
- "
\n",
- " \n",
- " | 46526 | \n",
- " 44818009 | \n",
- " 5MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN | \n",
- " 48098268 | \n",
- " Marina | \n",
- " Brooklyn | \n",
- " Gravesend | \n",
- " 40.59945 | \n",
- " -73.98209 | \n",
- " Private room | \n",
- " 66 | \n",
- " 1 | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 1 | \n",
- " 38 | \n",
- " 166 | \n",
- "
\n",
- " \n",
- "
\n",
- "
46527 rows × 17 columns
\n",
- "
"
- ],
- "text/plain": [
- " id name host_id \\\n",
- "0 2595 Skylit Midtown Castle 2845 \n",
- "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n",
- "2 5121 BlissArtsSpace! 7356 \n",
- "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n",
- "4 5178 Large Furnished Room Near B'way 8967 \n",
- "... ... ... ... \n",
- "46522 44807522 Designer Gramercy Studio Townhouse by UNSQ 12941925 \n",
- "46523 44807786 Cozy & comfy apt in the heart of Inwood Manhattan 284790520 \n",
- "46524 44811717 Comfortable safe environment 24hr security camera 362453686 \n",
- "46525 44814944 Upper West Side studio 86th Street 4039777 \n",
- "46526 44818009 5MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN 48098268 \n",
- "\n",
- " host_name neighbourhood_group neighbourhood latitude \\\n",
- "0 Jennifer Manhattan Midtown 40.75362 \n",
- "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 \n",
- "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 \n",
- "3 Rebecca Brooklyn Sunset Park 40.66120 \n",
- "4 Shunichi Manhattan Hell's Kitchen 40.76489 \n",
- "... ... ... ... ... \n",
- "46522 Brian Manhattan Gramercy 40.73433 \n",
- "46523 Salar Manhattan Washington Heights 40.85820 \n",
- "46524 Nicole Brooklyn East Flatbush 40.65399 \n",
- "46525 Fernando Manhattan Upper West Side 40.78731 \n",
- "46526 Marina Brooklyn Gravesend 40.59945 \n",
- "\n",
- " longitude room_type price minimum_nights number_of_reviews \\\n",
- "0 -73.98377 Entire home/apt 175 3 48 \n",
- "1 -73.95976 Entire home/apt 75 1 340 \n",
- "2 -73.95596 Private room 60 29 50 \n",
- "3 -73.99423 Entire home/apt 175 14 1 \n",
- "4 -73.98493 Private room 65 2 473 \n",
- "... ... ... ... ... ... \n",
- "46522 -73.98383 Entire home/apt 145 7 0 \n",
- "46523 -73.92733 Entire home/apt 87 6 0 \n",
- "46524 -73.93287 Private room 59 3 0 \n",
- "46525 -73.97029 Entire home/apt 80 30 0 \n",
- "46526 -73.98209 Private room 66 1 0 \n",
- "\n",
- " last_review reviews_per_month calculated_host_listings_count \\\n",
- "0 2019-11-04 0.37 2 \n",
- "1 2020-08-01 4.75 1 \n",
- "2 2019-12-02 0.37 1 \n",
- "3 2014-01-02 0.01 1 \n",
- "4 2020-03-15 3.44 1 \n",
- "... ... ... ... \n",
- "46522 NaN NaN 1 \n",
- "46523 NaN NaN 2 \n",
- "46524 NaN NaN 1 \n",
- "46525 NaN NaN 1 \n",
- "46526 NaN NaN 1 \n",
- "\n",
- " availability_365 adjusted_price \n",
- "0 365 275 \n",
- "1 265 175 \n",
- "2 365 160 \n",
- "3 295 275 \n",
- "4 340 165 \n",
- "... ... ... \n",
- "46522 164 245 \n",
- "46523 85 187 \n",
- "46524 90 159 \n",
- "46525 113 180 \n",
- "46526 38 166 \n",
- "\n",
- "[46527 rows x 17 columns]"
- ]
- },
- "execution_count": 29,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 5. Create a new column called `adjusted_price` that has $100 added to every listing in Williamsburg. \n",
- "# The prices for all other listings should be the same as the were before. \n",
- "df[\"adjusted_price\"] = df[\"price\"]+100\n",
- "df\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 47,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "room_type\n",
- "Entire home/apt 23998\n",
- "Private room 21144\n",
- "Shared room 987\n",
- "Hotel room 398\n",
- "dtype: int64"
- ]
- },
- "execution_count": 47,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 6. What % of the rooms are private, and what % of the rooms are shared. \n",
- "\n",
- "room_types = df.value_counts(\"room_type\")\n",
- "room_types\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 49,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "46527"
- ]
- },
- "execution_count": 49,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "total_rooms = room_types.sum()\n",
- "total_rooms "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 53,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "percenatge_private_room: 0.45444580566122894\n",
- "percenatge_shared_room: 0.0212134889419047\n"
- ]
- }
- ],
- "source": [
- "private_room = room_types[\"Private room\"]\n",
- "shared_room = room_types[\"Shared room\"]\n",
- "percenatge_private_room = private_room / total_rooms\n",
- "percenatge_shared_room = shared_room / total_rooms\n",
- "\n",
- "print(\"percenatge_private_room: \", percenatge_private_room)\n",
- "print(\"percenatge_shared_room: \", percenatge_shared_room )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Exercise 3 - Grouping\n",
- "\n",
- "1. Using `groupby`, count how many listings are in each neighbourhood_group.\n",
- "\n",
- "\n",
- "2. Using `groupby`, find the mean price for each of the neighbourhood_groups. \n",
- "\n",
- "\n",
- "3. Using `groupby` and `.agg()`, find the min and max price for each of the neighbourhood_groups. \n",
- "\n",
- "\n",
- "4. Using `groupby`, find the median price for each room type in each neighbourhood_group.\n",
- "\n",
- "\n",
- "5. Using `groupby` and `.agg()`, find the count, min, max, mean, median, and std of the prices for each room type in each neighbourhood_group."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 1. Using `groupby`, count how many listings are in each neighbourhood_group.\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 2. Using `groupby`, find the mean price for each of the neighbourhood_groups. \n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 3. Using `groupby` and `.agg()`, find the min and max price for each of the neighbourhood_groups. \n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 4. Using `groupby`, find the mean price for each room type in each neighbourhood_group.\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 5. Using `groupby` and `.agg()`, find the count, min, max, mean, median, and std of the prices \n",
- "# for each room type in each neighbourhood_group.\n",
- "\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Join and file saving.\n",
- "1. Load the `prices.csv` and the `n_listings.csv`\n",
- "\n",
- "\n",
- "2. Do join that keeps all the records for each table.\n",
- " * Neighbourhood groups should include ['Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island',\n",
- " 'LongIsland']\n",
- " \n",
- " \n",
- "3. Save your joined csv as `joined.csv`\n",
- "\n",
- "\n",
- "4. Load your saved table and see if it looks the same or different that the DataFrame you used to create it. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 1. Load the `prices.csv` and the `n_listings.csv`\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 2. Do join that keeps all the records for each table.\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Use the grammys.csv data for the next section of questions.\n",
- "\n",
- "1. Who was won Album of the Year in 2016?\n",
- "\n",
- "\n",
- "2. Who won Best Rap Album in 2009?\n",
- "\n",
- "\n",
- "3. How many awards was Kendrick Lamar nomiated for, and how many did he win...?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 1. Who was won Album of the Year in 2016?\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 2. Who won Best Rap Album in 2009?\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 3. How many awards was Kendrick Lamar nomiated for, and how many did he win...?\n",
- "\n",
- "\n"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.5"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
From 4cf4ab2a935ba87a3946508b11252a1a51fdcc04 Mon Sep 17 00:00:00 2001
From: WENKAI TAN <52226880+WENKAITAN@users.noreply.github.com>
Date: Thu, 16 Sep 2021 13:02:58 -0400
Subject: [PATCH 5/6] Delete .DS_Store
---
.DS_Store | Bin 6148 -> 0 bytes
1 file changed, 0 insertions(+), 0 deletions(-)
delete mode 100644 .DS_Store
diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 6148
zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3
zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ
zLs35+`xjp>T0
Date: Fri, 6 May 2022 12:49:49 -0400
Subject: [PATCH 6/6] Created using Colaboratory
---
Penguins_WenkaiTan.ipynb | 1265 ++++++++++++++++++++++++++++++++++++++
1 file changed, 1265 insertions(+)
create mode 100644 Penguins_WenkaiTan.ipynb
diff --git a/Penguins_WenkaiTan.ipynb b/Penguins_WenkaiTan.ipynb
new file mode 100644
index 00000000..af6a6741
--- /dev/null
+++ b/Penguins_WenkaiTan.ipynb
@@ -0,0 +1,1265 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Penguins_WenkaiTan.ipynb",
+ "provenance": [],
+ "authorship_tag": "ABX9TyPfRL1NXZUansg3CbCtXijM",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 0
+ },
+ "id": "5tSFmXUiIH75",
+ "outputId": "2ecf1241-0bdf-47c5-9e7f-b6d61f6ccb32"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "2.8.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "# TensorFlow and tf.keras\n",
+ "import tensorflow as tf\n",
+ "from tensorflow import keras\n",
+ "# Layers for our neural networks\n",
+ "from tensorflow.keras.layers import Dense\n",
+ "\n",
+ "\n",
+ "# Our normal python data science stack you've come to know and love\n",
+ "import numpy as np\n",
+ "import os\n",
+ "import sys\n",
+ "import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
+ "import seaborn as sns\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "\n",
+ "print(tf.__version__)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Download the dataset\n",
+ "!wget -q https://storage.googleapis.com/download.tensorflow.org/data/palmer_penguins/penguins.csv -O /tmp/penguins.csv\n",
+ "\n",
+ "# Load a dataset into a Pandas Dataframe.\n",
+ "dataset_df = pd.read_csv(\"/tmp/penguins.csv\")\n",
+ "\n",
+ "# Display the first 5 examples.\n",
+ "dataset_df.head(5)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 207
+ },
+ "id": "cPD4tbw5IVii",
+ "outputId": "9da52760-0285-4885-bd98-8128d1696865"
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
+ "0 Adelie Torgersen 39.1 18.7 181.0 \n",
+ "1 Adelie Torgersen 39.5 17.4 186.0 \n",
+ "2 Adelie Torgersen 40.3 18.0 195.0 \n",
+ "3 Adelie Torgersen NaN NaN NaN \n",
+ "4 Adelie Torgersen 36.7 19.3 193.0 \n",
+ "\n",
+ " body_mass_g sex year \n",
+ "0 3750.0 male 2007 \n",
+ "1 3800.0 female 2007 \n",
+ "2 3250.0 female 2007 \n",
+ "3 NaN NaN 2007 \n",
+ "4 3450.0 female 2007 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " island | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " sex | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.1 | \n",
+ " 18.7 | \n",
+ " 181.0 | \n",
+ " 3750.0 | \n",
+ " male | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.5 | \n",
+ " 17.4 | \n",
+ " 186.0 | \n",
+ " 3800.0 | \n",
+ " female | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 40.3 | \n",
+ " 18.0 | \n",
+ " 195.0 | \n",
+ " 3250.0 | \n",
+ " female | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 36.7 | \n",
+ " 19.3 | \n",
+ " 193.0 | \n",
+ " 3450.0 | \n",
+ " female | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 2
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#check if dataset has null values\n",
+ "dataset_df.isna().sum()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 0
+ },
+ "id": "4h-d2XlCIX-5",
+ "outputId": "6169f094-be01-4447-c17d-359126d6ee37"
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "species 0\n",
+ "island 0\n",
+ "bill_length_mm 2\n",
+ "bill_depth_mm 2\n",
+ "flipper_length_mm 2\n",
+ "body_mass_g 2\n",
+ "sex 11\n",
+ "year 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 3
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# delete all the rows that have null values\n",
+ "dataset_df = dataset_df.dropna()"
+ ],
+ "metadata": {
+ "id": "5--u42gaIhHG"
+ },
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# sanity check\n",
+ "dataset_df.isna().sum()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 0
+ },
+ "id": "I7hu9BUIItLJ",
+ "outputId": "20f40c7f-e509-48a1-f4ad-4ba2064b943a"
+ },
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "species 0\n",
+ "island 0\n",
+ "bill_length_mm 0\n",
+ "bill_depth_mm 0\n",
+ "flipper_length_mm 0\n",
+ "body_mass_g 0\n",
+ "sex 0\n",
+ "year 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 5
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# one-hot encoding\n",
+ "dataset_df = pd.get_dummies(dataset_df, columns=['island', 'sex'], drop_first=True)\n",
+ "dataset_df.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 271
+ },
+ "id": "AU_9b8egI2ru",
+ "outputId": "20a17fe0-bee1-46a0-df6f-faed0915c44e"
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " species bill_length_mm bill_depth_mm flipper_length_mm body_mass_g \\\n",
+ "0 Adelie 39.1 18.7 181.0 3750.0 \n",
+ "1 Adelie 39.5 17.4 186.0 3800.0 \n",
+ "2 Adelie 40.3 18.0 195.0 3250.0 \n",
+ "4 Adelie 36.7 19.3 193.0 3450.0 \n",
+ "5 Adelie 39.3 20.6 190.0 3650.0 \n",
+ "\n",
+ " year island_Dream island_Torgersen sex_male \n",
+ "0 2007 0 1 1 \n",
+ "1 2007 0 1 0 \n",
+ "2 2007 0 1 0 \n",
+ "4 2007 0 1 0 \n",
+ "5 2007 0 1 1 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " year | \n",
+ " island_Dream | \n",
+ " island_Torgersen | \n",
+ " sex_male | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Adelie | \n",
+ " 39.1 | \n",
+ " 18.7 | \n",
+ " 181.0 | \n",
+ " 3750.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Adelie | \n",
+ " 39.5 | \n",
+ " 17.4 | \n",
+ " 186.0 | \n",
+ " 3800.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Adelie | \n",
+ " 40.3 | \n",
+ " 18.0 | \n",
+ " 195.0 | \n",
+ " 3250.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Adelie | \n",
+ " 36.7 | \n",
+ " 19.3 | \n",
+ " 193.0 | \n",
+ " 3450.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Adelie | \n",
+ " 39.3 | \n",
+ " 20.6 | \n",
+ " 190.0 | \n",
+ " 3650.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 6
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Name of the label column, and convert the categorical label into an integer.\n",
+ "label = \"species\"\n",
+ "classes = dataset_df[label].unique().tolist()\n",
+ "print(f\"Label classes: {classes}\")\n",
+ "\n",
+ "dataset_df[label] = dataset_df[label].map(classes.index)\n",
+ "dataset_df.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 288
+ },
+ "id": "GmfY-TROLq0A",
+ "outputId": "acb0bd40-bf5a-42dd-bfe8-b2457cadd521"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Label classes: ['Adelie', 'Gentoo', 'Chinstrap']\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " species bill_length_mm bill_depth_mm flipper_length_mm body_mass_g \\\n",
+ "0 0 39.1 18.7 181.0 3750.0 \n",
+ "1 0 39.5 17.4 186.0 3800.0 \n",
+ "2 0 40.3 18.0 195.0 3250.0 \n",
+ "4 0 36.7 19.3 193.0 3450.0 \n",
+ "5 0 39.3 20.6 190.0 3650.0 \n",
+ "\n",
+ " year island_Dream island_Torgersen sex_male \n",
+ "0 2007 0 1 1 \n",
+ "1 2007 0 1 0 \n",
+ "2 2007 0 1 0 \n",
+ "4 2007 0 1 0 \n",
+ "5 2007 0 1 1 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " year | \n",
+ " island_Dream | \n",
+ " island_Torgersen | \n",
+ " sex_male | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " 39.1 | \n",
+ " 18.7 | \n",
+ " 181.0 | \n",
+ " 3750.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0 | \n",
+ " 39.5 | \n",
+ " 17.4 | \n",
+ " 186.0 | \n",
+ " 3800.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0 | \n",
+ " 40.3 | \n",
+ " 18.0 | \n",
+ " 195.0 | \n",
+ " 3250.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0 | \n",
+ " 36.7 | \n",
+ " 19.3 | \n",
+ " 193.0 | \n",
+ " 3450.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 0 | \n",
+ " 39.3 | \n",
+ " 20.6 | \n",
+ " 190.0 | \n",
+ " 3650.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# dataset with all the features\n",
+ "X = dataset_df.drop(columns='species')\n",
+ "#X.head()\n",
+ "\n",
+ "# dataset with labels\n",
+ "y = dataset_df['species']\n",
+ "#y.head()\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45)\n",
+ "\n",
+ "print('Lenght of our Training data:', X_train.shape, '\\nLength of our Testing data:', y_test.shape)\n",
+ "dataset_df.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 306
+ },
+ "id": "rIMD1aNRJQix",
+ "outputId": "d50f1f8a-52bd-4795-bcc5-bdd05d60d244"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Lenght of our Training data: (266, 8) \n",
+ "Length of our Testing data: (67,)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " species bill_length_mm bill_depth_mm flipper_length_mm body_mass_g \\\n",
+ "0 0 39.1 18.7 181.0 3750.0 \n",
+ "1 0 39.5 17.4 186.0 3800.0 \n",
+ "2 0 40.3 18.0 195.0 3250.0 \n",
+ "4 0 36.7 19.3 193.0 3450.0 \n",
+ "5 0 39.3 20.6 190.0 3650.0 \n",
+ "\n",
+ " year island_Dream island_Torgersen sex_male \n",
+ "0 2007 0 1 1 \n",
+ "1 2007 0 1 0 \n",
+ "2 2007 0 1 0 \n",
+ "4 2007 0 1 0 \n",
+ "5 2007 0 1 1 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " year | \n",
+ " island_Dream | \n",
+ " island_Torgersen | \n",
+ " sex_male | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " 39.1 | \n",
+ " 18.7 | \n",
+ " 181.0 | \n",
+ " 3750.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0 | \n",
+ " 39.5 | \n",
+ " 17.4 | \n",
+ " 186.0 | \n",
+ " 3800.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0 | \n",
+ " 40.3 | \n",
+ " 18.0 | \n",
+ " 195.0 | \n",
+ " 3250.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0 | \n",
+ " 36.7 | \n",
+ " 19.3 | \n",
+ " 193.0 | \n",
+ " 3450.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 0 | \n",
+ " 39.3 | \n",
+ " 20.6 | \n",
+ " 190.0 | \n",
+ " 3650.0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# to see how many kinds of species the dataset has\n",
+ "dataset_df['species'].value_counts()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 0
+ },
+ "id": "oTKViJTlJ0jZ",
+ "outputId": "1f14324c-42d2-4d62-939f-d4431694524d"
+ },
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 146\n",
+ "1 119\n",
+ "2 68\n",
+ "Name: species, dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 9
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# build the neural network with the Keras Sequential model\n",
+ "# this neural network is made of 3 layers\n",
+ "model = tf.keras.models.Sequential(\n",
+ " [ \n",
+ " tf.keras.layers.Dense(128, activation='relu'),\n",
+ " tf.keras.layers.Dense(128, activation='relu'),\n",
+ " tf.keras.layers.Dense(3, activation='softmax')\n",
+ "]\n",
+ ")"
+ ],
+ "metadata": {
+ "id": "FcrFz-HvJ-ly"
+ },
+ "execution_count": 10,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Loss that measures the prediction error; from_logits=False because the model's last layer already applies softmax (outputs are probabilities, not logits).\n",
+ "loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)"
+ ],
+ "metadata": {
+ "id": "dfPGNm2JLVdU"
+ },
+ "execution_count": 12,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# \"compile\" the model before training it. \n",
+ "model.compile(optimizer=tf.keras.optimizers.Adam(), loss=loss_fn, metrics=['acc'])"
+ ],
+ "metadata": {
+ "id": "mBcHqSPgKl6N"
+ },
+ "execution_count": 13,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "epochs = 15\n",
+ "model.fit(X_train, y_train, epochs=epochs, validation_split=0.1)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 0
+ },
+ "id": "7ntt79ccKoSw",
+ "outputId": "13b40a15-0018-4a4a-8945-3278a02ae58e"
+ },
+ "execution_count": 17,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 1/15\n",
+ "8/8 [==============================] - 0s 21ms/step - loss: 7.2891 - acc: 0.7113 - val_loss: 13.5596 - val_acc: 0.7407\n",
+ "Epoch 2/15\n",
+ "8/8 [==============================] - 0s 11ms/step - loss: 13.1949 - acc: 0.6067 - val_loss: 20.4281 - val_acc: 0.7407\n",
+ "Epoch 3/15\n",
+ "8/8 [==============================] - 0s 11ms/step - loss: 14.5678 - acc: 0.7155 - val_loss: 8.7273 - val_acc: 0.4074\n",
+ "Epoch 4/15\n",
+ "8/8 [==============================] - 0s 9ms/step - loss: 10.9224 - acc: 0.6820 - val_loss: 9.0599 - val_acc: 0.4074\n",
+ "Epoch 5/15\n",
+ "8/8 [==============================] - 0s 11ms/step - loss: 12.6337 - acc: 0.5858 - val_loss: 12.3866 - val_acc: 0.7407\n",
+ "Epoch 6/15\n",
+ "8/8 [==============================] - 0s 12ms/step - loss: 9.9277 - acc: 0.6778 - val_loss: 7.6402 - val_acc: 0.7407\n",
+ "Epoch 7/15\n",
+ "8/8 [==============================] - 0s 10ms/step - loss: 4.7022 - acc: 0.6569 - val_loss: 11.0927 - val_acc: 0.7407\n",
+ "Epoch 8/15\n",
+ "8/8 [==============================] - 0s 10ms/step - loss: 8.5943 - acc: 0.6569 - val_loss: 12.4917 - val_acc: 0.7407\n",
+ "Epoch 9/15\n",
+ "8/8 [==============================] - 0s 9ms/step - loss: 6.7779 - acc: 0.6611 - val_loss: 9.8235 - val_acc: 0.7407\n",
+ "Epoch 10/15\n",
+ "8/8 [==============================] - 0s 10ms/step - loss: 7.4784 - acc: 0.6862 - val_loss: 16.9433 - val_acc: 0.7407\n",
+ "Epoch 11/15\n",
+ "8/8 [==============================] - 0s 9ms/step - loss: 12.4400 - acc: 0.6402 - val_loss: 16.2418 - val_acc: 0.7407\n",
+ "Epoch 12/15\n",
+ "8/8 [==============================] - 0s 10ms/step - loss: 8.9389 - acc: 0.6778 - val_loss: 11.7851 - val_acc: 0.3704\n",
+ "Epoch 13/15\n",
+ "8/8 [==============================] - 0s 9ms/step - loss: 5.7511 - acc: 0.6569 - val_loss: 7.6064 - val_acc: 0.7407\n",
+ "Epoch 14/15\n",
+ "8/8 [==============================] - 0s 11ms/step - loss: 4.8255 - acc: 0.7071 - val_loss: 5.9219 - val_acc: 0.7407\n",
+ "Epoch 15/15\n",
+ "8/8 [==============================] - 0s 9ms/step - loss: 7.6571 - acc: 0.6569 - val_loss: 9.4075 - val_acc: 0.7037\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 17
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "model.summary()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 0
+ },
+ "id": "PVjyCj75PSUG",
+ "outputId": "901d2136-8e60-4c94-fdbc-0172653d7de3"
+ },
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Model: \"sequential\"\n",
+ "_________________________________________________________________\n",
+ " Layer (type) Output Shape Param # \n",
+ "=================================================================\n",
+ " dense (Dense) (None, 128) 1152 \n",
+ " \n",
+ " dense_1 (Dense) (None, 128) 16512 \n",
+ " \n",
+ " dense_2 (Dense) (None, 3) 387 \n",
+ " \n",
+ "=================================================================\n",
+ "Total params: 18,051\n",
+ "Trainable params: 18,051\n",
+ "Non-trainable params: 0\n",
+ "_________________________________________________________________\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Evaluate the trained model on the test split (X_test, y_test); returns [loss, acc]\n",
+ "model.evaluate(X_test, y_test)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 0
+ },
+ "id": "UcVWrTYdMiSp",
+ "outputId": "cb708479-7e01-47f3-c164-b2c4d93b126f"
+ },
+ "execution_count": 16,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "3/3 [==============================] - 0s 7ms/step - loss: 2.7827 - acc: 0.5970\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[2.782670497894287, 0.5970149040222168]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 16
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## The result does not look good compared with the TF decision tree from the tutorial.\n",
+ "This is because neural networks are very data-hungry. With fewer than 1,000 examples, our Titanic data set is probably not big enough. There is no strict rule for how much data you need, but at least 10,000 examples is a good bet, 100,000 is much better, and the best models are trained on millions of examples.\n",
+ "\n",
+ "We usually don't use neural networks for traditional data sets like the Titanic data set. They are most useful for image recognition or NLP problems, so let's move on to image recognition."
+ ],
+ "metadata": {
+ "id": "oD_dPlv5PrJK"
+ }
+ }
+ ]
+}
\ No newline at end of file