From e97b367083ef2b4ce74c1c88cbf2c44a2f964f32 Mon Sep 17 00:00:00 2001 From: WENKAI TAN <52226880+WENKAITAN@users.noreply.github.com> Date: Thu, 9 Sep 2021 12:48:41 -0400 Subject: [PATCH 1/6] Add files via upload --- Exercise.ipynb | 1306 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1306 insertions(+) create mode 100644 Exercise.ipynb diff --git a/Exercise.ipynb b/Exercise.ipynb new file mode 100644 index 00000000..49ecff9a --- /dev/null +++ b/Exercise.ipynb @@ -0,0 +1,1306 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data wrangling with Pandas exercise\n", + "* For this exercise we will be using the `listings.csv` data file." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "zACK is the best teacher!!!\n" + ] + } + ], + "source": [ + "print(\"zACK is the best teacher!!!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load in the data file using `pd.read_csv()`" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt1753482019-11-040.372365
13831Whole flr w/private bdrm, bath & kitchen(pls r...4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt7513402020-08-014.751265
25121BlissArtsSpace!7356GaronBrooklynBedford-Stuyvesant40.68688-73.95596Private room6029502019-12-020.371365
35136Spacious Brooklyn Duplex, Patio + Garden7378RebeccaBrooklynSunset Park40.66120-73.99423Entire home/apt1751412014-01-020.011295
45178Large Furnished Room Near B'way8967ShunichiManhattanHell's Kitchen40.76489-73.98493Private room6524732020-03-153.441340
\n", + "
" + ], + "text/plain": [ + " id name host_id \\\n", + "0 2595 Skylit Midtown Castle 2845 \n", + "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n", + "2 5121 BlissArtsSpace! 7356 \n", + "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n", + "4 5178 Large Furnished Room Near B'way  8967 \n", + "\n", + " host_name neighbourhood_group neighbourhood latitude longitude \\\n", + "0 Jennifer Manhattan Midtown 40.75362 -73.98377 \n", + "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n", + "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 -73.95596 \n", + "3 Rebecca Brooklyn Sunset Park 40.66120 -73.99423 \n", + "4 Shunichi Manhattan Hell's Kitchen 40.76489 -73.98493 \n", + "\n", + " room_type price minimum_nights number_of_reviews last_review \\\n", + "0 Entire home/apt 175 3 48 2019-11-04 \n", + "1 Entire home/apt 75 1 340 2020-08-01 \n", + "2 Private room 60 29 50 2019-12-02 \n", + "3 Entire home/apt 175 14 1 2014-01-02 \n", + "4 Private room 65 2 473 2020-03-15 \n", + "\n", + " reviews_per_month calculated_host_listings_count availability_365 \n", + "0 0.37 2 365 \n", + "1 4.75 1 265 \n", + "2 0.37 1 365 \n", + "3 0.01 1 295 \n", + "4 3.44 1 340 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load data here\n", + "df = pd.read_csv(\"./data/listings.csv\", sep=\",\")\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(46527, 16)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercise 2 - Filtering\n", + "\n", + "Return the following subsets of the dataframe.\n", + "\n", + "1. How many listings are there with a price less than 100? \n", + "\n", + "\n", + "2. 
Find how many listings there are in just Brooklyn.\n", + "\n", + "\n", + "3. Find how many listings there are in Brooklyn with a price less than 100.\n", + "\n", + "\n", + "4. Using `.isin()` select anyone that has the host name of Michael, David, John, and Daniel.\n", + "\n", + "\n", + "5. Create a new column called `adjusted_price` that has $100 added to every listing in Williamsburg. The prices for all other listings should be the same as they were before. \n", + "\n", + "\n", + "6. What % of the rooms are private, and what % of the rooms are shared. \n", + " * Hint, use `.value_counts()`\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(22778, 16)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 1. How many listings are there with a price less than 100? \n", + "price_lessthan_100 = df[\"price\"] < 100\n", + "\n", + "df_less_than_100 = df[price_lessthan_100]\n", + "df_less_than_100.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(18632, 16)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 2. Make a new DataFrame of listings in Brooklyn named `df_bk` \n", + "# and find how many listings in just Brooklyn.\n", + "\n", + "list_in_brk = df[\"neighbourhood_group\"] == \"Brooklyn\"\n", + "df_bk = df[list_in_brk ]\n", + "df_bk.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10473, 16)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 3. 
Find how many listings there are in Brooklyn with a price less than 100.\n", + "list_in_bk_and_less_than_100 = df_bk[\"price\"] < 100 \n", + "\n", + "df_bk_less_100 = df_bk[list_in_bk_and_less_than_100]\n", + "\n", + "df_bk_less_100.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
5216595LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q...64522DanielBrooklynWilliamsburg40.70933-73.96792Entire home/apt27111722020-07-141.441365
20161747Cozy, Brooklyn, Prospect Park Studio299370DavidBrooklynProspect-Lefferts Gardens40.65979-73.96180Entire home/apt9114972018-01-310.83144
20962903Beautiful modern studio apartment in heart of NYC306605DanielManhattanChelsea40.74238-73.99567Entire home/apt20515682019-12-140.67289
22064015Prime East Village 1 Bedroom146944DavidManhattanEast Village40.72807-73.98594Entire home/apt20030NaNNaN10
26074073Food & Music Dream Apartment in Williamsburg211877DanielBrooklynWilliamsburg40.71113-73.96054Entire home/apt18730902020-07-310.811261
...................................................
4636244639591Central & Stylish 1 Bedroom Apt - Heart of Che...286136716JohnManhattanChelsea40.74568-73.99694Entire home/apt110300NaNNaN3110
4639644661297Flushing Sunshine home #101361579037DanielQueensFlushing40.74603-73.82837Private room5210NaNNaN2360
4640344662157Flushing Sunshine home #102361579037DanielQueensFlushing40.74441-73.82829Private room55132020-08-163.002365
4645544697211David’s Queen Sized Room343477029DavidQueensFar Rockaway40.59460-73.75875Private room9510NaNNaN1365
4650844797527Long-Term: Furnished Apt in Nolita w/ Amenities19448640DavidManhattanNolita40.72289-73.99400Entire home/apt140300NaNNaN1282
\n", + "

1258 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " id name host_id \\\n", + "52 16595 LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q... 64522 \n", + "201 61747 Cozy, Brooklyn, Prospect Park Studio 299370 \n", + "209 62903 Beautiful modern studio apartment in heart of NYC 306605 \n", + "220 64015 Prime East Village 1 Bedroom 146944 \n", + "260 74073 Food & Music Dream Apartment in Williamsburg 211877 \n", + "... ... ... ... \n", + "46362 44639591 Central & Stylish 1 Bedroom Apt - Heart of Che... 286136716 \n", + "46396 44661297 Flushing Sunshine home #101 361579037 \n", + "46403 44662157 Flushing Sunshine home #102 361579037 \n", + "46455 44697211 David’s Queen Sized Room 343477029 \n", + "46508 44797527 Long-Term: Furnished Apt in Nolita w/ Amenities 19448640 \n", + "\n", + " host_name neighbourhood_group neighbourhood latitude \\\n", + "52 Daniel Brooklyn Williamsburg 40.70933 \n", + "201 David Brooklyn Prospect-Lefferts Gardens 40.65979 \n", + "209 Daniel Manhattan Chelsea 40.74238 \n", + "220 David Manhattan East Village 40.72807 \n", + "260 Daniel Brooklyn Williamsburg 40.71113 \n", + "... ... ... ... ... \n", + "46362 John Manhattan Chelsea 40.74568 \n", + "46396 Daniel Queens Flushing 40.74603 \n", + "46403 Daniel Queens Flushing 40.74441 \n", + "46455 David Queens Far Rockaway 40.59460 \n", + "46508 David Manhattan Nolita 40.72289 \n", + "\n", + " longitude room_type price minimum_nights number_of_reviews \\\n", + "52 -73.96792 Entire home/apt 271 1 172 \n", + "201 -73.96180 Entire home/apt 91 14 97 \n", + "209 -73.99567 Entire home/apt 205 15 68 \n", + "220 -73.98594 Entire home/apt 200 3 0 \n", + "260 -73.96054 Entire home/apt 187 30 90 \n", + "... ... ... ... ... ... 
\n", + "46362 -73.99694 Entire home/apt 110 30 0 \n", + "46396 -73.82837 Private room 52 1 0 \n", + "46403 -73.82829 Private room 55 1 3 \n", + "46455 -73.75875 Private room 95 1 0 \n", + "46508 -73.99400 Entire home/apt 140 30 0 \n", + "\n", + " last_review reviews_per_month calculated_host_listings_count \\\n", + "52 2020-07-14 1.44 1 \n", + "201 2018-01-31 0.83 1 \n", + "209 2019-12-14 0.67 2 \n", + "220 NaN NaN 1 \n", + "260 2020-07-31 0.81 1 \n", + "... ... ... ... \n", + "46362 NaN NaN 3 \n", + "46396 NaN NaN 2 \n", + "46403 2020-08-16 3.00 2 \n", + "46455 NaN NaN 1 \n", + "46508 NaN NaN 1 \n", + "\n", + " availability_365 \n", + "52 365 \n", + "201 44 \n", + "209 89 \n", + "220 0 \n", + "260 261 \n", + "... ... \n", + "46362 110 \n", + "46396 360 \n", + "46403 365 \n", + "46455 365 \n", + "46508 282 \n", + "\n", + "[1258 rows x 16 columns]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 4. Using `.isin()` select anyone that has the host name of Michael, David, John, and Daniel.\n", + "\n", + "names = [\"Michael\", \"David\", \"John\", \"Daniel\"]\n", + "select_host = df[\"host_name\"].isin(names)\n", + "df_select_host = df[select_host]\n", + "df_select_host" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365adjusted_price
02595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt1753482019-11-040.372365275
13831Whole flr w/private bdrm, bath & kitchen(pls r...4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt7513402020-08-014.751265175
25121BlissArtsSpace!7356GaronBrooklynBedford-Stuyvesant40.68688-73.95596Private room6029502019-12-020.371365160
35136Spacious Brooklyn Duplex, Patio + Garden7378RebeccaBrooklynSunset Park40.66120-73.99423Entire home/apt1751412014-01-020.011295275
45178Large Furnished Room Near B'way8967ShunichiManhattanHell's Kitchen40.76489-73.98493Private room6524732020-03-153.441340165
......................................................
4652244807522Designer Gramercy Studio Townhouse by UNSQ12941925BrianManhattanGramercy40.73433-73.98383Entire home/apt14570NaNNaN1164245
4652344807786Cozy & comfy apt in the heart of Inwood Manhattan284790520SalarManhattanWashington Heights40.85820-73.92733Entire home/apt8760NaNNaN285187
4652444811717Comfortable safe environment 24hr security camera362453686NicoleBrooklynEast Flatbush40.65399-73.93287Private room5930NaNNaN190159
4652544814944Upper West Side studio 86th Street4039777FernandoManhattanUpper West Side40.78731-73.97029Entire home/apt80300NaNNaN1113180
46526448180095MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN48098268MarinaBrooklynGravesend40.59945-73.98209Private room6610NaNNaN138166
\n", + "

46527 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " id name host_id \\\n", + "0 2595 Skylit Midtown Castle 2845 \n", + "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n", + "2 5121 BlissArtsSpace! 7356 \n", + "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n", + "4 5178 Large Furnished Room Near B'way  8967 \n", + "... ... ... ... \n", + "46522 44807522 Designer Gramercy Studio Townhouse by UNSQ 12941925 \n", + "46523 44807786 Cozy & comfy apt in the heart of Inwood Manhattan 284790520 \n", + "46524 44811717 Comfortable safe environment 24hr security camera 362453686 \n", + "46525 44814944 Upper West Side studio 86th Street 4039777 \n", + "46526 44818009 5MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN 48098268 \n", + "\n", + " host_name neighbourhood_group neighbourhood latitude \\\n", + "0 Jennifer Manhattan Midtown 40.75362 \n", + "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 \n", + "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 \n", + "3 Rebecca Brooklyn Sunset Park 40.66120 \n", + "4 Shunichi Manhattan Hell's Kitchen 40.76489 \n", + "... ... ... ... ... \n", + "46522 Brian Manhattan Gramercy 40.73433 \n", + "46523 Salar Manhattan Washington Heights 40.85820 \n", + "46524 Nicole Brooklyn East Flatbush 40.65399 \n", + "46525 Fernando Manhattan Upper West Side 40.78731 \n", + "46526 Marina Brooklyn Gravesend 40.59945 \n", + "\n", + " longitude room_type price minimum_nights number_of_reviews \\\n", + "0 -73.98377 Entire home/apt 175 3 48 \n", + "1 -73.95976 Entire home/apt 75 1 340 \n", + "2 -73.95596 Private room 60 29 50 \n", + "3 -73.99423 Entire home/apt 175 14 1 \n", + "4 -73.98493 Private room 65 2 473 \n", + "... ... ... ... ... ... 
\n", + "46522 -73.98383 Entire home/apt 145 7 0 \n", + "46523 -73.92733 Entire home/apt 87 6 0 \n", + "46524 -73.93287 Private room 59 3 0 \n", + "46525 -73.97029 Entire home/apt 80 30 0 \n", + "46526 -73.98209 Private room 66 1 0 \n", + "\n", + " last_review reviews_per_month calculated_host_listings_count \\\n", + "0 2019-11-04 0.37 2 \n", + "1 2020-08-01 4.75 1 \n", + "2 2019-12-02 0.37 1 \n", + "3 2014-01-02 0.01 1 \n", + "4 2020-03-15 3.44 1 \n", + "... ... ... ... \n", + "46522 NaN NaN 1 \n", + "46523 NaN NaN 2 \n", + "46524 NaN NaN 1 \n", + "46525 NaN NaN 1 \n", + "46526 NaN NaN 1 \n", + "\n", + " availability_365 adjusted_price \n", + "0 365 275 \n", + "1 265 175 \n", + "2 365 160 \n", + "3 295 275 \n", + "4 340 165 \n", + "... ... ... \n", + "46522 164 245 \n", + "46523 85 187 \n", + "46524 90 159 \n", + "46525 113 180 \n", + "46526 38 166 \n", + "\n", + "[46527 rows x 17 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 5. Create a new column called `adjusted_price` that has $100 added to every listing in Williamsburg. \n", + "# The prices for all other listings should be the same as they were before. \n", + "df[\"adjusted_price\"] = np.where(df[\"neighbourhood\"] == \"Williamsburg\", df[\"price\"] + 100, df[\"price\"])  # +100 only for Williamsburg rows\n", + "df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "room_type\n", + "Entire home/apt 23998\n", + "Private room 21144\n", + "Shared room 987\n", + "Hotel room 398\n", + "dtype: int64" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 6. What % of the rooms are private, and what % of the rooms are shared. 
\n", + "\n", + "room_types = df.value_counts(\"room_type\")\n", + "room_types\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "46527" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_rooms = room_types.sum()\n", + "total_rooms " + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "percentage_private_room: 45.44%\n", + "percentage_shared_room: 2.12%\n" + ] + } + ], + "source": [ + "private_room = room_types[\"Private room\"]\n", + "shared_room = room_types[\"Shared room\"]\n", + "percentage_private_room = private_room / total_rooms * 100  # share of all listings, in percent\n", + "percentage_shared_room = shared_room / total_rooms * 100  # share of all listings, in percent\n", + "\n", + "print(f\"percentage_private_room: {percentage_private_room:.2f}%\")\n", + "print(f\"percentage_shared_room: {percentage_shared_room:.2f}%\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercise 3 - Grouping\n", + "\n", + "1. Using `groupby`, count how many listings are in each neighbourhood_group.\n", + "\n", + "\n", + "2. Using `groupby`, find the mean price for each of the neighbourhood_groups. \n", + "\n", + "\n", + "3. Using `groupby` and `.agg()`, find the min and max price for each of the neighbourhood_groups. \n", + "\n", + "\n", + "4. Using `groupby`, find the median price for each room type in each neighbourhood_group.\n", + "\n", + "\n", + "5. Using `groupby` and `.agg()`, find the count, min, max, mean, median, and std of the prices for each room type in each neighbourhood_group." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# 1. 
Using `groupby`, count how many listings are in each neighbourhood_group.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# 2. Using `groupby`, find the mean price for each of the neighbourhood_groups. \n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# 3. Using `groupby` and `.agg()`, find the min and max price for each of the neighbourhood_groups. \n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# 4. Using `groupby`, find the mean price for each room type in each neighbourhood_group.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# 5. Using `groupby` and `.agg()`, find the count, min, max, mean, median, and std of the prices \n", + "# for each room type in each neighbourhood_group.\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Join and file saving.\n", + "1. Load the `prices.csv` and the `n_listings.csv`\n", + "\n", + "\n", + "2. Do a join that keeps all the records for each table.\n", + " * Neighbourhood groups should include ['Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island',\n", + " 'LongIsland']\n", + " \n", + " \n", + "3. Save your joined csv as `joined.csv`\n", + "\n", + "\n", + "4. Load your saved table and see if it looks the same as or different from the DataFrame you used to create it. " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# 1. Load the `prices.csv` and the `n_listings.csv`\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# 2. 
Do a join that keeps all the records for each table.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Use the grammys.csv data for the next section of questions.\n", + "\n", + "1. Who won Album of the Year in 2016?\n", + "\n", + "\n", + "2. Who won Best Rap Album in 2009?\n", + "\n", + "\n", + "3. How many awards was Kendrick Lamar nominated for, and how many did he win...?" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "# 1. Who won Album of the Year in 2016?\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# 2. Who won Best Rap Album in 2009?\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# 3. How many awards was Kendrick Lamar nominated for, and how many did he win...?\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 9c81be305d1a2c9c0557787316ed0a2ee9e88a40 Mon Sep 17 00:00:00 2001 From: WENKAITAN Date: Thu, 16 Sep 2021 12:47:01 -0400 Subject: [PATCH 2/6] comment --- Week-01-Pandas/Exercise.ipynb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Week-01-Pandas/Exercise.ipynb b/Week-01-Pandas/Exercise.ipynb index ba0ed20f..94e1277d 100644 --- a/Week-01-Pandas/Exercise.ipynb +++ b/Week-01-Pandas/Exercise.ipynb @@ -8,6 +8,15 @@ "* For this exercise we will be using the `listings.csv` data file." 
] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"zACK is the best teacher!!!\")" + ] + }, { "cell_type": "code", "execution_count": 23, From 6493cb4feba8131bb7fa2146d6e46e952b7cd832 Mon Sep 17 00:00:00 2001 From: WENKAITAN Date: Thu, 16 Sep 2021 13:00:09 -0400 Subject: [PATCH 3/6] hw --- Week-01-Pandas/Exercise.ipynb | 1026 ++++++++++++++++++++++++++++++++- 1 file changed, 1002 insertions(+), 24 deletions(-) diff --git a/Week-01-Pandas/Exercise.ipynb b/Week-01-Pandas/Exercise.ipynb index 94e1277d..49ecff9a 100644 --- a/Week-01-Pandas/Exercise.ipynb +++ b/Week-01-Pandas/Exercise.ipynb @@ -10,16 +10,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "zACK is the best teacher!!!\n" + ] + } + ], "source": [ "print(\"zACK is the best teacher!!!\")" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -36,12 +44,208 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt1753482019-11-040.372365
13831Whole flr w/private bdrm, bath & kitchen(pls r...4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt7513402020-08-014.751265
25121BlissArtsSpace!7356GaronBrooklynBedford-Stuyvesant40.68688-73.95596Private room6029502019-12-020.371365
35136Spacious Brooklyn Duplex, Patio + Garden7378RebeccaBrooklynSunset Park40.66120-73.99423Entire home/apt1751412014-01-020.011295
45178Large Furnished Room Near B'way8967ShunichiManhattanHell's Kitchen40.76489-73.98493Private room6524732020-03-153.441340
\n", + "
" + ], + "text/plain": [ + " id name host_id \\\n", + "0 2595 Skylit Midtown Castle 2845 \n", + "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n", + "2 5121 BlissArtsSpace! 7356 \n", + "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n", + "4 5178 Large Furnished Room Near B'way  8967 \n", + "\n", + " host_name neighbourhood_group neighbourhood latitude longitude \\\n", + "0 Jennifer Manhattan Midtown 40.75362 -73.98377 \n", + "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n", + "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 -73.95596 \n", + "3 Rebecca Brooklyn Sunset Park 40.66120 -73.99423 \n", + "4 Shunichi Manhattan Hell's Kitchen 40.76489 -73.98493 \n", + "\n", + " room_type price minimum_nights number_of_reviews last_review \\\n", + "0 Entire home/apt 175 3 48 2019-11-04 \n", + "1 Entire home/apt 75 1 340 2020-08-01 \n", + "2 Private room 60 29 50 2019-12-02 \n", + "3 Entire home/apt 175 14 1 2014-01-02 \n", + "4 Private room 65 2 473 2020-03-15 \n", + "\n", + " reviews_per_month calculated_host_listings_count availability_365 \n", + "0 0.37 2 365 \n", + "1 4.75 1 265 \n", + "2 0.37 1 365 \n", + "3 0.01 1 295 \n", + "4 3.44 1 340 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Load data here\n", - "\n" + "df = pd.read_csv(\"./data/listings.csv\", sep=\",\")\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(46527, 16)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" ] }, { @@ -73,69 +277,843 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(22778, 16)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 1. 
How many listings are there with a price less than 100? \n", + "price_lessthan_100 = df[\"price\"] < 100\n", "\n", - "\n" + "df_less_than_100 = df[price_lessthan_100]\n", + "df_less_than_100.shape" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(18632, 16)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 2. Make a new DataFrame of listings in Brooklyn named `df_bk` \n", "# and find how many listings in just Brooklyn.\n", - "\n" + "\n", + "list_in_brk = df[\"neighbourhood_group\"] == \"Brooklyn\"\n", + "df_bk = df[list_in_brk ]\n", + "df_bk.shape" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(10473, 16)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 3. Find how many listings there are in Brooklyn with a price less than 100.\n", + "list_in_bk_and_less_than_100 = df_bk[\"price\"] < 100 \n", "\n", - "\n" + "df_bk_less_100 = df_bk[list_in_bk_and_less_than_100]\n", + "\n", + "df_bk_less_100.shape" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 28, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
5216595LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q...64522DanielBrooklynWilliamsburg40.70933-73.96792Entire home/apt27111722020-07-141.441365
20161747Cozy, Brooklyn, Prospect Park Studio299370DavidBrooklynProspect-Lefferts Gardens40.65979-73.96180Entire home/apt9114972018-01-310.83144
20962903Beautiful modern studio apartment in heart of NYC306605DanielManhattanChelsea40.74238-73.99567Entire home/apt20515682019-12-140.67289
22064015Prime East Village 1 Bedroom146944DavidManhattanEast Village40.72807-73.98594Entire home/apt20030NaNNaN10
26074073Food & Music Dream Apartment in Williamsburg211877DanielBrooklynWilliamsburg40.71113-73.96054Entire home/apt18730902020-07-310.811261
...................................................
4636244639591Central & Stylish 1 Bedroom Apt - Heart of Che...286136716JohnManhattanChelsea40.74568-73.99694Entire home/apt110300NaNNaN3110
4639644661297Flushing Sunshine home #101361579037DanielQueensFlushing40.74603-73.82837Private room5210NaNNaN2360
4640344662157Flushing Sunshine home #102361579037DanielQueensFlushing40.74441-73.82829Private room55132020-08-163.002365
4645544697211David’s Queen Sized Room343477029DavidQueensFar Rockaway40.59460-73.75875Private room9510NaNNaN1365
4650844797527Long-Term: Furnished Apt in Nolita w/ Amenities19448640DavidManhattanNolita40.72289-73.99400Entire home/apt140300NaNNaN1282
\n", + "

1258 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " id name host_id \\\n", + "52 16595 LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q... 64522 \n", + "201 61747 Cozy, Brooklyn, Prospect Park Studio 299370 \n", + "209 62903 Beautiful modern studio apartment in heart of NYC 306605 \n", + "220 64015 Prime East Village 1 Bedroom 146944 \n", + "260 74073 Food & Music Dream Apartment in Williamsburg 211877 \n", + "... ... ... ... \n", + "46362 44639591 Central & Stylish 1 Bedroom Apt - Heart of Che... 286136716 \n", + "46396 44661297 Flushing Sunshine home #101 361579037 \n", + "46403 44662157 Flushing Sunshine home #102 361579037 \n", + "46455 44697211 David’s Queen Sized Room 343477029 \n", + "46508 44797527 Long-Term: Furnished Apt in Nolita w/ Amenities 19448640 \n", + "\n", + " host_name neighbourhood_group neighbourhood latitude \\\n", + "52 Daniel Brooklyn Williamsburg 40.70933 \n", + "201 David Brooklyn Prospect-Lefferts Gardens 40.65979 \n", + "209 Daniel Manhattan Chelsea 40.74238 \n", + "220 David Manhattan East Village 40.72807 \n", + "260 Daniel Brooklyn Williamsburg 40.71113 \n", + "... ... ... ... ... \n", + "46362 John Manhattan Chelsea 40.74568 \n", + "46396 Daniel Queens Flushing 40.74603 \n", + "46403 Daniel Queens Flushing 40.74441 \n", + "46455 David Queens Far Rockaway 40.59460 \n", + "46508 David Manhattan Nolita 40.72289 \n", + "\n", + " longitude room_type price minimum_nights number_of_reviews \\\n", + "52 -73.96792 Entire home/apt 271 1 172 \n", + "201 -73.96180 Entire home/apt 91 14 97 \n", + "209 -73.99567 Entire home/apt 205 15 68 \n", + "220 -73.98594 Entire home/apt 200 3 0 \n", + "260 -73.96054 Entire home/apt 187 30 90 \n", + "... ... ... ... ... ... 
\n", + "46362 -73.99694 Entire home/apt 110 30 0 \n", + "46396 -73.82837 Private room 52 1 0 \n", + "46403 -73.82829 Private room 55 1 3 \n", + "46455 -73.75875 Private room 95 1 0 \n", + "46508 -73.99400 Entire home/apt 140 30 0 \n", + "\n", + " last_review reviews_per_month calculated_host_listings_count \\\n", + "52 2020-07-14 1.44 1 \n", + "201 2018-01-31 0.83 1 \n", + "209 2019-12-14 0.67 2 \n", + "220 NaN NaN 1 \n", + "260 2020-07-31 0.81 1 \n", + "... ... ... ... \n", + "46362 NaN NaN 3 \n", + "46396 NaN NaN 2 \n", + "46403 2020-08-16 3.00 2 \n", + "46455 NaN NaN 1 \n", + "46508 NaN NaN 1 \n", + "\n", + " availability_365 \n", + "52 365 \n", + "201 44 \n", + "209 89 \n", + "220 0 \n", + "260 261 \n", + "... ... \n", + "46362 110 \n", + "46396 360 \n", + "46403 365 \n", + "46455 365 \n", + "46508 282 \n", + "\n", + "[1258 rows x 16 columns]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 4. Using `.isin()` select anyone that has the host name of Michael, David, John, and Daniel.\n", "\n", - "\n" + "names = [\"Michael\", \"David\", \"John\", \"Daniel\"]\n", + "select_host = df[\"host_name\"].isin(names)\n", + "df_select_host = df[select_host]\n", + "df_select_host" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365adjusted_price
02595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt1753482019-11-040.372365275
13831Whole flr w/private bdrm, bath & kitchen(pls r...4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt7513402020-08-014.751265175
25121BlissArtsSpace!7356GaronBrooklynBedford-Stuyvesant40.68688-73.95596Private room6029502019-12-020.371365160
35136Spacious Brooklyn Duplex, Patio + Garden7378RebeccaBrooklynSunset Park40.66120-73.99423Entire home/apt1751412014-01-020.011295275
45178Large Furnished Room Near B'way8967ShunichiManhattanHell's Kitchen40.76489-73.98493Private room6524732020-03-153.441340165
......................................................
4652244807522Designer Gramercy Studio Townhouse by UNSQ12941925BrianManhattanGramercy40.73433-73.98383Entire home/apt14570NaNNaN1164245
4652344807786Cozy & comfy apt in the heart of Inwood Manhattan284790520SalarManhattanWashington Heights40.85820-73.92733Entire home/apt8760NaNNaN285187
4652444811717Comfortable safe environment 24hr security camera362453686NicoleBrooklynEast Flatbush40.65399-73.93287Private room5930NaNNaN190159
4652544814944Upper West Side studio 86th Street4039777FernandoManhattanUpper West Side40.78731-73.97029Entire home/apt80300NaNNaN1113180
46526448180095MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN48098268MarinaBrooklynGravesend40.59945-73.98209Private room6610NaNNaN138166
\n", + "

46527 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " id name host_id \\\n", + "0 2595 Skylit Midtown Castle 2845 \n", + "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n", + "2 5121 BlissArtsSpace! 7356 \n", + "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n", + "4 5178 Large Furnished Room Near B'way  8967 \n", + "... ... ... ... \n", + "46522 44807522 Designer Gramercy Studio Townhouse by UNSQ 12941925 \n", + "46523 44807786 Cozy & comfy apt in the heart of Inwood Manhattan 284790520 \n", + "46524 44811717 Comfortable safe environment 24hr security camera 362453686 \n", + "46525 44814944 Upper West Side studio 86th Street 4039777 \n", + "46526 44818009 5MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN 48098268 \n", + "\n", + " host_name neighbourhood_group neighbourhood latitude \\\n", + "0 Jennifer Manhattan Midtown 40.75362 \n", + "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 \n", + "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 \n", + "3 Rebecca Brooklyn Sunset Park 40.66120 \n", + "4 Shunichi Manhattan Hell's Kitchen 40.76489 \n", + "... ... ... ... ... \n", + "46522 Brian Manhattan Gramercy 40.73433 \n", + "46523 Salar Manhattan Washington Heights 40.85820 \n", + "46524 Nicole Brooklyn East Flatbush 40.65399 \n", + "46525 Fernando Manhattan Upper West Side 40.78731 \n", + "46526 Marina Brooklyn Gravesend 40.59945 \n", + "\n", + " longitude room_type price minimum_nights number_of_reviews \\\n", + "0 -73.98377 Entire home/apt 175 3 48 \n", + "1 -73.95976 Entire home/apt 75 1 340 \n", + "2 -73.95596 Private room 60 29 50 \n", + "3 -73.99423 Entire home/apt 175 14 1 \n", + "4 -73.98493 Private room 65 2 473 \n", + "... ... ... ... ... ... 
\n", + "46522 -73.98383 Entire home/apt 145 7 0 \n", + "46523 -73.92733 Entire home/apt 87 6 0 \n", + "46524 -73.93287 Private room 59 3 0 \n", + "46525 -73.97029 Entire home/apt 80 30 0 \n", + "46526 -73.98209 Private room 66 1 0 \n", + "\n", + " last_review reviews_per_month calculated_host_listings_count \\\n", + "0 2019-11-04 0.37 2 \n", + "1 2020-08-01 4.75 1 \n", + "2 2019-12-02 0.37 1 \n", + "3 2014-01-02 0.01 1 \n", + "4 2020-03-15 3.44 1 \n", + "... ... ... ... \n", + "46522 NaN NaN 1 \n", + "46523 NaN NaN 2 \n", + "46524 NaN NaN 1 \n", + "46525 NaN NaN 1 \n", + "46526 NaN NaN 1 \n", + "\n", + " availability_365 adjusted_price \n", + "0 365 275 \n", + "1 265 175 \n", + "2 365 160 \n", + "3 295 275 \n", + "4 340 165 \n", + "... ... ... \n", + "46522 164 245 \n", + "46523 85 187 \n", + "46524 90 159 \n", + "46525 113 180 \n", + "46526 38 166 \n", + "\n", + "[46527 rows x 17 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 5. Create a new column called `adjusted_price` that has $100 added to every listing in Williamsburg. \n", "# The prices for all other listings should be the same as the were before. \n", - "\n" + "df[\"adjusted_price\"] = df[\"price\"]+100\n", + "df\n" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 47, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "room_type\n", + "Entire home/apt 23998\n", + "Private room 21144\n", + "Shared room 987\n", + "Hotel room 398\n", + "dtype: int64" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 6. What % of the rooms are private, and what % of the rooms are shared. 
\n", "\n", + "room_types = df.value_counts(\"room_type\")\n", + "room_types\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "46527" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_rooms = room_types.sum()\n", + "total_rooms " + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "percenatge_private_room: 0.45444580566122894\n", + "percenatge_shared_room: 0.0212134889419047\n" + ] + } + ], + "source": [ + "private_room = room_types[\"Private room\"]\n", + "shared_room = room_types[\"Shared room\"]\n", + "percenatge_private_room = private_room / total_rooms\n", + "percenatge_shared_room = shared_room / total_rooms\n", "\n", - "\n" + "print(\"percenatge_private_room: \", percenatge_private_room)\n", + "print(\"percenatge_shared_room: \", percenatge_shared_room )" ] }, { From 18dda68156e68f1d906fdc1a023e5c08a56401cf Mon Sep 17 00:00:00 2001 From: WENKAITAN Date: Thu, 16 Sep 2021 13:02:17 -0400 Subject: [PATCH 4/6] delete duplicatef file --- .DS_Store | Bin 0 -> 6148 bytes Exercise.ipynb | 1306 ------------------------------------------------ 2 files changed, 1306 deletions(-) create mode 100644 .DS_Store delete mode 100644 Exercise.ipynb diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt1753482019-11-040.372365
13831Whole flr w/private bdrm, bath & kitchen(pls r...4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt7513402020-08-014.751265
25121BlissArtsSpace!7356GaronBrooklynBedford-Stuyvesant40.68688-73.95596Private room6029502019-12-020.371365
35136Spacious Brooklyn Duplex, Patio + Garden7378RebeccaBrooklynSunset Park40.66120-73.99423Entire home/apt1751412014-01-020.011295
45178Large Furnished Room Near B'way8967ShunichiManhattanHell's Kitchen40.76489-73.98493Private room6524732020-03-153.441340
\n", - "" - ], - "text/plain": [ - " id name host_id \\\n", - "0 2595 Skylit Midtown Castle 2845 \n", - "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n", - "2 5121 BlissArtsSpace! 7356 \n", - "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n", - "4 5178 Large Furnished Room Near B'way  8967 \n", - "\n", - " host_name neighbourhood_group neighbourhood latitude longitude \\\n", - "0 Jennifer Manhattan Midtown 40.75362 -73.98377 \n", - "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n", - "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 -73.95596 \n", - "3 Rebecca Brooklyn Sunset Park 40.66120 -73.99423 \n", - "4 Shunichi Manhattan Hell's Kitchen 40.76489 -73.98493 \n", - "\n", - " room_type price minimum_nights number_of_reviews last_review \\\n", - "0 Entire home/apt 175 3 48 2019-11-04 \n", - "1 Entire home/apt 75 1 340 2020-08-01 \n", - "2 Private room 60 29 50 2019-12-02 \n", - "3 Entire home/apt 175 14 1 2014-01-02 \n", - "4 Private room 65 2 473 2020-03-15 \n", - "\n", - " reviews_per_month calculated_host_listings_count availability_365 \n", - "0 0.37 2 365 \n", - "1 4.75 1 265 \n", - "2 0.37 1 365 \n", - "3 0.01 1 295 \n", - "4 3.44 1 340 " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data here\n", - "df = pd.read_csv(\"./data/listings.csv\", sep=\",\")\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(46527, 16)" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Exercise 2 - Filtering\n", - "\n", - "Return the following subsets of the dataframe.\n", - "\n", - "1. How many listings are there with a price less than 100? \n", - "\n", - "\n", - "2. 
Find how many listings there are in just Brooklyn.\n", - "\n", - "\n", - "3. Find how many listings there are in Brooklyn with a price less than 100.\n", - "\n", - "\n", - "4. Using `.isin()` select anyone that has the host name of Michael, David, John, and Daniel.\n", - "\n", - "\n", - "5. Create a new column called `adjusted_price` that has $100 added to every listing in Williamsburg. The prices for all other listings should be the same as the were before. \n", - "\n", - "\n", - "6. What % of the rooms are private, and what % of the rooms are shared. \n", - " * Hint, use `.value_counts()`\n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(22778, 16)" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 1. How many listings are there with a price less than 100? \n", - "price_lessthan_100 = df[\"price\"] < 100\n", - "\n", - "df_less_than_100 = df[price_lessthan_100]\n", - "df_less_than_100.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(18632, 16)" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 2. Make a new DataFrame of listings in Brooklyn named `df_bk` \n", - "# and find how many listings in just Brooklyn.\n", - "\n", - "list_in_brk = df[\"neighbourhood_group\"] == \"Brooklyn\"\n", - "df_bk = df[list_in_brk ]\n", - "df_bk.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(10473, 16)" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 3. 
Find how many listings there are in Brooklyn with a price less than 100.\n", - "list_in_bk_and_less_than_100 = df_bk[\"price\"] < 100 \n", - "\n", - "df_bk_less_100 = df_bk[list_in_bk_and_less_than_100]\n", - "\n", - "df_bk_less_100.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
5216595LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q...64522DanielBrooklynWilliamsburg40.70933-73.96792Entire home/apt27111722020-07-141.441365
20161747Cozy, Brooklyn, Prospect Park Studio299370DavidBrooklynProspect-Lefferts Gardens40.65979-73.96180Entire home/apt9114972018-01-310.83144
20962903Beautiful modern studio apartment in heart of NYC306605DanielManhattanChelsea40.74238-73.99567Entire home/apt20515682019-12-140.67289
22064015Prime East Village 1 Bedroom146944DavidManhattanEast Village40.72807-73.98594Entire home/apt20030NaNNaN10
26074073Food & Music Dream Apartment in Williamsburg211877DanielBrooklynWilliamsburg40.71113-73.96054Entire home/apt18730902020-07-310.811261
...................................................
4636244639591Central & Stylish 1 Bedroom Apt - Heart of Che...286136716JohnManhattanChelsea40.74568-73.99694Entire home/apt110300NaNNaN3110
4639644661297Flushing Sunshine home #101361579037DanielQueensFlushing40.74603-73.82837Private room5210NaNNaN2360
4640344662157Flushing Sunshine home #102361579037DanielQueensFlushing40.74441-73.82829Private room55132020-08-163.002365
4645544697211David’s Queen Sized Room343477029DavidQueensFar Rockaway40.59460-73.75875Private room9510NaNNaN1365
4650844797527Long-Term: Furnished Apt in Nolita w/ Amenities19448640DavidManhattanNolita40.72289-73.99400Entire home/apt140300NaNNaN1282
\n", - "

1258 rows × 16 columns

\n", - "
" - ], - "text/plain": [ - " id name host_id \\\n", - "52 16595 LOFT HAVEN ~ Six Windows ~ Bricks ~ Plants ~ Q... 64522 \n", - "201 61747 Cozy, Brooklyn, Prospect Park Studio 299370 \n", - "209 62903 Beautiful modern studio apartment in heart of NYC 306605 \n", - "220 64015 Prime East Village 1 Bedroom 146944 \n", - "260 74073 Food & Music Dream Apartment in Williamsburg 211877 \n", - "... ... ... ... \n", - "46362 44639591 Central & Stylish 1 Bedroom Apt - Heart of Che... 286136716 \n", - "46396 44661297 Flushing Sunshine home #101 361579037 \n", - "46403 44662157 Flushing Sunshine home #102 361579037 \n", - "46455 44697211 David’s Queen Sized Room 343477029 \n", - "46508 44797527 Long-Term: Furnished Apt in Nolita w/ Amenities 19448640 \n", - "\n", - " host_name neighbourhood_group neighbourhood latitude \\\n", - "52 Daniel Brooklyn Williamsburg 40.70933 \n", - "201 David Brooklyn Prospect-Lefferts Gardens 40.65979 \n", - "209 Daniel Manhattan Chelsea 40.74238 \n", - "220 David Manhattan East Village 40.72807 \n", - "260 Daniel Brooklyn Williamsburg 40.71113 \n", - "... ... ... ... ... \n", - "46362 John Manhattan Chelsea 40.74568 \n", - "46396 Daniel Queens Flushing 40.74603 \n", - "46403 Daniel Queens Flushing 40.74441 \n", - "46455 David Queens Far Rockaway 40.59460 \n", - "46508 David Manhattan Nolita 40.72289 \n", - "\n", - " longitude room_type price minimum_nights number_of_reviews \\\n", - "52 -73.96792 Entire home/apt 271 1 172 \n", - "201 -73.96180 Entire home/apt 91 14 97 \n", - "209 -73.99567 Entire home/apt 205 15 68 \n", - "220 -73.98594 Entire home/apt 200 3 0 \n", - "260 -73.96054 Entire home/apt 187 30 90 \n", - "... ... ... ... ... ... 
\n", - "46362 -73.99694 Entire home/apt 110 30 0 \n", - "46396 -73.82837 Private room 52 1 0 \n", - "46403 -73.82829 Private room 55 1 3 \n", - "46455 -73.75875 Private room 95 1 0 \n", - "46508 -73.99400 Entire home/apt 140 30 0 \n", - "\n", - " last_review reviews_per_month calculated_host_listings_count \\\n", - "52 2020-07-14 1.44 1 \n", - "201 2018-01-31 0.83 1 \n", - "209 2019-12-14 0.67 2 \n", - "220 NaN NaN 1 \n", - "260 2020-07-31 0.81 1 \n", - "... ... ... ... \n", - "46362 NaN NaN 3 \n", - "46396 NaN NaN 2 \n", - "46403 2020-08-16 3.00 2 \n", - "46455 NaN NaN 1 \n", - "46508 NaN NaN 1 \n", - "\n", - " availability_365 \n", - "52 365 \n", - "201 44 \n", - "209 89 \n", - "220 0 \n", - "260 261 \n", - "... ... \n", - "46362 110 \n", - "46396 360 \n", - "46403 365 \n", - "46455 365 \n", - "46508 282 \n", - "\n", - "[1258 rows x 16 columns]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 4. Using `.isin()` select anyone that has the host name of Michael, David, John, and Daniel.\n", - "\n", - "names = [\"Michael\", \"David\", \"John\", \"Daniel\"]\n", - "select_host = df[\"host_name\"].isin(names)\n", - "df_select_host = df[select_host]\n", - "df_select_host" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365adjusted_price
02595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt1753482019-11-040.372365275
13831Whole flr w/private bdrm, bath & kitchen(pls r...4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt7513402020-08-014.751265175
25121BlissArtsSpace!7356GaronBrooklynBedford-Stuyvesant40.68688-73.95596Private room6029502019-12-020.371365160
35136Spacious Brooklyn Duplex, Patio + Garden7378RebeccaBrooklynSunset Park40.66120-73.99423Entire home/apt1751412014-01-020.011295275
45178Large Furnished Room Near B'way8967ShunichiManhattanHell's Kitchen40.76489-73.98493Private room6524732020-03-153.441340165
......................................................
4652244807522Designer Gramercy Studio Townhouse by UNSQ12941925BrianManhattanGramercy40.73433-73.98383Entire home/apt14570NaNNaN1164245
4652344807786Cozy & comfy apt in the heart of Inwood Manhattan284790520SalarManhattanWashington Heights40.85820-73.92733Entire home/apt8760NaNNaN285187
4652444811717Comfortable safe environment 24hr security camera362453686NicoleBrooklynEast Flatbush40.65399-73.93287Private room5930NaNNaN190159
4652544814944Upper West Side studio 86th Street4039777FernandoManhattanUpper West Side40.78731-73.97029Entire home/apt80300NaNNaN1113180
46526448180095MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN48098268MarinaBrooklynGravesend40.59945-73.98209Private room6610NaNNaN138166
\n", - "

46527 rows × 17 columns

\n", - "
" - ], - "text/plain": [ - " id name host_id \\\n", - "0 2595 Skylit Midtown Castle 2845 \n", - "1 3831 Whole flr w/private bdrm, bath & kitchen(pls r... 4869 \n", - "2 5121 BlissArtsSpace! 7356 \n", - "3 5136 Spacious Brooklyn Duplex, Patio + Garden 7378 \n", - "4 5178 Large Furnished Room Near B'way  8967 \n", - "... ... ... ... \n", - "46522 44807522 Designer Gramercy Studio Townhouse by UNSQ 12941925 \n", - "46523 44807786 Cozy & comfy apt in the heart of Inwood Manhattan 284790520 \n", - "46524 44811717 Comfortable safe environment 24hr security camera 362453686 \n", - "46525 44814944 Upper West Side studio 86th Street 4039777 \n", - "46526 44818009 5MIN D/N trains, NEAR THE BEACH, 50’ TO MANHATTAN 48098268 \n", - "\n", - " host_name neighbourhood_group neighbourhood latitude \\\n", - "0 Jennifer Manhattan Midtown 40.75362 \n", - "1 LisaRoxanne Brooklyn Clinton Hill 40.68514 \n", - "2 Garon Brooklyn Bedford-Stuyvesant 40.68688 \n", - "3 Rebecca Brooklyn Sunset Park 40.66120 \n", - "4 Shunichi Manhattan Hell's Kitchen 40.76489 \n", - "... ... ... ... ... \n", - "46522 Brian Manhattan Gramercy 40.73433 \n", - "46523 Salar Manhattan Washington Heights 40.85820 \n", - "46524 Nicole Brooklyn East Flatbush 40.65399 \n", - "46525 Fernando Manhattan Upper West Side 40.78731 \n", - "46526 Marina Brooklyn Gravesend 40.59945 \n", - "\n", - " longitude room_type price minimum_nights number_of_reviews \\\n", - "0 -73.98377 Entire home/apt 175 3 48 \n", - "1 -73.95976 Entire home/apt 75 1 340 \n", - "2 -73.95596 Private room 60 29 50 \n", - "3 -73.99423 Entire home/apt 175 14 1 \n", - "4 -73.98493 Private room 65 2 473 \n", - "... ... ... ... ... ... 
\n", - "46522 -73.98383 Entire home/apt 145 7 0 \n", - "46523 -73.92733 Entire home/apt 87 6 0 \n", - "46524 -73.93287 Private room 59 3 0 \n", - "46525 -73.97029 Entire home/apt 80 30 0 \n", - "46526 -73.98209 Private room 66 1 0 \n", - "\n", - " last_review reviews_per_month calculated_host_listings_count \\\n", - "0 2019-11-04 0.37 2 \n", - "1 2020-08-01 4.75 1 \n", - "2 2019-12-02 0.37 1 \n", - "3 2014-01-02 0.01 1 \n", - "4 2020-03-15 3.44 1 \n", - "... ... ... ... \n", - "46522 NaN NaN 1 \n", - "46523 NaN NaN 2 \n", - "46524 NaN NaN 1 \n", - "46525 NaN NaN 1 \n", - "46526 NaN NaN 1 \n", - "\n", - " availability_365 adjusted_price \n", - "0 365 275 \n", - "1 265 175 \n", - "2 365 160 \n", - "3 295 275 \n", - "4 340 165 \n", - "... ... ... \n", - "46522 164 245 \n", - "46523 85 187 \n", - "46524 90 159 \n", - "46525 113 180 \n", - "46526 38 166 \n", - "\n", - "[46527 rows x 17 columns]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 5. Create a new column called `adjusted_price` that has $100 added to every listing in Williamsburg. \n", - "# The prices for all other listings should be the same as the were before. \n", - "df[\"adjusted_price\"] = df[\"price\"]+100\n", - "df\n" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "room_type\n", - "Entire home/apt 23998\n", - "Private room 21144\n", - "Shared room 987\n", - "Hotel room 398\n", - "dtype: int64" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 6. What % of the rooms are private, and what % of the rooms are shared. 
\n", - "\n", - "room_types = df.value_counts(\"room_type\")\n", - "room_types\n" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "46527" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "total_rooms = room_types.sum()\n", - "total_rooms " - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "percenatge_private_room: 0.45444580566122894\n", - "percenatge_shared_room: 0.0212134889419047\n" - ] - } - ], - "source": [ - "private_room = room_types[\"Private room\"]\n", - "shared_room = room_types[\"Shared room\"]\n", - "percenatge_private_room = private_room / total_rooms\n", - "percenatge_shared_room = shared_room / total_rooms\n", - "\n", - "print(\"percenatge_private_room: \", percenatge_private_room)\n", - "print(\"percenatge_shared_room: \", percenatge_shared_room )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Exercise 3 - Grouping\n", - "\n", - "1. Using `groupby`, count how many listings are in each neighbourhood_group.\n", - "\n", - "\n", - "2. Using `groupby`, find the mean price for each of the neighbourhood_groups. \n", - "\n", - "\n", - "3. Using `groupby` and `.agg()`, find the min and max price for each of the neighbourhood_groups. \n", - "\n", - "\n", - "4. Using `groupby`, find the median price for each room type in each neighbourhood_group.\n", - "\n", - "\n", - "5. Using `groupby` and `.agg()`, find the count, min, max, mean, median, and std of the prices for each room type in each neighbourhood_group." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# 1. 
Using `groupby`, count how many listings are in each neighbourhood_group.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "# 2. Using `groupby`, find the mean price for each of the neighbourhood_groups. \n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "# 3. Using `groupby` and `.agg()`, find the min and max price for each of the neighbourhood_groups. \n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# 4. Using `groupby`, find the mean price for each room type in each neighbourhood_group.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "# 5. Using `groupby` and `.agg()`, find the count, min, max, mean, median, and std of the prices \n", - "# for each room type in each neighbourhood_group.\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Join and file saving.\n", - "1. Load the `prices.csv` and the `n_listings.csv`\n", - "\n", - "\n", - "2. Do join that keeps all the records for each table.\n", - " * Neighbourhood groups should include ['Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island',\n", - " 'LongIsland']\n", - " \n", - " \n", - "3. Save your joined csv as `joined.csv`\n", - "\n", - "\n", - "4. Load your saved table and see if it looks the same or different that the DataFrame you used to create it. " - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# 1. Load the `prices.csv` and the `n_listings.csv`\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# 2. 
Do join that keeps all the records for each table.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Use the grammys.csv data for the next section of questions.\n", - "\n", - "1. Who was won Album of the Year in 2016?\n", - "\n", - "\n", - "2. Who won Best Rap Album in 2009?\n", - "\n", - "\n", - "3. How many awards was Kendrick Lamar nomiated for, and how many did he win...?" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "# 1. Who was won Album of the Year in 2016?\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "# 2. Who won Best Rap Album in 2009?\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "# 3. How many awards was Kendrick Lamar nomiated for, and how many did he win...?\n", - "\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 4cf4ab2a935ba87a3946508b11252a1a51fdcc04 Mon Sep 17 00:00:00 2001 From: WENKAI TAN <52226880+WENKAITAN@users.noreply.github.com> Date: Thu, 16 Sep 2021 13:02:58 -0400 Subject: [PATCH 5/6] Delete .DS_Store --- .DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 
zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Fri, 6 May 2022 12:49:49 -0400 Subject: [PATCH 6/6] Created using Colaboratory --- Penguins_WenkaiTan.ipynb | 1265 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 1265 insertions(+) create mode 100644 Penguins_WenkaiTan.ipynb diff --git a/Penguins_WenkaiTan.ipynb b/Penguins_WenkaiTan.ipynb new file mode 100644 index 00000000..af6a6741 --- /dev/null +++ b/Penguins_WenkaiTan.ipynb @@ -0,0 +1,1265 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Penguins_WenkaiTan.ipynb", + "provenance": [], + "authorship_tag": "ABX9TyPfRL1NXZUansg3CbCtXijM", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "5tSFmXUiIH75", + "outputId": "2ecf1241-0bdf-47c5-9e7f-b6d61f6ccb32" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.8.0\n" + ] + } + ], + "source": [ + "# TensorFlow and tf.keras\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "# Layers for our neural networks\n", + "from tensorflow.keras.layers import Dense\n", + "\n", + "\n", + "# Our normal python data science stack you've come to know and love\n", + "import numpy as np\n", + "import os\n", + "import sys\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "\n", + "print(tf.__version__)" + ] + }, + { + "cell_type": "code", + "source": [ + "# Download 
the dataset\n", + "!wget -q https://storage.googleapis.com/download.tensorflow.org/data/palmer_penguins/penguins.csv -O /tmp/penguins.csv\n", + "\n", + "# Load a dataset into a Pandas Dataframe.\n", + "dataset_df = pd.read_csv(\"/tmp/penguins.csv\")\n", + "\n", + "# Display the first 5 examples.\n", + "dataset_df.head(5)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 207 + }, + "id": "cPD4tbw5IVii", + "outputId": "9da52760-0285-4885-bd98-8128d1696865" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", + "0 Adelie Torgersen 39.1 18.7 181.0 \n", + "1 Adelie Torgersen 39.5 17.4 186.0 \n", + "2 Adelie Torgersen 40.3 18.0 195.0 \n", + "3 Adelie Torgersen NaN NaN NaN \n", + "4 Adelie Torgersen 36.7 19.3 193.0 \n", + "\n", + " body_mass_g sex year \n", + "0 3750.0 male 2007 \n", + "1 3800.0 female 2007 \n", + "2 3250.0 female 2007 \n", + "3 NaN NaN 2007 \n", + "4 3450.0 female 2007 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
0AdelieTorgersen39.118.7181.03750.0male2007
1AdelieTorgersen39.517.4186.03800.0female2007
2AdelieTorgersen40.318.0195.03250.0female2007
3AdelieTorgersenNaNNaNNaNNaNNaN2007
4AdelieTorgersen36.719.3193.03450.0female2007
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 2 + } + ] + }, + { + "cell_type": "code", + "source": [ + "#check if dataset has null values\n", + "dataset_df.isna().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "4h-d2XlCIX-5", + "outputId": "6169f094-be01-4447-c17d-359126d6ee37" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "species 0\n", + "island 0\n", + "bill_length_mm 2\n", + "bill_depth_mm 2\n", + "flipper_length_mm 2\n", + "body_mass_g 2\n", + "sex 11\n", + "year 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# delete all the rows that have null val\n", + "dataset_df = dataset_df.dropna()" + ], + "metadata": { + "id": "5--u42gaIhHG" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# sanity check\n", + "dataset_df.isna().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "I7hu9BUIItLJ", + "outputId": "20f40c7f-e509-48a1-f4ad-4ba2064b943a" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "species 0\n", + "island 0\n", + "bill_length_mm 0\n", + "bill_depth_mm 0\n", + "flipper_length_mm 0\n", + "body_mass_g 0\n", + "sex 0\n", + "year 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# one-hot encoding\n", + "dataset_df = pd.get_dummies(dataset_df, columns=['island', 'sex'], drop_first=True)\n", + "dataset_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 271 + }, + "id": "AU_9b8egI2ru", + "outputId": "20a17fe0-bee1-46a0-df6f-faed0915c44e" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ 
+ " species bill_length_mm bill_depth_mm flipper_length_mm body_mass_g \\\n", + "0 Adelie 39.1 18.7 181.0 3750.0 \n", + "1 Adelie 39.5 17.4 186.0 3800.0 \n", + "2 Adelie 40.3 18.0 195.0 3250.0 \n", + "4 Adelie 36.7 19.3 193.0 3450.0 \n", + "5 Adelie 39.3 20.6 190.0 3650.0 \n", + "\n", + " year island_Dream island_Torgersen sex_male \n", + "0 2007 0 1 1 \n", + "1 2007 0 1 0 \n", + "2 2007 0 1 0 \n", + "4 2007 0 1 0 \n", + "5 2007 0 1 1 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
speciesbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gyearisland_Dreamisland_Torgersensex_male
0Adelie39.118.7181.03750.02007011
1Adelie39.517.4186.03800.02007010
2Adelie40.318.0195.03250.02007010
4Adelie36.719.3193.03450.02007010
5Adelie39.320.6190.03650.02007011
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Name of the label column, and convert the categorical label into an integer.\n", + "label = \"species\"\n", + "classes = dataset_df[label].unique().tolist()\n", + "print(f\"Label classes: {classes}\")\n", + "\n", + "dataset_df[label] = dataset_df[label].map(classes.index)\n", + "dataset_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 288 + }, + "id": "GmfY-TROLq0A", + "outputId": "acb0bd40-bf5a-42dd-bfe8-b2457cadd521" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Label classes: ['Adelie', 'Gentoo', 'Chinstrap']\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " species bill_length_mm bill_depth_mm flipper_length_mm body_mass_g \\\n", + "0 0 39.1 18.7 181.0 3750.0 \n", + "1 0 39.5 17.4 186.0 3800.0 \n", + "2 0 40.3 18.0 195.0 3250.0 \n", + "4 0 36.7 19.3 193.0 3450.0 \n", + "5 0 39.3 20.6 190.0 3650.0 \n", + "\n", + " year island_Dream island_Torgersen sex_male \n", + "0 2007 0 1 1 \n", + "1 2007 0 1 0 \n", + "2 2007 0 1 0 \n", + "4 2007 0 1 0 \n", + "5 2007 0 1 1 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
speciesbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gyearisland_Dreamisland_Torgersensex_male
0039.118.7181.03750.02007011
1039.517.4186.03800.02007010
2040.318.0195.03250.02007010
4036.719.3193.03450.02007010
5039.320.6190.03650.02007011
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# dataset with all the features\n", + "X = dataset_df.drop(columns='species')\n", + "#X.head()\n", + "\n", + "# dataset with labels\n", + "y = dataset_df['species']\n", + "#y.head()\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45)\n", + "\n", + "print('Lenght of our Training data:', X_train.shape, '\\nLength of our Testing data:', y_test.shape)\n", + "dataset_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 306 + }, + "id": "rIMD1aNRJQix", + "outputId": "d50f1f8a-52bd-4795-bcc5-bdd05d60d244" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Lenght of our Training data: (266, 8) \n", + "Length of our Testing data: (67,)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " species bill_length_mm bill_depth_mm flipper_length_mm body_mass_g \\\n", + "0 0 39.1 18.7 181.0 3750.0 \n", + "1 0 39.5 17.4 186.0 3800.0 \n", + "2 0 40.3 18.0 195.0 3250.0 \n", + "4 0 36.7 19.3 193.0 3450.0 \n", + "5 0 39.3 20.6 190.0 3650.0 \n", + "\n", + " year island_Dream island_Torgersen sex_male \n", + "0 2007 0 1 1 \n", + "1 2007 0 1 0 \n", + "2 2007 0 1 0 \n", + "4 2007 0 1 0 \n", + "5 2007 0 1 1 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
speciesbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gyearisland_Dreamisland_Torgersensex_male
0039.118.7181.03750.02007011
1039.517.4186.03800.02007010
2040.318.0195.03250.02007010
4036.719.3193.03450.02007010
5039.320.6190.03650.02007011
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# to see how many kinds of species the dataset has\n", + "dataset_df['species'].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "oTKViJTlJ0jZ", + "outputId": "1f14324c-42d2-4d62-939f-d4431694524d" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 146\n", + "1 119\n", + "2 68\n", + "Name: species, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# building the neurons network with sequential function\n", + "# this neuron networks is made of 3 layers\n", + "model = tf.keras.models.Sequential(\n", + " [ \n", + " tf.keras.layers.Dense(128, activation='relu'),\n", + " tf.keras.layers.Dense(128, activation='relu'),\n", + " tf.keras.layers.Dense(3, activation='softmax')\n", + "]\n", + ")" + ], + "metadata": { + "id": "FcrFz-HvJ-ly" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's introduce a function that measures the prediction error.\n", + "loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)" + ], + "metadata": { + "id": "dfPGNm2JLVdU" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# \"compile\" the model before training it. 
\n", + "model.compile(optimizer=tf.keras.optimizers.Adam(), loss=loss_fn, metrics=['acc'])" + ], + "metadata": { + "id": "mBcHqSPgKl6N" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "epochs = 15\n", + "model.fit(X_train, y_train, epochs=epochs, validation_split=0.1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "7ntt79ccKoSw", + "outputId": "13b40a15-0018-4a4a-8945-3278a02ae58e" + }, + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Epoch 1/15\n", + "8/8 [==============================] - 0s 21ms/step - loss: 7.2891 - acc: 0.7113 - val_loss: 13.5596 - val_acc: 0.7407\n", + "Epoch 2/15\n", + "8/8 [==============================] - 0s 11ms/step - loss: 13.1949 - acc: 0.6067 - val_loss: 20.4281 - val_acc: 0.7407\n", + "Epoch 3/15\n", + "8/8 [==============================] - 0s 11ms/step - loss: 14.5678 - acc: 0.7155 - val_loss: 8.7273 - val_acc: 0.4074\n", + "Epoch 4/15\n", + "8/8 [==============================] - 0s 9ms/step - loss: 10.9224 - acc: 0.6820 - val_loss: 9.0599 - val_acc: 0.4074\n", + "Epoch 5/15\n", + "8/8 [==============================] - 0s 11ms/step - loss: 12.6337 - acc: 0.5858 - val_loss: 12.3866 - val_acc: 0.7407\n", + "Epoch 6/15\n", + "8/8 [==============================] - 0s 12ms/step - loss: 9.9277 - acc: 0.6778 - val_loss: 7.6402 - val_acc: 0.7407\n", + "Epoch 7/15\n", + "8/8 [==============================] - 0s 10ms/step - loss: 4.7022 - acc: 0.6569 - val_loss: 11.0927 - val_acc: 0.7407\n", + "Epoch 8/15\n", + "8/8 [==============================] - 0s 10ms/step - loss: 8.5943 - acc: 0.6569 - val_loss: 12.4917 - val_acc: 0.7407\n", + "Epoch 9/15\n", + "8/8 [==============================] - 0s 9ms/step - loss: 6.7779 - acc: 0.6611 - val_loss: 9.8235 - val_acc: 0.7407\n", + "Epoch 10/15\n", + "8/8 [==============================] - 0s 10ms/step - loss: 7.4784 - acc: 0.6862 - 
val_loss: 16.9433 - val_acc: 0.7407\n", + "Epoch 11/15\n", + "8/8 [==============================] - 0s 9ms/step - loss: 12.4400 - acc: 0.6402 - val_loss: 16.2418 - val_acc: 0.7407\n", + "Epoch 12/15\n", + "8/8 [==============================] - 0s 10ms/step - loss: 8.9389 - acc: 0.6778 - val_loss: 11.7851 - val_acc: 0.3704\n", + "Epoch 13/15\n", + "8/8 [==============================] - 0s 9ms/step - loss: 5.7511 - acc: 0.6569 - val_loss: 7.6064 - val_acc: 0.7407\n", + "Epoch 14/15\n", + "8/8 [==============================] - 0s 11ms/step - loss: 4.8255 - acc: 0.7071 - val_loss: 5.9219 - val_acc: 0.7407\n", + "Epoch 15/15\n", + "8/8 [==============================] - 0s 9ms/step - loss: 7.6571 - acc: 0.6569 - val_loss: 9.4075 - val_acc: 0.7037\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "model.summary()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "PVjyCj75PSUG", + "outputId": "901d2136-8e60-4c94-fdbc-0172653d7de3" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Model: \"sequential\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " dense (Dense) (None, 128) 1152 \n", + " \n", + " dense_1 (Dense) (None, 128) 16512 \n", + " \n", + " dense_2 (Dense) (None, 3) 387 \n", + " \n", + "=================================================================\n", + "Total params: 18,051\n", + "Trainable params: 18,051\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# now we evaluate our model\n", + "model.evaluate(X_test, y_test)" + ], + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "UcVWrTYdMiSp", + "outputId": "cb708479-7e01-47f3-c164-b2c4d93b126f" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "3/3 [==============================] - 0s 7ms/step - loss: 2.7827 - acc: 0.5970\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[2.782670497894287, 0.5970149040222168]" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## The result does not look good compare with tf decision tree from tutorial. \n", + "This is because neural networks are very data hungry. With less than 1,000 examples, our titanic data set is probably not big enough. There is no strict number for the amount of data you need, but at least 10,000 examples is a good bet, 100,000 is much better, and the best models use training data with examples in the millions.\n", + "\n", + "We usually don't use Neural Networks for traditional data sets like the titanic data set. They are most useful on image recognition or NLP problems, so let's move on to image recognition." + ], + "metadata": { + "id": "oD_dPlv5PrJK" + } + } + ] +} \ No newline at end of file