From 760a4e4567ecf954000504081c21e86b66fa238f Mon Sep 17 00:00:00 2001
From: JamesMVa <111825034+JamesMVa@users.noreply.github.com>
Date: Sat, 6 Jan 2024 22:12:34 -0500
Subject: [PATCH 01/14] Assignment 2 group 7
---
.../assignment_2/group_7_ass_2_2024.ipynb | 582 ++++++++++++++----
1 file changed, 479 insertions(+), 103 deletions(-)
diff --git a/assignments/assignment_2/group_7_ass_2_2024.ipynb b/assignments/assignment_2/group_7_ass_2_2024.ipynb
index db65a409..731a3c64 100644
--- a/assignments/assignment_2/group_7_ass_2_2024.ipynb
+++ b/assignments/assignment_2/group_7_ass_2_2024.ipynb
@@ -32,7 +32,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@@ -41,7 +41,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
@@ -50,7 +50,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
@@ -59,7 +59,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
@@ -68,7 +68,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
@@ -77,7 +77,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 38,
"metadata": {
"scrolled": true
},
@@ -90,7 +90,7 @@
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 39,
"metadata": {},
"outputs": [
{
@@ -111,7 +111,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 40,
"metadata": {},
"outputs": [
{
@@ -132,7 +132,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 41,
"metadata": {},
"outputs": [
{
@@ -151,7 +151,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 42,
"metadata": {},
"outputs": [
{
@@ -160,7 +160,7 @@
"'My teacher assistant is so boring.'"
]
},
- "execution_count": 18,
+ "execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
@@ -174,7 +174,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 43,
"metadata": {},
"outputs": [
{
@@ -183,7 +183,7 @@
"'My TA My teacher assistant is so boring but is very funny'"
]
},
- "execution_count": 24,
+ "execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
@@ -205,7 +205,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 44,
"metadata": {},
"outputs": [
{
@@ -226,7 +226,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 45,
"metadata": {},
"outputs": [
{
@@ -247,7 +247,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 46,
"metadata": {},
"outputs": [
{
@@ -272,7 +272,7 @@
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 47,
"metadata": {},
"outputs": [
{
@@ -321,7 +321,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
@@ -330,13 +330,178 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\"]"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I am too old\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 1. Drop the duplicated blank spaces in str1. Do not use regular expressions. Do not use the code shown in class. Explain your steps. Hint: Use the split method and the join function.\n",
+ "\n",
+ "# First, we split the string into a list of words\n",
+ "list_words = str1.split()\n",
+ "\n",
+ "# Then, we remove the empty strings from the list\n",
+ "non_empty_words = [word for word in list_words if word]\n",
+ "\n",
+ "# Lastly, we join the non-empty words back into a string\n",
+ "result_str = ' '.join(non_empty_words)\n",
+ "\n",
+ "print(result_str)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of letters: 9\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 2. Get the number of letters in the string. Hint: Use the len function.\n",
+ "\n",
+ "# To count the letters, we keep only the alphabetic characters: the list comprehension\n",
+ "# filters str1 with the isalpha method inside an if clause,\n",
+ "# and the len function returns the length of the resulting list.\n",
+ "num_letters = len([char for char in str1 if char.isalpha()])\n",
+ "\n",
+ "# This is a better way to print the answer, including the string value \"Number of letters\"\n",
+ "print(\"Number of letters:\", num_letters)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of blank spaces: 85\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 3. Get the number of blank spaces (all of them) in the string. Hint: Use the len function.\n",
+ "\n",
+ "# First, we create a list with the blank spaces, using the \"isspace\" method\n",
+ "blank_space = [char for char in str1 if char.isspace()]\n",
+ "\n",
+ "# Then we apply the function len() to the list to get the total number of blank spaces\n",
+ "num_blank_spaces = len(blank_space)\n",
+ "\n",
+ "# Finally, we print the result\n",
+ "print(\"Number of blank spaces:\", num_blank_spaces)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Positions of '@': [9, 11, 14, 9, 12, 8, 14, 9, 8, 16, 12, 13, 6, 9, 10, 12, 8, 11, 14, 6, 11, 7, 15, 9, 12, 8, 9, 11, 13, 6, 9, 12, 11, 16, 11]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 4. Get the position of @ in each string in the emails list. Hint: Use the map function and the find method.\n",
+ "\n",
+ "# First we use the map() function to apply the specified lambda function to each element in the emails list. \n",
+ "# The lambda function takes an email address (email) and returns the position of the '@' symbol using the find() method.\n",
+ "# Then, list(...) converts the map result into a list.\n",
+ "positions_at = list(map(lambda email: email.find('@'), emails))\n",
+ "\n",
+ "# Finally, we print the results\n",
+ "print(\"Positions of '@':\", positions_at)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Contains '.edu.': [True, True, True, False, False, True, True, False, False, False, False, False, True, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 5. Identify whether .edu. exists in each string in the emails list. Get a list of Booleans. Hint: Use the map function and the find method.\n",
+ "\n",
+ "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
+ "# The lambda function checks if '.edu.' is present in each email address.\n",
+ "contains_edu_lambda = lambda email: '.edu.' in email\n",
+ "mapped_result = map(contains_edu_lambda, emails)\n",
+ "\n",
+ "# Then, we convert the mapped result into a list.\n",
+ "contains_edu_list = list(mapped_result)\n",
+ "\n",
+ "# Finally we print the result\n",
+ "print(\"Contains '.edu.':\", contains_edu_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Substrings before the first dot: ['cscornejo@pucp', 'orellana', 'karina', 'a20083223@pucp', 'abel', 'mtintaya@pucp', 'joselin', 'a20105737@pucp', 'jfgomezc@pucp', 'afrania', 'luzon', 'adrian', 'soto', 'a20132766@pucp', 'andre', 'gustavo', 'pmlozada@pucp', 'm', 'nicolas', 'gvidal@pucp', 'jane', 'm', 'alejandro', 'a20167070@pucp', 'riega', 'vlevanot@pucp', 'sesquives@pucp', 'perez', 'mariana', 'aclavo@pucp', 'a20182474@pucp', 'josue', 'fabio', 'fernanda', 'aquillatupa@pucp']\n",
+ "Number of substrings containing '@': 14\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 6. Get all the strings before the first dot '.' in each string in the emails list.\n",
+ "# Then, identify how many of them have '@'.\n",
+ "\n",
+ "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
+ "# The lambda function extracts the substring before the first dot '.'; the '@' check is done afterwards.\n",
+ "substring_before_dot_lambda = lambda email: email.split('.')[0]\n",
+ "mapped_result = map(substring_before_dot_lambda, emails)\n",
+ "\n",
+ "# After that, we convert the mapped result into a list.\n",
+ "substring_before_dot_list = list(mapped_result)\n",
+ "\n",
+ "# Also, we count how many substrings contain '@' and print the results\n",
+ "count_with_at = sum('@' in substring for substring in substring_before_dot_list)\n",
+ "\n",
+ "print(\"Substrings before the first dot:\", substring_before_dot_list)\n",
+ "print(\"Number of substrings containing '@':\", count_with_at)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -361,14 +526,9 @@
"4. Add Rows:\n",
" a) Add a new row with a date of your choice and fill the other columns with appropriate values. \n",
"\n",
-
- "# Lastly, we join the non-empty words back into a string\n",
- "result_str = ' '.join(non_empty_words)\n",
-
"5. Analysis:\n",
" a) Calculate the average of the column 'Close' for each 'Year'. You need to group for this task.\n",
" b) Count the number of 'High', 'Medium', and 'Low' Level days in the data.\n",
-
"\n",
"Suggestions:\n",
" - Use `pd.cut()` for categorizing 'Close' values.\n",
@@ -379,30 +539,21 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
+ "# 1\n",
"import pandas as pd\n",
"\n",
-
- "# In order to count the number of letters, we need to remove non-alphabetic characters first, we do that \n",
- "# using the len function (to printe the lenght)\n",
- "# and verifying that the character is alphabetic (a letter), using the if function.\n",
- "num_letters = len([char for char in str1 if char.isalpha()])\n",
- "\n",
- "# This is a better way to print the answer, including the string value \"Number of letters\"\n",
- "print(\"Number of letters:\", num_letters)"
-
"# This code loads the data from the provided URL.\n",
"url = \"https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\"\n",
"df = pd.read_csv(url)"
-
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 57,
"metadata": {},
"outputs": [
{
@@ -483,14 +634,6 @@
"
... | \n",
" \n",
" \n",
- " | 8571 | \n",
- " 12/28/2023 | \n",
- " 12.44 | \n",
- " 12.65 | \n",
- " 12.38 | \n",
- " 12.47 | \n",
- "
\n",
- " \n",
" | 8572 | \n",
" 12/29/2023 | \n",
" 12.55 | \n",
@@ -522,9 +665,17 @@
" 13.64 | \n",
" 14.13 | \n",
"
\n",
+ " \n",
+ " | 8576 | \n",
+ " 01/05/2024 | \n",
+ " 14.24 | \n",
+ " 14.58 | \n",
+ " 13.29 | \n",
+ " 13.35 | \n",
+ "
\n",
" \n",
"\n",
- "8576 rows × 5 columns
\n",
+ "8577 rows × 5 columns
\n",
""
],
"text/plain": [
@@ -535,16 +686,16 @@
"3 01/05/1990 20.11 20.11 20.11 20.11\n",
"4 01/08/1990 20.26 20.26 20.26 20.26\n",
"... ... ... ... ... ...\n",
- "8571 12/28/2023 12.44 12.65 12.38 12.47\n",
"8572 12/29/2023 12.55 13.19 12.36 12.45\n",
"8573 01/02/2024 13.22 14.23 13.10 13.20\n",
"8574 01/03/2024 13.35 14.22 13.33 14.04\n",
"8575 01/04/2024 13.93 14.20 13.64 14.13\n",
+ "8576 01/05/2024 14.24 14.58 13.29 13.35\n",
"\n",
- "[8576 rows x 5 columns]"
+ "[8577 rows x 5 columns]"
]
},
- "execution_count": 2,
+ "execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
@@ -556,7 +707,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 58,
"metadata": {},
"outputs": [
{
@@ -585,6 +736,7 @@
}
],
"source": [
+ "# 2\n",
"# We display the first five rows of the DataFrame.\n",
"print(\"Primeras cinco filas del DataFrame:\")\n",
"print(df.head())\n",
@@ -596,7 +748,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 59,
"metadata": {},
"outputs": [
{
@@ -762,12 +914,13 @@
"[8577 rows x 7 columns]"
]
},
- "execution_count": 3,
+ "execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
+ "# 3\n",
"# We add a 'Level' column to categorize the 'CLOSE' values\n",
"df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
" labels=['Low', 'Medium', 'High'])\n",
@@ -781,7 +934,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 60,
"metadata": {},
"outputs": [
{
@@ -947,12 +1100,13 @@
"[8578 rows x 7 columns]"
]
},
- "execution_count": 4,
+ "execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
+ "# 4\n",
"# Temporalmente eliminamos las columnas 'Level' y 'Year' para evitar NaN en el df\n",
"df = df.drop(['Level', 'Year'], axis=1)\n",
"\n",
@@ -975,88 +1129,310 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " Average Close | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1990 | \n",
+ " 23.06 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1991 | \n",
+ " 18.37 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1992 | \n",
+ " 15.45 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1993 | \n",
+ " 12.69 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1994 | \n",
+ " 13.93 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 1995 | \n",
+ " 12.39 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 1996 | \n",
+ " 16.44 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 1997 | \n",
+ " 22.36 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 1998 | \n",
+ " 25.60 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 1999 | \n",
+ " 24.37 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 2000 | \n",
+ " 23.32 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 2001 | \n",
+ " 25.75 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 2002 | \n",
+ " 27.29 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 2003 | \n",
+ " 21.98 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 2004 | \n",
+ " 15.48 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 2005 | \n",
+ " 12.81 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 2006 | \n",
+ " 12.81 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 2007 | \n",
+ " 17.54 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 2008 | \n",
+ " 32.70 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 2009 | \n",
+ " 31.48 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 2010 | \n",
+ " 22.55 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 2011 | \n",
+ " 24.20 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 2012 | \n",
+ " 17.80 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 2013 | \n",
+ " 14.23 | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 2014 | \n",
+ " 14.18 | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " 2015 | \n",
+ " 16.67 | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " 2016 | \n",
+ " 15.83 | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " 2017 | \n",
+ " 11.09 | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " 2018 | \n",
+ " 16.64 | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " 2019 | \n",
+ " 15.39 | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " 2020 | \n",
+ " 29.25 | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " 2021 | \n",
+ " 19.66 | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " 2022 | \n",
+ " 25.64 | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " 2023 | \n",
+ " 16.85 | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " 2024 | \n",
+ " 13.62 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
"text/plain": [
- "Year\n",
- "1990 23.06\n",
- "1991 18.37\n",
- "1992 15.45\n",
- "1993 12.69\n",
- "1994 13.93\n",
- "1995 12.39\n",
- "1996 16.44\n",
- "1997 22.36\n",
- "1998 25.60\n",
- "1999 24.37\n",
- "2000 23.32\n",
- "2001 25.75\n",
- "2002 27.29\n",
- "2003 21.98\n",
- "2004 15.48\n",
- "2005 12.81\n",
- "2006 12.81\n",
- "2007 17.54\n",
- "2008 32.70\n",
- "2009 31.48\n",
- "2010 22.55\n",
- "2011 24.20\n",
- "2012 17.80\n",
- "2013 14.23\n",
- "2014 14.18\n",
- "2015 16.67\n",
- "2016 15.83\n",
- "2017 11.09\n",
- "2018 16.64\n",
- "2019 15.39\n",
- "2020 29.25\n",
- "2021 19.66\n",
- "2022 25.64\n",
- "2023 16.85\n",
- "2024 13.62\n",
- "Name: CLOSE, dtype: float64"
+ " Year Average Close\n",
+ "0 1990 23.06\n",
+ "1 1991 18.37\n",
+ "2 1992 15.45\n",
+ "3 1993 12.69\n",
+ "4 1994 13.93\n",
+ "5 1995 12.39\n",
+ "6 1996 16.44\n",
+ "7 1997 22.36\n",
+ "8 1998 25.60\n",
+ "9 1999 24.37\n",
+ "10 2000 23.32\n",
+ "11 2001 25.75\n",
+ "12 2002 27.29\n",
+ "13 2003 21.98\n",
+ "14 2004 15.48\n",
+ "15 2005 12.81\n",
+ "16 2006 12.81\n",
+ "17 2007 17.54\n",
+ "18 2008 32.70\n",
+ "19 2009 31.48\n",
+ "20 2010 22.55\n",
+ "21 2011 24.20\n",
+ "22 2012 17.80\n",
+ "23 2013 14.23\n",
+ "24 2014 14.18\n",
+ "25 2015 16.67\n",
+ "26 2016 15.83\n",
+ "27 2017 11.09\n",
+ "28 2018 16.64\n",
+ "29 2019 15.39\n",
+ "30 2020 29.25\n",
+ "31 2021 19.66\n",
+ "32 2022 25.64\n",
+ "33 2023 16.85\n",
+ "34 2024 13.62"
]
},
- "execution_count": 6,
+ "execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "# Calculamos el promedio de la columna 'Close' para cada 'Year'\n",
- "average_close_by_year = df.groupby('Year')['CLOSE'].mean().round(2)\n",
- "average_close_by_year"
+ "# 5\n",
+ "# Agrupa por 'Year' y calcula el promedio de 'Close' para cada año\n",
+ "average_close_by_year = df.groupby('Year')['CLOSE'].mean()\n",
+ "\n",
+ "# Redondea los valores a dos decimales\n",
+ "average_close_by_year = average_close_by_year.round(2)\n",
+ "\n",
+ "# Crea un nuevo DataFrame con los resultados\n",
+ "df_average_year = pd.DataFrame({'Year': average_close_by_year.index, 'Average Close': average_close_by_year.values})\n",
+ "df_average_year"
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "Level\n",
"Low 5276\n",
"Medium 2582\n",
"High 720\n",
- "dtype: int64"
+ "Name: Level, dtype: int64"
]
},
- "execution_count": 7,
+ "execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "# Contamos el número de días con niveles 'High', 'Medium' y 'Low'\n",
- "count_levels = df.groupby('Level').size()\n",
- "count_levels"
+ "# Count the number of 'High', 'Medium', and 'Low' Level days in the data\n",
+ "df = df['Level'].value_counts()\n",
+ "df"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -1082,7 +1458,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.4"
+ "version": "3.9.7"
},
"toc": {
"base_numbering": 1,
From 2d13cad62d4db06968de21651b42fe88abc15698 Mon Sep 17 00:00:00 2001
From: JamesMVa <111825034+JamesMVa@users.noreply.github.com>
Date: Sat, 6 Jan 2024 22:14:09 -0500
Subject: [PATCH 02/14] Delete
assignments/assignment_2/group_7_ass_2_2024_modificado.ipynb
---
.../group_7_ass_2_2024_modificado.ipynb | 1103 -----------------
1 file changed, 1103 deletions(-)
delete mode 100644 assignments/assignment_2/group_7_ass_2_2024_modificado.ipynb
diff --git a/assignments/assignment_2/group_7_ass_2_2024_modificado.ipynb b/assignments/assignment_2/group_7_ass_2_2024_modificado.ipynb
deleted file mode 100644
index db65a409..00000000
--- a/assignments/assignment_2/group_7_ass_2_2024_modificado.ipynb
+++ /dev/null
@@ -1,1103 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 1 Assignment 2\n",
- "\n",
- "It is totally prohibited to use any kind of loop. You can use stackoverflow. If you copy codes from previous answers, explain each step. No explanation is `0 points`."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.2 Lists\n",
- "1. Show the indices of the `np.nan` values in the `f_list` list. We want to see this output: `The indices 0, 1, 4, 7 have np.nan values.` **Hint: Use print function and [f-strings](https://realpython.com/python-f-strings/) to insert the indices values.**
\n",
- "\n",
- "2. Replicate 4 times the values of the list `p2_list`. We expect an ouput like this: `[ 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5]`. **Hint: Use multiplication function in `lists`and see the output.**
\n",
- "3. Print the length of `f_list`. **Hint: Length function**
\n",
- "4. Print `My teacher assistant is so boring.` using `text1` list. **Hint: Use the [`join` function](https://stackoverflow.com/questions/493819/why-is-it-string-joinlist-instead-of-list-joinstring)**
\n",
- "5. Print `My TA is so boring, but is very funny.` using `text1` list.**Hint: Use the `join` function, and `extend` method.**
\n",
- "6. Print
\n",
- "`The max value of values1 is 86 and is located in the 0 index. `
\n",
- "`The min value of values1 is 0 and is located in the 7 index. `
**Hint: Use the `f-string`, `min`, and `max` functions.**\n",
- "
\n",
- "7. Get two lists: `names` and `last_names` using `last_and_name` list. **Hint: Use `map` and `split`.**\n",
- "
\n",
- "8. Give only the last names of students who do not have email. Use the `emails` and `last_names` listt. **Hint: Use `map` and `split`.**\n",
- "
"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "f_list = [np.nan , np.nan, \"Austria\", \"Germany\", np.nan, \"Pakistan\", \"np.nan\", np.nan ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "text1 = ['My', 'teacher', 'assistant', 'is', 'so', 'boring.']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "p2_list = [ 2 , 3, 4, 5 ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "values1 = [ 86, 86, 85, 85, 85, 83, 23, 0, 84, 1 ] "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "# These two lists a\n",
- "last_and_name = [ \"CORNEJO SANCHEZ, CHRISTIAN SANTOS\", \"ORELLANA QUISPE, CRISTIAN NASSER\", \"MORALES CHOQUEHUANCA, ANGELICA KARINA\", \"GUIMARAY RIBEYRO, JOSE ROBERTO\", \"CAMACHO GAVIDIA, ABEL FERNANDO\", \"TINTAYA ORIHUELA, MEIR ALVARO\", \"CHAVEZ MARTINEZ, JOSELIN ALEXANDRA\", \"FIGUEROA MURO, LEONEL ARTURO\", \"GOMEZ CRIBILLERO, JOSE FELIPE\", \"PALOMINO SEGUÍN, AFRANIA\", \"LUZON CUEVA, BIANCA MARIETTE\", \"SUAÑA ZEGARRA, ADRIAN ANDRE\", \"SOTO POMACHAGUA, DORKAS YOMIRA JHERMY\", \"FIORENTINO MARTINEZ, LADY ALY\", \"LAMA MAVILA, HECTOR ANDRE\", \"MEZA HINOJO, GUSTAVO\", \"LOZADA MURILLO, PERSEO MARCELO\", \"ZAMBRANO JIMENEZ, MIGUEL ALONZO\", \"JACOBS LUQUE, NICOLAS\", \"VIDAL VIDAL, ROCIO GABRIELA\", \"TORRES ANICAMA, JANE CAMILA\", \"LOPEZ ESTRADA, MARIA ELISA\", \"BOYCO ORAMS, ALEJANDRO\", \"DIAZ BERROSPI, KARLINE ROSMELI\", \"RIEGA ESCALANTE, STEPHY ROSARIO\", \"LEVANO TORRES, VALERIA CECILIA\", \"ESQUIVES BRAVO, SEBASTIAN RENATO\", \"PEREZ GONZALES, JUAN CARLOS\", \"OTERO MAGUIÑA, MARIANA\", \"CLAVO CAMPOS, ANDREA BRIZETH\", \"AGUILAR GARCIA, ERICK JOSUE\", \"CALDAS VELASQUEZ, JOSUE DANIEL\", \"SALAS NUÑEZ BORJA, FABIO MANUEL\", \"PIZARRO VILLANES, FERNANDA NICOLLE\", \"QUILLATUPA MORALES, ANGELA ADELINA\", \"HUANCAYA IDONE, CESAR DANTE\", \"CALVO PORTOCARRERO, GABRIELA ISABEL\", \"IBAÑEZ ABANTO, ANGEL MAURICIO\", \"MELÉNDEZ APONTE, JUAN DIEGO\", \"CRISTIAN SERRANO, ARONE\", \"HINOJOSA CAHUANA, PERCY ALBERTH\", \"ANGLAS GARCÍA, KEVIN ARTURO\", \"ALDAVE ACOSTA, CESAR ERNESTO\", \"NÚÑEZ HUAMÁN, CÉSAR AGUSTO\", \"OBREGON HUAMAN, DIANA EDITH\", \"SOTO PACHERRES, RODRIGO FRANCO\", \"INGARUCA RIVERA, GRETTEL ALEXANDRA\", \"ROJAS HUAMAN, ROSA ANGELA\", \"NEYRA SALAS, DANTE OMAR\", \"HUERTA ESPINOZA, YAJAIRA ALEXANDRA\", \"HUANCA MARTINEZ, JORGE ALBERTO\", \"FLORES CADILLO, ALEXIS\" ]\n",
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\", \"\", \"\", \"\", \"\", \"\", \"f0873079@pucp.edu.pe\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"flores.alexis@pucp.edu.pe\", ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 48,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The indices [0, 1, 4, 7] have np.nan values.\n"
- ]
- }
- ],
- "source": [
- " # Answer 1\n",
- "\n",
- "nan_values = [i for i, value in enumerate(f_list) if value is np.nan]\n",
- "\n",
- "print(f\"The indices {nan_values} have np.nan values.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5]\n"
- ]
- }
- ],
- "source": [
- " # Answer 2\n",
- "\n",
- "p2_list_2 = p2_list * 4\n",
- "\n",
- "print(p2_list_2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "8\n"
- ]
- }
- ],
- "source": [
- " # Answer 3\n",
- "\n",
- "print(len(f_list))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'My teacher assistant is so boring.'"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- " # Answer 4\n",
- "\n",
- "answer_4 = ' '.join(text1)\n",
- "answer_4"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'My TA My teacher assistant is so boring but is very funny'"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- " # Answer 5\n",
- " \n",
- "text1[-1] = text1[-1].rstrip('.')\n",
- " \n",
- "text_answer = ['My', 'TA']\n",
- "\n",
- "text_answer_b = ['but', 'is', 'very', 'funny']\n",
- "\n",
- "text_answer.extend(text1 + text_answer_b)\n",
- "\n",
- "answer_5 = ' '.join(text_answer)\n",
- "answer_5"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The max value of values1 is 86 and is located in the 0 index.\n"
- ]
- }
- ],
- "source": [
- " # Answer 6.1\n",
- " \n",
- "max_index = values1.index(max(values1))\n",
- " \n",
- "print(f\"The max value of values1 is {max(values1)} and is located in the {max_index} index.\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 33,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The min value of values1 is 0 and is located in the 7 index.\n"
- ]
- }
- ],
- "source": [
- " # Answer 6.2\n",
- " \n",
- "min_index = values1.index(min(values1))\n",
- " \n",
- "print(f\"The min value of values1 is {min(values1)} and is located in the {min_index} index.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 37,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Names: ('CHRISTIAN SANTOS', 'CRISTIAN NASSER', 'ANGELICA KARINA', 'JOSE ROBERTO', 'ABEL FERNANDO', 'MEIR ALVARO', 'JOSELIN ALEXANDRA', 'LEONEL ARTURO', 'JOSE FELIPE', 'AFRANIA', 'BIANCA MARIETTE', 'ADRIAN ANDRE', 'DORKAS YOMIRA JHERMY', 'LADY ALY', 'HECTOR ANDRE', 'GUSTAVO', 'PERSEO MARCELO', 'MIGUEL ALONZO', 'NICOLAS', 'ROCIO GABRIELA', 'JANE CAMILA', 'MARIA ELISA', 'ALEJANDRO', 'KARLINE ROSMELI', 'STEPHY ROSARIO', 'VALERIA CECILIA', 'SEBASTIAN RENATO', 'JUAN CARLOS', 'MARIANA', 'ANDREA BRIZETH', 'ERICK JOSUE', 'JOSUE DANIEL', 'FABIO MANUEL', 'FERNANDA NICOLLE', 'ANGELA ADELINA', 'CESAR DANTE', 'GABRIELA ISABEL', 'ANGEL MAURICIO', 'JUAN DIEGO', 'ARONE', 'PERCY ALBERTH', 'KEVIN ARTURO', 'CESAR ERNESTO', 'CÉSAR AGUSTO', 'DIANA EDITH', 'RODRIGO FRANCO', 'GRETTEL ALEXANDRA', 'ROSA ANGELA', 'DANTE OMAR', 'YAJAIRA ALEXANDRA', 'JORGE ALBERTO', 'ALEXIS')\n",
- "Last Names: ('CORNEJO SANCHEZ', 'ORELLANA QUISPE', 'MORALES CHOQUEHUANCA', 'GUIMARAY RIBEYRO', 'CAMACHO GAVIDIA', 'TINTAYA ORIHUELA', 'CHAVEZ MARTINEZ', 'FIGUEROA MURO', 'GOMEZ CRIBILLERO', 'PALOMINO SEGUÍN', 'LUZON CUEVA', 'SUAÑA ZEGARRA', 'SOTO POMACHAGUA', 'FIORENTINO MARTINEZ', 'LAMA MAVILA', 'MEZA HINOJO', 'LOZADA MURILLO', 'ZAMBRANO JIMENEZ', 'JACOBS LUQUE', 'VIDAL VIDAL', 'TORRES ANICAMA', 'LOPEZ ESTRADA', 'BOYCO ORAMS', 'DIAZ BERROSPI', 'RIEGA ESCALANTE', 'LEVANO TORRES', 'ESQUIVES BRAVO', 'PEREZ GONZALES', 'OTERO MAGUIÑA', 'CLAVO CAMPOS', 'AGUILAR GARCIA', 'CALDAS VELASQUEZ', 'SALAS NUÑEZ BORJA', 'PIZARRO VILLANES', 'QUILLATUPA MORALES', 'HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'HINOJOSA CAHUANA', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ', 'FLORES CADILLO')\n"
- ]
- }
- ],
- "source": [
- " # Answer 7\n",
- "\n",
- "last_names, names = zip(*map(lambda x: x.split(', '), last_and_name))\n",
- " # the lambda function using split divides each name from each last name\n",
- " # the function zip takes the values and puts them in tuples\n",
- " # \"last_names, names\" indicates the code to split the tuples in two separate lists\n",
- "print(\"Names:\", names)\n",
- "print(\"Last Names:\", last_names)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 46,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ']\n"
- ]
- }
- ],
- "source": [
- " # Answer 8\n",
- " \n",
- "answer_8 = [last_name for last_name, email in zip(last_names, emails) if not email]\n",
- " # zip function pairs last names with its corresponding emails into tuples\n",
- " # \"last_name for last_name, email in ... if not email\" indicates the last name for each pair if email is empty\n",
- "print(answer_8)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.3 Strings\n",
- "\n",
- "\n",
- "1. Drop the duplicated blank spaces in `str1`. Do not use the regular expresions. Do not use the code shown in class. Explain your steps.**Hint: Use `split` method and `join` function.**\n",
- "\n",
- "2. Get the number of letters in the string .**Hint: Use `len`function.**\n",
- "\n",
- "3. Get the number of blank spaces (all of them) in the string .**Hint: Use `len`function.**\n",
- "\n",
- "4. Get the position of `@` in each string in the emails list. **Hint: Use `map`function and `find` method.**\n",
- "\n",
- "5. Identifies if exists `.edu.` in each string in the `emails` list. Get a list of Booleans. **Hint: Use `map`function and `find` method.**\n",
- "\n",
- "6. Get all the strings before the first dot `.`in each string in the `emails` list. Identifies how many of them has `@`. **Hint: Use `map`function and `find` method.**"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Dear professor, the reason why I created this new branch is to correct the fact that I mistakenly edited the main branch of my group when I was writing my part. So, I was the one in charge to complete the \"Strings\" part, and here it is. I hope you can understand this."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "str1 = 'I am too old'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [],
- "source": [
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\"]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.4 Pandas\n",
- "\n",
- "You can and should always ask ChatGPT, BARD, Bing, etc.\n",
- "In this exercise, you will work with financial data. Follow the steps below:\n",
- "\n",
- "1. Load Data:\n",
- " Load the financial data available at this URL: https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\n",
- " Use `pd.read_csv()` adn the link to load the data into a DataFrame.\n",
- "\n",
- "2. Explore Data:\n",
- " a) Display the first five rows of the DataFrame.\n",
- " b) Display the summary statistics of the DataFrame.\n",
- "\n",
- "3. Add Columns:\n",
- " a) Add a new column 'Level' that categorizes the 'Close' column values into 'Low' (< 20), 'Medium' (20-30), and 'High' (>30). You need to make a filter to make this categorization. You can see below some suggestions how to do it.\n",
- " b) Add a new column 'Year' extracted from the 'Date' column. You need to work with dates. You can do it directly workiwith the column splitting it or changing it to date format an dfollowing the below suggestion. \n",
- "\n",
- "4. Add Rows:\n",
- " a) Add a new row with a date of your choice and fill the other columns with appropriate values. \n",
- "\n",
-
- "# Lastly, we join the non-empty words back into a string\n",
- "result_str = ' '.join(non_empty_words)\n",
-
- "5. Analysis:\n",
- " a) Calculate the average of the column 'Close' for each 'Year'. You need to group for this task.\n",
- " b) Count the number of 'High', 'Medium', and 'Low' Level days in the data.\n",
-
- "\n",
- "Suggestions:\n",
- " - Use `pd.cut()` for categorizing 'Close' values.\n",
- " - Use `pd.to_datetime()` and `dt.year` to extract the year from a date.\n",
- " - Use `DataFrame.append()` or `pd.concat()` to add rows.\n",
- " - Use `groupby()` for aggregation tasks."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "\n",
-
- "# In order to count the number of letters, we need to remove non-alphabetic characters first, we do that \n",
- "# using the len function (to printe the lenght)\n",
- "# and verifying that the character is alphabetic (a letter), using the if function.\n",
- "num_letters = len([char for char in str1 if char.isalpha()])\n",
- "\n",
- "# This is a better way to print the answer, including the string value \"Number of letters\"\n",
- "print(\"Number of letters:\", num_letters)"
-
- "# This code loads the data from the provided URL.\n",
- "url = \"https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\"\n",
- "df = pd.read_csv(url)"
-
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8571 | \n",
- " 12/28/2023 | \n",
- " 12.44 | \n",
- " 12.65 | \n",
- " 12.38 | \n",
- " 12.47 | \n",
- "
\n",
- " \n",
- " | 8572 | \n",
- " 12/29/2023 | \n",
- " 12.55 | \n",
- " 13.19 | \n",
- " 12.36 | \n",
- " 12.45 | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8576 rows × 5 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26\n",
- "... ... ... ... ... ...\n",
- "8571 12/28/2023 12.44 12.65 12.38 12.47\n",
- "8572 12/29/2023 12.55 13.19 12.36 12.45\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13\n",
- "\n",
- "[8576 rows x 5 columns]"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# We check the data assigned to the variable 'df'.\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Primeras cinco filas del DataFrame:\n",
- " DATE OPEN HIGH LOW CLOSE\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26\n",
- "\n",
- "Estadísticas resumidas del DataFrame:\n",
- " OPEN HIGH LOW CLOSE\n",
- "count 8577.000000 8577.000000 8577.000000 8577.000000\n",
- "mean 19.666454 20.474364 18.914884 19.580374\n",
- "std 7.979066 8.439927 7.469827 7.906214\n",
- "min 9.010000 9.310000 8.560000 9.140000\n",
- "25% 13.940000 14.540000 13.400000 13.880000\n",
- "50% 17.790000 18.470000 17.220000 17.760000\n",
- "75% 23.100000 23.960000 22.320000 22.990000\n",
- "max 82.690000 89.530000 72.760000 82.690000\n"
- ]
- }
- ],
- "source": [
- "# We display the first five rows of the DataFrame.\n",
- "print(\"Primeras cinco filas del DataFrame:\")\n",
- "print(df.head())\n",
- "\n",
- "# We display the summarized statistics of the DataFrame.\n",
- "print(\"\\nEstadísticas resumidas del DataFrame:\")\n",
- "print(df.describe())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- " Level | \n",
- " Year | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8572 | \n",
- " 12/29/2023 | \n",
- " 12.55 | \n",
- " 13.19 | \n",
- " 12.36 | \n",
- " 12.45 | \n",
- " Low | \n",
- " 2023 | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8577 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE Level Year\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
- "... ... ... ... ... ... ... ...\n",
- "8572 12/29/2023 12.55 13.19 12.36 12.45 Low 2023\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
- "\n",
- "[8577 rows x 7 columns]"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# We add a 'Level' column to categorize the 'CLOSE' values\n",
- "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
- " labels=['Low', 'Medium', 'High'])\n",
- "\n",
- "# We add the 'Year' column extracted from the 'DATE' column, keeping only the year using the dt.year function.\n",
- "# Firstly, the 'DATE' variable was categorized as datetime.\n",
- "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
- "\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- " Level | \n",
- " Year | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8577 | \n",
- " 01/06/2024 | \n",
- " 14.27 | \n",
- " 14.61 | \n",
- " 13.32 | \n",
- " 13.38 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8578 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE Level Year\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
- "... ... ... ... ... ... ... ...\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
- "8577 01/06/2024 14.27 14.61 13.32 13.38 Low 2024\n",
- "\n",
- "[8578 rows x 7 columns]"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Temporalmente eliminamos las columnas 'Level' y 'Year' para evitar NaN en el df\n",
- "df = df.drop(['Level', 'Year'], axis=1)\n",
- "\n",
- "# Agregamos la nueva fila solicitada\n",
- "new_row = pd.DataFrame({'DATE': ['01/06/2024'], \n",
- " 'OPEN': [14.27], \n",
- " 'HIGH': [14.61], \n",
- " 'LOW': [13.32], \n",
- " 'CLOSE': [13.38]}) \n",
- "\n",
- "# Agregamos la nueva fila al DataFrame existente\n",
- "df = pd.concat([df, new_row], ignore_index=True)\n",
- "\n",
- "# Recategorizamos 'Level' y 'Year' después de agregar la nueva fila\n",
- "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
- " labels=['Low', 'Medium', 'High'])\n",
- "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Year\n",
- "1990 23.06\n",
- "1991 18.37\n",
- "1992 15.45\n",
- "1993 12.69\n",
- "1994 13.93\n",
- "1995 12.39\n",
- "1996 16.44\n",
- "1997 22.36\n",
- "1998 25.60\n",
- "1999 24.37\n",
- "2000 23.32\n",
- "2001 25.75\n",
- "2002 27.29\n",
- "2003 21.98\n",
- "2004 15.48\n",
- "2005 12.81\n",
- "2006 12.81\n",
- "2007 17.54\n",
- "2008 32.70\n",
- "2009 31.48\n",
- "2010 22.55\n",
- "2011 24.20\n",
- "2012 17.80\n",
- "2013 14.23\n",
- "2014 14.18\n",
- "2015 16.67\n",
- "2016 15.83\n",
- "2017 11.09\n",
- "2018 16.64\n",
- "2019 15.39\n",
- "2020 29.25\n",
- "2021 19.66\n",
- "2022 25.64\n",
- "2023 16.85\n",
- "2024 13.62\n",
- "Name: CLOSE, dtype: float64"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Calculamos el promedio de la columna 'Close' para cada 'Year'\n",
- "average_close_by_year = df.groupby('Year')['CLOSE'].mean().round(2)\n",
- "average_close_by_year"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Level\n",
- "Low 5276\n",
- "Medium 2582\n",
- "High 720\n",
- "dtype: int64"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Contamos el número de días con niveles 'High', 'Medium' y 'Low'\n",
- "count_levels = df.groupby('Level').size()\n",
- "count_levels"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "hide_input": false,
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.4"
- },
- "toc": {
- "base_numbering": 1,
- "nav_menu": {},
- "number_sections": true,
- "sideBar": true,
- "skip_h1_title": false,
- "title_cell": "Table of Contents",
- "title_sidebar": "Contents",
- "toc_cell": false,
- "toc_position": {},
- "toc_section_display": true,
- "toc_window_display": false
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
From b679929cfaf6179b3ab67b4ec5d73512ed81d7b1 Mon Sep 17 00:00:00 2001
From: JamesMVa <111825034+JamesMVa@users.noreply.github.com>
Date: Sat, 6 Jan 2024 22:14:26 -0500
Subject: [PATCH 03/14] Delete assignments/assignment_2/gr_3_test.ipynb
---
assignments/assignment_2/gr_3_test.ipynb | 217 -----------------------
1 file changed, 217 deletions(-)
delete mode 100644 assignments/assignment_2/gr_3_test.ipynb
diff --git a/assignments/assignment_2/gr_3_test.ipynb b/assignments/assignment_2/gr_3_test.ipynb
deleted file mode 100644
index 374b5f0a..00000000
--- a/assignments/assignment_2/gr_3_test.ipynb
+++ /dev/null
@@ -1,217 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 1 Assignment 2\n",
- "\n",
- "It is totally prohibited to use any kind of loop. You can use stackoverflow. If you copy codes from previous answers, explain each step. No explanation is `0 points`."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.2 Lists\n",
- "1. Show the indices of the `np.nan` values in the `f_list` list. We want to see this output: `The indices 0, 1, 4, 7 have np.nan values.` **Hint: Use print function and [f-strings](https://realpython.com/python-f-strings/) to insert the indices values.**
\n",
- "\n",
- "2. Replicate 4 times the values of the list `p2_list`. We expect an ouput like this: `[ 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5]`. **Hint: Use multiplication function in `lists`and see the output.**
\n",
- "3. Print the length of `f_list`. **Hint: Length function**
\n",
- "4. Print `My teacher assistant is so boring.` using `text1` list. **Hint: Use the [`join` function](https://stackoverflow.com/questions/493819/why-is-it-string-joinlist-instead-of-list-joinstring)**
\n",
- "5. Print `My TA is so boring, but is very funny.` using `text1` list.**Hint: Use the `join` function, and `extend` method.**
\n",
- "6. Print
\n",
- "`The max value of values1 is 86 and is located in the 0 index. `
\n",
- "`The min value of values1 is 0 and is located in the 7 index. `
**Hint: Use the `f-string`, `min`, and `max` functions.**\n",
- "
\n",
- "7. Get two lists: `names` and `last_names` using `last_and_name` list. **Hint: Use `map` and `split`.**\n",
- "
\n",
- "8. Give only the last names of students who do not have email. Use the `emails` and `last_names` listt. **Hint: Use `map` and `split`.**\n",
- "
"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "f_list = [np.nan , np.nan, \"Austria\", \"Germany\", np.nan, \"Pakistan\", \"np.nan\", np.nan ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "f_list[0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "text1 = ['My', 'teacher', 'assistant', 'is', 'so', 'boring.']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "p2_list = [ 2 , 3, 4, 5 ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "values1 = [ 86, 86, 85, 85, 85, 83, 23, 0, 84, 1 ] "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "# These two lists a\n",
- "last_and_name = [ \"CORNEJO SANCHEZ, CHRISTIAN SANTOS\", \"ORELLANA QUISPE, CRISTIAN NASSER\", \"MORALES CHOQUEHUANCA, ANGELICA KARINA\", \"GUIMARAY RIBEYRO, JOSE ROBERTO\", \"CAMACHO GAVIDIA, ABEL FERNANDO\", \"TINTAYA ORIHUELA, MEIR ALVARO\", \"CHAVEZ MARTINEZ, JOSELIN ALEXANDRA\", \"FIGUEROA MURO, LEONEL ARTURO\", \"GOMEZ CRIBILLERO, JOSE FELIPE\", \"PALOMINO SEGUÍN, AFRANIA\", \"LUZON CUEVA, BIANCA MARIETTE\", \"SUAÑA ZEGARRA, ADRIAN ANDRE\", \"SOTO POMACHAGUA, DORKAS YOMIRA JHERMY\", \"FIORENTINO MARTINEZ, LADY ALY\", \"LAMA MAVILA, HECTOR ANDRE\", \"MEZA HINOJO, GUSTAVO\", \"LOZADA MURILLO, PERSEO MARCELO\", \"ZAMBRANO JIMENEZ, MIGUEL ALONZO\", \"JACOBS LUQUE, NICOLAS\", \"VIDAL VIDAL, ROCIO GABRIELA\", \"TORRES ANICAMA, JANE CAMILA\", \"LOPEZ ESTRADA, MARIA ELISA\", \"BOYCO ORAMS, ALEJANDRO\", \"DIAZ BERROSPI, KARLINE ROSMELI\", \"RIEGA ESCALANTE, STEPHY ROSARIO\", \"LEVANO TORRES, VALERIA CECILIA\", \"ESQUIVES BRAVO, SEBASTIAN RENATO\", \"PEREZ GONZALES, JUAN CARLOS\", \"OTERO MAGUIÑA, MARIANA\", \"CLAVO CAMPOS, ANDREA BRIZETH\", \"AGUILAR GARCIA, ERICK JOSUE\", \"CALDAS VELASQUEZ, JOSUE DANIEL\", \"SALAS NUÑEZ BORJA, FABIO MANUEL\", \"PIZARRO VILLANES, FERNANDA NICOLLE\", \"QUILLATUPA MORALES, ANGELA ADELINA\", \"HUANCAYA IDONE, CESAR DANTE\", \"CALVO PORTOCARRERO, GABRIELA ISABEL\", \"IBAÑEZ ABANTO, ANGEL MAURICIO\", \"MELÉNDEZ APONTE, JUAN DIEGO\", \"CRISTIAN SERRANO, ARONE\", \"HINOJOSA CAHUANA, PERCY ALBERTH\", \"ANGLAS GARCÍA, KEVIN ARTURO\", \"ALDAVE ACOSTA, CESAR ERNESTO\", \"NÚÑEZ HUAMÁN, CÉSAR AGUSTO\", \"OBREGON HUAMAN, DIANA EDITH\", \"SOTO PACHERRES, RODRIGO FRANCO\", \"INGARUCA RIVERA, GRETTEL ALEXANDRA\", \"ROJAS HUAMAN, ROSA ANGELA\", \"NEYRA SALAS, DANTE OMAR\", \"HUERTA ESPINOZA, YAJAIRA ALEXANDRA\", \"HUANCA MARTINEZ, JORGE ALBERTO\", \"FLORES CADILLO, ALEXIS\" ]\n",
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\", \"\", \"\", \"\", \"\", \"\", \"f0873079@pucp.edu.pe\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"flores.alexis@pucp.edu.pe\", ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "nan_indices = [i for i, value in enumerate(f_list) if isinstance(value, (float, np.floating)) and np.isnan(value)]\n",
- "print(f\"Los índices {', '.join(map(str, nan_indices))} tienen valores np.nan.\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.3 Strings\n",
- "\n",
- "\n",
- "1. Drop the duplicated blank spaces in `str1`. Do not use the regular expresions. Do not use the code shown in class. Explain your steps.**Hint: Use `split` method and `join` function.**\n",
- "\n",
- "2. Get the number of letters in the string .**Hint: Use `len`function.**\n",
- "\n",
- "3. Get the number of blank spaces (all of them) in the string .**Hint: Use `len`function.**\n",
- "\n",
- "4. Get the position of `@` in each string in the emails list. **Hint: Use `map`function and `find` method.**\n",
- "\n",
- "5. Identifies if exists `.edu.` in each string in the `emails` list. Get a list of Booleans. **Hint: Use `map`function and `find` method.**\n",
- "\n",
- "6. Get all the strings before the first dot `.`in each string in the `emails` list. Identifies how many of them has `@`. **Hint: Use `map`function and `find` method.**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "str1 = 'I am too old'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [],
- "source": [
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\"]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.4 Pandas\n",
- "\n",
- "You can and should always ask ChatGPT, BARD, Bing, etc.\n",
- "In this exercise, you will work with financial data. Follow the steps below:\n",
- "\n",
- "1. Load Data:\n",
- " Load the financial data available at this URL: https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\n",
- " Use `pd.read_csv()` adn the link to load the data into a DataFrame.\n",
- "\n",
- "2. Explore Data:\n",
- " a) Display the first five rows of the DataFrame.\n",
- " b) Display the summary statistics of the DataFrame.\n",
- "\n",
- "3. Add Columns:\n",
- " a) Add a new column 'Level' that categorizes the 'Close' column values into 'Low' (< 20), 'Medium' (20-30), and 'High' (>30). You need to make a filter to make this categorization. You can see below some suggestions how to do it.\n",
- " b) Add a new column 'Year' extracted from the 'Date' column. You need to work with dates. You can do it directly workiwith the column splitting it or changing it to date format an dfollowing the below suggestion. \n",
- "\n",
- "4. Add Rows:\n",
- " a) Add a new row with a date of your choice and fill the other columns with appropriate values. \n",
- "\n",
- "5. Analysis:\n",
- " a) Calculate the average of the column 'Close' for each 'Year'. You need to group for this task.\n",
- " b) Count the number of 'High', 'Medium', and 'Low' Level days in the data.\n",
- "\n",
- "Suggestions:\n",
- " - Use `pd.cut()` for categorizing 'Close' values.\n",
- " - Use `pd.to_datetime()` and `dt.year` to extract the year from a date.\n",
- " - Use `DataFrame.append()` or `pd.concat()` to add rows.\n",
- " - Use `groupby()` for aggregation tasks."
- ]
- }
- ],
- "metadata": {
- "hide_input": false,
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.5"
- },
- "toc": {
- "base_numbering": 1,
- "nav_menu": {},
- "number_sections": true,
- "sideBar": true,
- "skip_h1_title": false,
- "title_cell": "Table of Contents",
- "title_sidebar": "Contents",
- "toc_cell": false,
- "toc_position": {},
- "toc_section_display": true,
- "toc_window_display": false
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
From b61837f8e4081799c68325c0d3640022084fa8be Mon Sep 17 00:00:00 2001
From: JamesMVa <111825034+JamesMVa@users.noreply.github.com>
Date: Sat, 6 Jan 2024 22:19:07 -0500
Subject: [PATCH 04/14] Delete assignments/group_7_ass_2_2024_modificado.ipynb
---
.../group_7_ass_2_2024_modificado.ipynb | 1252 -----------------
1 file changed, 1252 deletions(-)
delete mode 100644 assignments/group_7_ass_2_2024_modificado.ipynb
diff --git a/assignments/group_7_ass_2_2024_modificado.ipynb b/assignments/group_7_ass_2_2024_modificado.ipynb
deleted file mode 100644
index d145f6b1..00000000
--- a/assignments/group_7_ass_2_2024_modificado.ipynb
+++ /dev/null
@@ -1,1252 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 1 Assignment 2\n",
- "\n",
- "It is totally prohibited to use any kind of loop. You can use stackoverflow. If you copy codes from previous answers, explain each step. No explanation is `0 points`."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.2 Lists\n",
- "1. Show the indices of the `np.nan` values in the `f_list` list. We want to see this output: `The indices 0, 1, 4, 7 have np.nan values.` **Hint: Use print function and [f-strings](https://realpython.com/python-f-strings/) to insert the indices values.**
\n",
- "\n",
- "2. Replicate 4 times the values of the list `p2_list`. We expect an ouput like this: `[ 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5]`. **Hint: Use multiplication function in `lists`and see the output.**
\n",
- "3. Print the length of `f_list`. **Hint: Length function**
\n",
- "4. Print `My teacher assistant is so boring.` using `text1` list. **Hint: Use the [`join` function](https://stackoverflow.com/questions/493819/why-is-it-string-joinlist-instead-of-list-joinstring)**
\n",
- "5. Print `My TA is so boring, but is very funny.` using `text1` list.**Hint: Use the `join` function, and `extend` method.**
\n",
- "6. Print
\n",
- "`The max value of values1 is 86 and is located in the 0 index. `
\n",
- "`The min value of values1 is 0 and is located in the 7 index. `
**Hint: Use the `f-string`, `min`, and `max` functions.**\n",
- "
\n",
- "7. Get two lists: `names` and `last_names` using `last_and_name` list. **Hint: Use `map` and `split`.**\n",
- "
\n",
- "8. Give only the last names of students who do not have email. Use the `emails` and `last_names` listt. **Hint: Use `map` and `split`.**\n",
- "
"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [],
- "source": [
- "f_list = [np.nan , np.nan, \"Austria\", \"Germany\", np.nan, \"Pakistan\", \"np.nan\", np.nan ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [],
- "source": [
- "text1 = ['My', 'teacher', 'assistant', 'is', 'so', 'boring.']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [],
- "source": [
- "p2_list = [ 2 , 3, 4, 5 ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 49,
- "metadata": {},
- "outputs": [],
- "source": [
- "values1 = [ 86, 86, 85, 85, 85, 83, 23, 0, 84, 1 ] "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 55,
- "metadata": {},
- "outputs": [],
- "source": [
- "# These two lists a\n",
- "last_and_name = [ \"CORNEJO SANCHEZ, CHRISTIAN SANTOS\", \"ORELLANA QUISPE, CRISTIAN NASSER\", \"MORALES CHOQUEHUANCA, ANGELICA KARINA\", \"GUIMARAY RIBEYRO, JOSE ROBERTO\", \"CAMACHO GAVIDIA, ABEL FERNANDO\", \"TINTAYA ORIHUELA, MEIR ALVARO\", \"CHAVEZ MARTINEZ, JOSELIN ALEXANDRA\", \"FIGUEROA MURO, LEONEL ARTURO\", \"GOMEZ CRIBILLERO, JOSE FELIPE\", \"PALOMINO SEGUÍN, AFRANIA\", \"LUZON CUEVA, BIANCA MARIETTE\", \"SUAÑA ZEGARRA, ADRIAN ANDRE\", \"SOTO POMACHAGUA, DORKAS YOMIRA JHERMY\", \"FIORENTINO MARTINEZ, LADY ALY\", \"LAMA MAVILA, HECTOR ANDRE\", \"MEZA HINOJO, GUSTAVO\", \"LOZADA MURILLO, PERSEO MARCELO\", \"ZAMBRANO JIMENEZ, MIGUEL ALONZO\", \"JACOBS LUQUE, NICOLAS\", \"VIDAL VIDAL, ROCIO GABRIELA\", \"TORRES ANICAMA, JANE CAMILA\", \"LOPEZ ESTRADA, MARIA ELISA\", \"BOYCO ORAMS, ALEJANDRO\", \"DIAZ BERROSPI, KARLINE ROSMELI\", \"RIEGA ESCALANTE, STEPHY ROSARIO\", \"LEVANO TORRES, VALERIA CECILIA\", \"ESQUIVES BRAVO, SEBASTIAN RENATO\", \"PEREZ GONZALES, JUAN CARLOS\", \"OTERO MAGUIÑA, MARIANA\", \"CLAVO CAMPOS, ANDREA BRIZETH\", \"AGUILAR GARCIA, ERICK JOSUE\", \"CALDAS VELASQUEZ, JOSUE DANIEL\", \"SALAS NUÑEZ BORJA, FABIO MANUEL\", \"PIZARRO VILLANES, FERNANDA NICOLLE\", \"QUILLATUPA MORALES, ANGELA ADELINA\", \"HUANCAYA IDONE, CESAR DANTE\", \"CALVO PORTOCARRERO, GABRIELA ISABEL\", \"IBAÑEZ ABANTO, ANGEL MAURICIO\", \"MELÉNDEZ APONTE, JUAN DIEGO\", \"CRISTIAN SERRANO, ARONE\", \"HINOJOSA CAHUANA, PERCY ALBERTH\", \"ANGLAS GARCÍA, KEVIN ARTURO\", \"ALDAVE ACOSTA, CESAR ERNESTO\", \"NÚÑEZ HUAMÁN, CÉSAR AGUSTO\", \"OBREGON HUAMAN, DIANA EDITH\", \"SOTO PACHERRES, RODRIGO FRANCO\", \"INGARUCA RIVERA, GRETTEL ALEXANDRA\", \"ROJAS HUAMAN, ROSA ANGELA\", \"NEYRA SALAS, DANTE OMAR\", \"HUERTA ESPINOZA, YAJAIRA ALEXANDRA\", \"HUANCA MARTINEZ, JORGE ALBERTO\", \"FLORES CADILLO, ALEXIS\" ]\n",
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\", \"\", \"\", \"\", \"\", \"\", \"f0873079@pucp.edu.pe\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"flores.alexis@pucp.edu.pe\", ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 43,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The indices [0, 1, 4, 7] have np.nan values.\n"
- ]
- }
- ],
- "source": [
- "# Answer 1\n",
- "nan_values = [i for i, value in enumerate(f_list) if value is np.nan]\n",
- "print(f\"The indices {nan_values} have np.nan values.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 44,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5]\n"
- ]
- }
- ],
- "source": [
- "# Answer 2\n",
- "p2_list_2 = p2_list * 4\n",
- "print(p2_list_2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 45,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "8\n"
- ]
- }
- ],
- "source": [
- "# Answer 3\n",
- "print(len(f_list))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 46,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'My teacher assistant is so boring.'"
- ]
- },
- "execution_count": 46,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Answer 4\n",
- "answer_4 = ' '.join(text1)\n",
- "answer_4"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 47,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'My TA My teacher assistant is so boring but is very funny'"
- ]
- },
- "execution_count": 47,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Answer 5\n",
- "text1[-1] = text1[-1].rstrip('.')\n",
- "text_answer = ['My', 'TA']\n",
- "text_answer_b = ['but', 'is', 'very', 'funny']\n",
- "text_answer.extend(text1 + text_answer_b)\n",
- "answer_5 = ' '.join(text_answer)\n",
- "answer_5"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 50,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The max value of values1 is 86 and is located in the 0 index.\n"
- ]
- }
- ],
- "source": [
- "# Answer 6.1\n",
- "max_index = values1.index(max(values1))\n",
- "print(f\"The max value of values1 is {max(values1)} and is located in the {max_index} index.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 51,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The min value of values1 is 0 and is located in the 7 index.\n"
- ]
- }
- ],
- "source": [
- "# Answer 6.2\n",
- "min_index = values1.index(min(values1))\n",
- "print(f\"The min value of values1 is {min(values1)} and is located in the {min_index} index.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 53,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Names: ('CHRISTIAN SANTOS', 'CRISTIAN NASSER', 'ANGELICA KARINA', 'JOSE ROBERTO', 'ABEL FERNANDO', 'MEIR ALVARO', 'JOSELIN ALEXANDRA', 'LEONEL ARTURO', 'JOSE FELIPE', 'AFRANIA', 'BIANCA MARIETTE', 'ADRIAN ANDRE', 'DORKAS YOMIRA JHERMY', 'LADY ALY', 'HECTOR ANDRE', 'GUSTAVO', 'PERSEO MARCELO', 'MIGUEL ALONZO', 'NICOLAS', 'ROCIO GABRIELA', 'JANE CAMILA', 'MARIA ELISA', 'ALEJANDRO', 'KARLINE ROSMELI', 'STEPHY ROSARIO', 'VALERIA CECILIA', 'SEBASTIAN RENATO', 'JUAN CARLOS', 'MARIANA', 'ANDREA BRIZETH', 'ERICK JOSUE', 'JOSUE DANIEL', 'FABIO MANUEL', 'FERNANDA NICOLLE', 'ANGELA ADELINA', 'CESAR DANTE', 'GABRIELA ISABEL', 'ANGEL MAURICIO', 'JUAN DIEGO', 'ARONE', 'PERCY ALBERTH', 'KEVIN ARTURO', 'CESAR ERNESTO', 'CÉSAR AGUSTO', 'DIANA EDITH', 'RODRIGO FRANCO', 'GRETTEL ALEXANDRA', 'ROSA ANGELA', 'DANTE OMAR', 'YAJAIRA ALEXANDRA', 'JORGE ALBERTO', 'ALEXIS')\n",
- "Last Names: ('CORNEJO SANCHEZ', 'ORELLANA QUISPE', 'MORALES CHOQUEHUANCA', 'GUIMARAY RIBEYRO', 'CAMACHO GAVIDIA', 'TINTAYA ORIHUELA', 'CHAVEZ MARTINEZ', 'FIGUEROA MURO', 'GOMEZ CRIBILLERO', 'PALOMINO SEGUÍN', 'LUZON CUEVA', 'SUAÑA ZEGARRA', 'SOTO POMACHAGUA', 'FIORENTINO MARTINEZ', 'LAMA MAVILA', 'MEZA HINOJO', 'LOZADA MURILLO', 'ZAMBRANO JIMENEZ', 'JACOBS LUQUE', 'VIDAL VIDAL', 'TORRES ANICAMA', 'LOPEZ ESTRADA', 'BOYCO ORAMS', 'DIAZ BERROSPI', 'RIEGA ESCALANTE', 'LEVANO TORRES', 'ESQUIVES BRAVO', 'PEREZ GONZALES', 'OTERO MAGUIÑA', 'CLAVO CAMPOS', 'AGUILAR GARCIA', 'CALDAS VELASQUEZ', 'SALAS NUÑEZ BORJA', 'PIZARRO VILLANES', 'QUILLATUPA MORALES', 'HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'HINOJOSA CAHUANA', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ', 'FLORES CADILLO')\n"
- ]
- }
- ],
- "source": [
- "# Answer 7\n",
- "last_names, names = zip(*map(lambda x: x.split(', '), last_and_name))\n",
- " # the lambda function using split divides each name from each last name\n",
- " # the function zip takes the values and puts them in tuples\n",
- " # \"last_names, names\" indicates the code to split the tuples in two separate lists\n",
- "print(\"Names:\", names)\n",
- "print(\"Last Names:\",last_names)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 56,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ']\n"
- ]
- }
- ],
- "source": [
- "# Answer 8\n",
- "answer_8 = [last_name for last_name, email in zip(last_names, emails) if not email]\n",
- " # zip function pairs last names with its corresponding emails into tuples\n",
- " # \"last_name for last_name, email in ... if not email\" indicates the last name for each pair if email is empty\n",
- "print(answer_8)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.3 Strings\n",
- "\n",
- "\n",
- "1. Drop the duplicated blank spaces in `str1`. Do not use the regular expresions. Do not use the code shown in class. Explain your steps.**Hint: Use `split` method and `join` function.**\n",
- "\n",
- "2. Get the number of letters in the string .**Hint: Use `len`function.**\n",
- "\n",
- "3. Get the number of blank spaces (all of them) in the string .**Hint: Use `len`function.**\n",
- "\n",
- "4. Get the position of `@` in each string in the emails list. **Hint: Use `map`function and `find` method.**\n",
- "\n",
- "5. Identifies if exists `.edu.` in each string in the `emails` list. Get a list of Booleans. **Hint: Use `map`function and `find` method.**\n",
- "\n",
- "6. Get all the strings before the first dot `.`in each string in the `emails` list. Identifies how many of them has `@`. **Hint: Use `map`function and `find` method.**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 30,
- "metadata": {},
- "outputs": [],
- "source": [
- "str1 = 'I am too old'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "metadata": {},
- "outputs": [],
- "source": [
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "I am too old\n"
- ]
- }
- ],
- "source": [
- "# 1. Drop the duplicated blank spaces in str1. Do not use the regular expresions. Do not use the code shown in class. Explain your steps.Hint: Use split method and join function.\n",
- "\n",
- "# First, we split the string into a list of words\n",
- "list_words = str1.split()\n",
- "\n",
- "# Then, we remove the empty strings from the list\n",
- "non_empty_words = [word for word in list_words if word]\n",
- "\n",
- "# Lastly, we join the non-empty words back into a string\n",
- "result_str = ' '.join(non_empty_words)\n",
- "\n",
- "print(result_str)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 34,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Number of letters: 9\n"
- ]
- }
- ],
- "source": [
- "# 2. Get the number of letters in the string .Hint: Use lenfunction.\n",
- "\n",
- "# In order to count the number of letters, we need to remove non-alphabetic characters first, we do that \n",
- "# using the len function (to printe the lenght)\n",
- "# and verifying that the character is alphabetic (a letter), using the if function.\n",
- "num_letters = len([char for char in str1 if char.isalpha()])\n",
- "\n",
- "# This is a better way to print the answer, including the string value \"Number of letters\"\n",
- "print(\"Number of letters:\",num_letters)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 35,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Number of blank spaces: 85\n"
- ]
- }
- ],
- "source": [
- "# 3. Get the number of blank spaces (all of them) in the string .Hint: Use lenfunction.\n",
- "\n",
- "# First, we create a list with the blank spaces, using the \"isspace\" method\n",
- "blank_space = [char for char in str1 if char.isspace()]\n",
- "\n",
- "# Then we apply the function len() to the list to get the total number of blank spaces\n",
- "num_blank_spaces = len(blank_space)\n",
- "\n",
- "# Finally, we print the result\n",
- "print(\"Number of blank spaces:\", num_blank_spaces)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 37,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Positions of '@': [9, 11, 14, 9, 12, 8, 14, 9, 8, 16, 12, 13, 6, 9, 10, 12, 8, 11, 14, 6, 11, 7, 15, 9, 12, 8, 9, 11, 13, 6, 9, 12, 11, 16, 11]\n"
- ]
- }
- ],
- "source": [
- "# 4. Get the position of @ in each string in the emails list. Hint: Use mapfunction and find method.\n",
- "\n",
- "# First we use the map() function to apply the specified lambda function to each element in the emails list. \n",
- "# Then, with the lambda function takes an email address (email) and finds the position of the '@' symbol using the find() method.\n",
- "# Also, list(...) Converts the result into a list.\n",
- "positions_at = list(map(lambda email: email.find('@'), emails))\n",
- "\n",
- "# Finally, we print the results\n",
- "print(\"Positions of '@':\",positions_at)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 38,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Contains '.edu.': [True, True, True, False, False, True, True, False, False, False, False, False, True, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False]\n"
- ]
- }
- ],
- "source": [
- "# 5. Identifies if exists .edu. in each string in the emails list. Get a list of Booleans. Hint: Use mapfunction and find method. # \n",
- "\n",
- "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
- "# The lambda function checks if '.edu.' is present in each email address.\n",
- "contains_edu_lambda = lambda email: '.edu.' in email\n",
- "mapped_result = map(contains_edu_lambda, emails)\n",
- "\n",
- "# Then, we convert the mapped result into a list.\n",
- "contains_edu_list = list(mapped_result)\n",
- "\n",
- "# Finally we print the result\n",
- "print(\"Contains '.edu.':\",contains_edu_list)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 39,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Substrings before the first dot: ['cscornejo@pucp', 'orellana', 'karina', 'a20083223@pucp', 'abel', 'mtintaya@pucp', 'joselin', 'a20105737@pucp', 'jfgomezc@pucp', 'afrania', 'luzon', 'adrian', 'soto', 'a20132766@pucp', 'andre', 'gustavo', 'pmlozada@pucp', 'm', 'nicolas', 'gvidal@pucp', 'jane', 'm', 'alejandro', 'a20167070@pucp', 'riega', 'vlevanot@pucp', 'sesquives@pucp', 'perez', 'mariana', 'aclavo@pucp', 'a20182474@pucp', 'josue', 'fabio', 'fernanda', 'aquillatupa@pucp']\n",
- "Number of substrings containing '@': 14\n"
- ]
- }
- ],
- "source": [
- "# 6. Get all the strings before the first dot '.' in each string in the emails list.\n",
- "# First, we identify how many of them have '@'.\n",
- "\n",
- "# Then we use the map function to apply a lambda function to each element in the emails list.\n",
- "# The lambda function extracts the substring before the first dot '.' and checks if it contains '@'.\n",
- "substring_before_dot_lambda = lambda email: email.split('.')[0]\n",
- "mapped_result = map(substring_before_dot_lambda, emails)\n",
- "\n",
- "# After that, we convert the mapped result into a list.\n",
- "substring_before_dot_list = list(mapped_result)\n",
- "\n",
- "# Also, we count how many substrings contain '@' and print the results\n",
- "count_with_at = sum('@' in substring for substring in substring_before_dot_list)\n",
- "\n",
- "print(\"Substrings before the first dot:\", substring_before_dot_list)\n",
- "print(\"Number of substrings containing '@':\",count_with_at)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.4 Pandas\n",
- "\n",
- "You can and should always ask ChatGPT, BARD, Bing, etc.\n",
- "In this exercise, you will work with financial data. Follow the steps below:\n",
- "\n",
- "1. Load Data:\n",
- " Load the financial data available at this URL: https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\n",
- " Use `pd.read_csv()` adn the link to load the data into a DataFrame.\n",
- "\n",
- "2. Explore Data:\n",
- " a) Display the first five rows of the DataFrame.\n",
- " b) Display the summary statistics of the DataFrame.\n",
- "\n",
- "3. Add Columns:\n",
- " a) Add a new column 'Level' that categorizes the 'Close' column values into 'Low' (< 20), 'Medium' (20-30), and 'High' (>30). You need to make a filter to make this categorization. You can see below some suggestions how to do it.\n",
- " b) Add a new column 'Year' extracted from the 'Date' column. You need to work with dates. You can do it directly workiwith the column splitting it or changing it to date format an dfollowing the below suggestion. \n",
- "\n",
- "4. Add Rows:\n",
- " a) Add a new row with a date of your choice and fill the other columns with appropriate values. \n",
- "\n",
- "5. Analysis:\n",
- " a) Calculate the average of the column 'Close' for each 'Year'. You need to group for this task.\n",
- " b) Count the number of 'High', 'Medium', and 'Low' Level days in the data.\n",
- "\n",
- "Suggestions:\n",
- " - Use `pd.cut()` for categorizing 'Close' values.\n",
- " - Use `pd.to_datetime()` and `dt.year` to extract the year from a date.\n",
- " - Use `DataFrame.append()` or `pd.concat()` to add rows.\n",
- " - Use `groupby()` for aggregation tasks."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 40,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "\n",
- "# Este código carga los datos desde la URL proporcionada\n",
- "url = \"https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\"\n",
- "df = pd.read_csv(url)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 41,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8572 | \n",
- " 12/29/2023 | \n",
- " 12.55 | \n",
- " 13.19 | \n",
- " 12.36 | \n",
- " 12.45 | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8577 rows × 5 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26\n",
- "... ... ... ... ... ...\n",
- "8572 12/29/2023 12.55 13.19 12.36 12.45\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35\n",
- "\n",
- "[8577 rows x 5 columns]"
- ]
- },
- "execution_count": 41,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Verificamos los datos asignados a la variable df \n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Primeras cinco filas del DataFrame:\n",
- " DATE OPEN HIGH LOW CLOSE\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26\n",
- "\n",
- "Estadísticas resumidas del DataFrame:\n",
- " OPEN HIGH LOW CLOSE\n",
- "count 8577.000000 8577.000000 8577.000000 8577.000000\n",
- "mean 19.666454 20.474364 18.914884 19.580374\n",
- "std 7.979066 8.439927 7.469827 7.906214\n",
- "min 9.010000 9.310000 8.560000 9.140000\n",
- "25% 13.940000 14.540000 13.400000 13.880000\n",
- "50% 17.790000 18.470000 17.220000 17.760000\n",
- "75% 23.100000 23.960000 22.320000 22.990000\n",
- "max 82.690000 89.530000 72.760000 82.690000\n"
- ]
- }
- ],
- "source": [
- "# Mostramos las primeras cinco filas del DataFrame\n",
- "print(\"Primeras cinco filas del DataFrame:\")\n",
- "print(df.head())\n",
- "\n",
- "# Mostramos las estadísticas resumidas del DataFrame\n",
- "print(\"\\nEstadísticas resumidas del DataFrame:\")\n",
- "print(df.describe())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Añadimos una columna 'Level' para categorizar los valores de 'CLOSE'\n",
- "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
- " labels=['Low', 'Medium', 'High'])\n",
- "\n",
- "# Añadimos la columna 'Year' extraída de la columna 'DATE' y nos quedamos solo con el año con la función dt.year.\n",
- "# Para ello, primero se categorizo la variable 'DATE' como datetime.\n",
- "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- " Level | \n",
- " Year | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8572 | \n",
- " 12/29/2023 | \n",
- " 12.55 | \n",
- " 13.19 | \n",
- " 12.36 | \n",
- " 12.45 | \n",
- " Low | \n",
- " 2023 | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8577 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE Level Year\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
- "... ... ... ... ... ... ... ...\n",
- "8572 12/29/2023 12.55 13.19 12.36 12.45 Low 2023\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
- "\n",
- "[8577 rows x 7 columns]"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- " Level | \n",
- " Year | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8577 | \n",
- " 01/06/2024 | \n",
- " 14.27 | \n",
- " 14.61 | \n",
- " 13.32 | \n",
- " 13.38 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8578 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE Level Year\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
- "... ... ... ... ... ... ... ...\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
- "8577 01/06/2024 14.27 14.61 13.32 13.38 Low 2024\n",
- "\n",
- "[8578 rows x 7 columns]"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Temporalmente eliminamos las columnas 'Level' y 'Year' para evitar NaN en el df\n",
- "df = df.drop(['Level', 'Year'], axis=1)\n",
- "\n",
- "# Agregamos la nueva fila solicitada\n",
- "new_row = pd.DataFrame({'DATE': ['01/06/2024'], \n",
- " 'OPEN': [14.27], \n",
- " 'HIGH': [14.61], \n",
- " 'LOW': [13.32], \n",
- " 'CLOSE': [13.38]}) \n",
- "\n",
- "# Agregamos la nueva fila al DataFrame existente\n",
- "df = pd.concat([df, new_row], ignore_index=True)\n",
- "\n",
- "# Recategorizamos 'Level' y 'Year' después de agregar la nueva fila\n",
- "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
- " labels=['Low', 'Medium', 'High'])\n",
- "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Level', 'Year'], dtype='object')\n"
- ]
- }
- ],
- "source": [
- "print(df.columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Year\n",
- "1990 23.06\n",
- "1991 18.37\n",
- "1992 15.45\n",
- "1993 12.69\n",
- "1994 13.93\n",
- "1995 12.39\n",
- "1996 16.44\n",
- "1997 22.36\n",
- "1998 25.60\n",
- "1999 24.37\n",
- "2000 23.32\n",
- "2001 25.75\n",
- "2002 27.29\n",
- "2003 21.98\n",
- "2004 15.48\n",
- "2005 12.81\n",
- "2006 12.81\n",
- "2007 17.54\n",
- "2008 32.70\n",
- "2009 31.48\n",
- "2010 22.55\n",
- "2011 24.20\n",
- "2012 17.80\n",
- "2013 14.23\n",
- "2014 14.18\n",
- "2015 16.67\n",
- "2016 15.83\n",
- "2017 11.09\n",
- "2018 16.64\n",
- "2019 15.39\n",
- "2020 29.25\n",
- "2021 19.66\n",
- "2022 25.64\n",
- "2023 16.85\n",
- "2024 13.62\n",
- "Name: CLOSE, dtype: float64"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Calculamos el promedio de la columna 'Close' para cada 'Year'\n",
- "average_close_by_year = df.groupby('Year')['CLOSE'].mean().round(2)\n",
- "average_close_by_year"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Level\n",
- "Low 5276\n",
- "Medium 2582\n",
- "High 720\n",
- "dtype: int64"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Contamos el número de días con niveles 'High', 'Medium' y 'Low'\n",
- "count_levels = df.groupby('Level').size()\n",
- "count_levels"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "hide_input": false,
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.4"
- },
- "toc": {
- "base_numbering": 1,
- "nav_menu": {},
- "number_sections": true,
- "sideBar": true,
- "skip_h1_title": false,
- "title_cell": "Table of Contents",
- "title_sidebar": "Contents",
- "toc_cell": false,
- "toc_position": {},
- "toc_section_display": true,
- "toc_window_display": false
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
From 6415580a17faee10880098e51926298d1c3823d2 Mon Sep 17 00:00:00 2001
From: JamesMVa <111825034+JamesMVa@users.noreply.github.com>
Date: Sat, 6 Jan 2024 22:19:22 -0500
Subject: [PATCH 05/14] Delete assignments/assignment_2_Ilenia_Ttito.ipynb
---
assignments/assignment_2_Ilenia_Ttito.ipynb | 1177 -------------------
1 file changed, 1177 deletions(-)
delete mode 100644 assignments/assignment_2_Ilenia_Ttito.ipynb
diff --git a/assignments/assignment_2_Ilenia_Ttito.ipynb b/assignments/assignment_2_Ilenia_Ttito.ipynb
deleted file mode 100644
index 5959f6da..00000000
--- a/assignments/assignment_2_Ilenia_Ttito.ipynb
+++ /dev/null
@@ -1,1177 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 1 Assignment 2\n",
- "\n",
- "It is totally prohibited to use any kind of loop. You can use stackoverflow. If you copy codes from previous answers, explain each step. No explanation is `0 points`."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.2 Lists\n",
- "1. Show the indices of the `np.nan` values in the `f_list` list. We want to see this output: `The indices 0, 1, 4, 7 have np.nan values.` **Hint: Use print function and [f-strings](https://realpython.com/python-f-strings/) to insert the indices values.**
\n",
- "\n",
- "2. Replicate 4 times the values of the list `p2_list`. We expect an ouput like this: `[ 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5]`. **Hint: Use multiplication function in `lists`and see the output.**
\n",
- "3. Print the length of `f_list`. **Hint: Length function**
\n",
- "4. Print `My teacher assistant is so boring.` using `text1` list. **Hint: Use the [`join` function](https://stackoverflow.com/questions/493819/why-is-it-string-joinlist-instead-of-list-joinstring)**
\n",
- "5. Print `My TA is so boring, but is very funny.` using `text1` list.**Hint: Use the `join` function, and `extend` method.**
\n",
- "6. Print
\n",
- "`The max value of values1 is 86 and is located in the 0 index. `
\n",
- "`The min value of values1 is 0 and is located in the 7 index. `
**Hint: Use the `f-string`, `min`, and `max` functions.**\n",
- "
\n",
- "7. Get two lists: `names` and `last_names` using `last_and_name` list. **Hint: Use `map` and `split`.**\n",
- "
\n",
- "8. Give only the last names of students who do not have email. Use the `emails` and `last_names` listt. **Hint: Use `map` and `split`.**\n",
- "
"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "f_list = [np.nan , np.nan, \"Austria\", \"Germany\", np.nan, \"Pakistan\", \"np.nan\", np.nan ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "text1 = ['My', 'teacher', 'assistant', 'is', 'so', 'boring.']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "p2_list = [ 2 , 3, 4, 5 ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "values1 = [ 86, 86, 85, 85, 85, 83, 23, 0, 84, 1 ] "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "52"
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# These two lists a\n",
- "last_and_name = [ \"CORNEJO SANCHEZ, CHRISTIAN SANTOS\", \"ORELLANA QUISPE, CRISTIAN NASSER\", \"MORALES CHOQUEHUANCA, ANGELICA KARINA\", \"GUIMARAY RIBEYRO, JOSE ROBERTO\", \"CAMACHO GAVIDIA, ABEL FERNANDO\", \"TINTAYA ORIHUELA, MEIR ALVARO\", \"CHAVEZ MARTINEZ, JOSELIN ALEXANDRA\", \"FIGUEROA MURO, LEONEL ARTURO\", \"GOMEZ CRIBILLERO, JOSE FELIPE\", \"PALOMINO SEGUÍN, AFRANIA\", \"LUZON CUEVA, BIANCA MARIETTE\", \"SUAÑA ZEGARRA, ADRIAN ANDRE\", \"SOTO POMACHAGUA, DORKAS YOMIRA JHERMY\", \"FIORENTINO MARTINEZ, LADY ALY\", \"LAMA MAVILA, HECTOR ANDRE\", \"MEZA HINOJO, GUSTAVO\", \"LOZADA MURILLO, PERSEO MARCELO\", \"ZAMBRANO JIMENEZ, MIGUEL ALONZO\", \"JACOBS LUQUE, NICOLAS\", \"VIDAL VIDAL, ROCIO GABRIELA\", \"TORRES ANICAMA, JANE CAMILA\", \"LOPEZ ESTRADA, MARIA ELISA\", \"BOYCO ORAMS, ALEJANDRO\", \"DIAZ BERROSPI, KARLINE ROSMELI\", \"RIEGA ESCALANTE, STEPHY ROSARIO\", \"LEVANO TORRES, VALERIA CECILIA\", \"ESQUIVES BRAVO, SEBASTIAN RENATO\", \"PEREZ GONZALES, JUAN CARLOS\", \"OTERO MAGUIÑA, MARIANA\", \"CLAVO CAMPOS, ANDREA BRIZETH\", \"AGUILAR GARCIA, ERICK JOSUE\", \"CALDAS VELASQUEZ, JOSUE DANIEL\", \"SALAS NUÑEZ BORJA, FABIO MANUEL\", \"PIZARRO VILLANES, FERNANDA NICOLLE\", \"QUILLATUPA MORALES, ANGELA ADELINA\", \"HUANCAYA IDONE, CESAR DANTE\", \"CALVO PORTOCARRERO, GABRIELA ISABEL\", \"IBAÑEZ ABANTO, ANGEL MAURICIO\", \"MELÉNDEZ APONTE, JUAN DIEGO\", \"CRISTIAN SERRANO, ARONE\", \"HINOJOSA CAHUANA, PERCY ALBERTH\", \"ANGLAS GARCÍA, KEVIN ARTURO\", \"ALDAVE ACOSTA, CESAR ERNESTO\", \"NÚÑEZ HUAMÁN, CÉSAR AGUSTO\", \"OBREGON HUAMAN, DIANA EDITH\", \"SOTO PACHERRES, RODRIGO FRANCO\", \"INGARUCA RIVERA, GRETTEL ALEXANDRA\", \"ROJAS HUAMAN, ROSA ANGELA\", \"NEYRA SALAS, DANTE OMAR\", \"HUERTA ESPINOZA, YAJAIRA ALEXANDRA\", \"HUANCA MARTINEZ, JORGE ALBERTO\", \"FLORES CADILLO, ALEXIS\" ]\n",
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\", \"\", \"\", \"\", \"\", \"\", \"f0873079@pucp.edu.pe\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"flores.alexis@pucp.edu.pe\", ]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.3 Strings\n",
- "\n",
- "\n",
- "1. Drop the duplicated blank spaces in `str1`. Do not use the regular expresions. Do not use the code shown in class. Explain your steps.**Hint: Use `split` method and `join` function.**\n",
- "\n",
- "2. Get the number of letters in the string .**Hint: Use `len`function.**\n",
- "\n",
- "3. Get the number of blank spaces (all of them) in the string .**Hint: Use `len`function.**\n",
- "\n",
- "4. Get the position of `@` in each string in the emails list. **Hint: Use `map`function and `find` method.**\n",
- "\n",
- "5. Identifies if exists `.edu.` in each string in the `emails` list. Get a list of Booleans. **Hint: Use `map`function and `find` method.**\n",
- "\n",
- "6. Get all the strings before the first dot `.`in each string in the `emails` list. Identifies how many of them has `@`. **Hint: Use `map`function and `find` method.**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "str1 = 'I am too old'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [],
- "source": [
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\"]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.4 Pandas\n",
- "\n",
- "You can and should always ask ChatGPT, BARD, Bing, etc.\n",
- "In this exercise, you will work with financial data. Follow the steps below:\n",
- "\n",
- "1. Load Data:\n",
- " Load the financial data available at this URL: https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\n",
- " Use `pd.read_csv()` adn the link to load the data into a DataFrame.\n",
- "\n",
- "2. Explore Data:\n",
- " a) Display the first five rows of the DataFrame.\n",
- " b) Display the summary statistics of the DataFrame.\n",
- "\n",
- "3. Add Columns:\n",
- " a) Add a new column 'Level' that categorizes the 'Close' column values into 'Low' (< 20), 'Medium' (20-30), and 'High' (>30). You need to make a filter to make this categorization. You can see below some suggestions how to do it.\n",
- " b) Add a new column 'Year' extracted from the 'Date' column. You need to work with dates. You can do it directly workiwith the column splitting it or changing it to date format an dfollowing the below suggestion. \n",
- "\n",
- "4. Add Rows:\n",
- " a) Add a new row with a date of your choice and fill the other columns with appropriate values. \n",
- "\n",
- "5. Analysis:\n",
- " a) Calculate the average of the column 'Close' for each 'Year'. You need to group for this task.\n",
- " b) Count the number of 'High', 'Medium', and 'Low' Level days in the data.\n",
- "\n",
- "Suggestions:\n",
- " - Use `pd.cut()` for categorizing 'Close' values.\n",
- " - Use `pd.to_datetime()` and `dt.year` to extract the year from a date.\n",
- " - Use `DataFrame.append()` or `pd.concat()` to add rows.\n",
- " - Use `groupby()` for aggregation tasks."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "\n",
- "# Este código carga los datos desde la URL proporcionada\n",
- "url = \"https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\"\n",
- "df = pd.read_csv(url)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8572 | \n",
- " 12/29/2023 | \n",
- " 12.55 | \n",
- " 13.19 | \n",
- " 12.36 | \n",
- " 12.45 | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8577 rows × 5 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26\n",
- "... ... ... ... ... ...\n",
- "8572 12/29/2023 12.55 13.19 12.36 12.45\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35\n",
- "\n",
- "[8577 rows x 5 columns]"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Verificamos los datos asignados a la variable df \n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Primeras cinco filas del DataFrame:\n",
- " DATE OPEN HIGH LOW CLOSE\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26\n",
- "\n",
- "Estadísticas resumidas del DataFrame:\n",
- " OPEN HIGH LOW CLOSE\n",
- "count 8577.000000 8577.000000 8577.000000 8577.000000\n",
- "mean 19.666454 20.474364 18.914884 19.580374\n",
- "std 7.979066 8.439927 7.469827 7.906214\n",
- "min 9.010000 9.310000 8.560000 9.140000\n",
- "25% 13.940000 14.540000 13.400000 13.880000\n",
- "50% 17.790000 18.470000 17.220000 17.760000\n",
- "75% 23.100000 23.960000 22.320000 22.990000\n",
- "max 82.690000 89.530000 72.760000 82.690000\n"
- ]
- }
- ],
- "source": [
- "# Mostramos las primeras cinco filas del DataFrame\n",
- "print(\"Primeras cinco filas del DataFrame:\")\n",
- "print(df.head())\n",
- "\n",
- "# Mostramos las estadísticas resumidas del DataFrame\n",
- "print(\"\\nEstadísticas resumidas del DataFrame:\")\n",
- "print(df.describe())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Añadimos una columna 'Level' para categorizar los valores de 'CLOSE'\n",
- "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
- " labels=['Low', 'Medium', 'High'])\n",
- "\n",
- "# Añadimos la columna 'Year' extraída de la columna 'DATE' y nos quedamos solo con el año con la función dt.year.\n",
- "# Para ello, primero se categorizo la variable 'DATE' como datetime.\n",
- "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- " Level | \n",
- " Year | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8572 | \n",
- " 12/29/2023 | \n",
- " 12.55 | \n",
- " 13.19 | \n",
- " 12.36 | \n",
- " 12.45 | \n",
- " Low | \n",
- " 2023 | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8577 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE Level Year\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
- "... ... ... ... ... ... ... ...\n",
- "8572 12/29/2023 12.55 13.19 12.36 12.45 Low 2023\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
- "\n",
- "[8577 rows x 7 columns]"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- " Level | \n",
- " Year | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8577 | \n",
- " 01/06/2024 | \n",
- " 14.27 | \n",
- " 14.61 | \n",
- " 13.32 | \n",
- " 13.38 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8578 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE Level Year\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
- "... ... ... ... ... ... ... ...\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
- "8577 01/06/2024 14.27 14.61 13.32 13.38 Low 2024\n",
- "\n",
- "[8578 rows x 7 columns]"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Temporalmente eliminamos las columnas 'Level' y 'Year' para evitar NaN en el df\n",
- "df = df.drop(['Level', 'Year'], axis=1)\n",
- "\n",
- "# Agregamos la nueva fila solicitada\n",
- "new_row = pd.DataFrame({'DATE': ['01/06/2024'], \n",
- " 'OPEN': [14.27], \n",
- " 'HIGH': [14.61], \n",
- " 'LOW': [13.32], \n",
- " 'CLOSE': [13.38]}) \n",
- "\n",
- "# Agregamos la nueva fila al DataFrame existente\n",
- "df = pd.concat([df, new_row], ignore_index=True)\n",
- "\n",
- "# Recategorizamos 'Level' y 'Year' después de agregar la nueva fila\n",
- "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
- " labels=['Low', 'Medium', 'High'])\n",
- "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Level', 'Year'], dtype='object')\n"
- ]
- }
- ],
- "source": [
- "print(df.columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Year | \n",
- " Average Close | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 1990 | \n",
- " 23.06 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 1991 | \n",
- " 18.37 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 1992 | \n",
- " 15.45 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 1993 | \n",
- " 12.69 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 1994 | \n",
- " 13.93 | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " 1995 | \n",
- " 12.39 | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " 1996 | \n",
- " 16.44 | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " 1997 | \n",
- " 22.36 | \n",
- "
\n",
- " \n",
- " | 8 | \n",
- " 1998 | \n",
- " 25.60 | \n",
- "
\n",
- " \n",
- " | 9 | \n",
- " 1999 | \n",
- " 24.37 | \n",
- "
\n",
- " \n",
- " | 10 | \n",
- " 2000 | \n",
- " 23.32 | \n",
- "
\n",
- " \n",
- " | 11 | \n",
- " 2001 | \n",
- " 25.75 | \n",
- "
\n",
- " \n",
- " | 12 | \n",
- " 2002 | \n",
- " 27.29 | \n",
- "
\n",
- " \n",
- " | 13 | \n",
- " 2003 | \n",
- " 21.98 | \n",
- "
\n",
- " \n",
- " | 14 | \n",
- " 2004 | \n",
- " 15.48 | \n",
- "
\n",
- " \n",
- " | 15 | \n",
- " 2005 | \n",
- " 12.81 | \n",
- "
\n",
- " \n",
- " | 16 | \n",
- " 2006 | \n",
- " 12.81 | \n",
- "
\n",
- " \n",
- " | 17 | \n",
- " 2007 | \n",
- " 17.54 | \n",
- "
\n",
- " \n",
- " | 18 | \n",
- " 2008 | \n",
- " 32.70 | \n",
- "
\n",
- " \n",
- " | 19 | \n",
- " 2009 | \n",
- " 31.48 | \n",
- "
\n",
- " \n",
- " | 20 | \n",
- " 2010 | \n",
- " 22.55 | \n",
- "
\n",
- " \n",
- " | 21 | \n",
- " 2011 | \n",
- " 24.20 | \n",
- "
\n",
- " \n",
- " | 22 | \n",
- " 2012 | \n",
- " 17.80 | \n",
- "
\n",
- " \n",
- " | 23 | \n",
- " 2013 | \n",
- " 14.23 | \n",
- "
\n",
- " \n",
- " | 24 | \n",
- " 2014 | \n",
- " 14.18 | \n",
- "
\n",
- " \n",
- " | 25 | \n",
- " 2015 | \n",
- " 16.67 | \n",
- "
\n",
- " \n",
- " | 26 | \n",
- " 2016 | \n",
- " 15.83 | \n",
- "
\n",
- " \n",
- " | 27 | \n",
- " 2017 | \n",
- " 11.09 | \n",
- "
\n",
- " \n",
- " | 28 | \n",
- " 2018 | \n",
- " 16.64 | \n",
- "
\n",
- " \n",
- " | 29 | \n",
- " 2019 | \n",
- " 15.39 | \n",
- "
\n",
- " \n",
- " | 30 | \n",
- " 2020 | \n",
- " 29.25 | \n",
- "
\n",
- " \n",
- " | 31 | \n",
- " 2021 | \n",
- " 19.66 | \n",
- "
\n",
- " \n",
- " | 32 | \n",
- " 2022 | \n",
- " 25.64 | \n",
- "
\n",
- " \n",
- " | 33 | \n",
- " 2023 | \n",
- " 16.85 | \n",
- "
\n",
- " \n",
- " | 34 | \n",
- " 2024 | \n",
- " 13.62 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Year Average Close\n",
- "0 1990 23.06\n",
- "1 1991 18.37\n",
- "2 1992 15.45\n",
- "3 1993 12.69\n",
- "4 1994 13.93\n",
- "5 1995 12.39\n",
- "6 1996 16.44\n",
- "7 1997 22.36\n",
- "8 1998 25.60\n",
- "9 1999 24.37\n",
- "10 2000 23.32\n",
- "11 2001 25.75\n",
- "12 2002 27.29\n",
- "13 2003 21.98\n",
- "14 2004 15.48\n",
- "15 2005 12.81\n",
- "16 2006 12.81\n",
- "17 2007 17.54\n",
- "18 2008 32.70\n",
- "19 2009 31.48\n",
- "20 2010 22.55\n",
- "21 2011 24.20\n",
- "22 2012 17.80\n",
- "23 2013 14.23\n",
- "24 2014 14.18\n",
- "25 2015 16.67\n",
- "26 2016 15.83\n",
- "27 2017 11.09\n",
- "28 2018 16.64\n",
- "29 2019 15.39\n",
- "30 2020 29.25\n",
- "31 2021 19.66\n",
- "32 2022 25.64\n",
- "33 2023 16.85\n",
- "34 2024 13.62"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Agrupa por 'Year' y calcula el promedio de 'Close' para cada año\n",
- "average_close_by_year = df.groupby('Year')['CLOSE'].mean()\n",
- "\n",
- "# Redondea los valores a tres decimales\n",
- "average_close_by_year = average_close_by_year.round(2)\n",
- "\n",
- "# Crea un nuevo DataFrame con los resultados\n",
- "df_average_year = pd.DataFrame({'Year': average_close_by_year.index, 'Average Close': average_close_by_year.values})\n",
- "df_average_year"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Year\n",
- "1990 23.06\n",
- "1991 18.37\n",
- "1992 15.45\n",
- "1993 12.69\n",
- "1994 13.93\n",
- "1995 12.39\n",
- "1996 16.44\n",
- "1997 22.36\n",
- "1998 25.60\n",
- "1999 24.37\n",
- "2000 23.32\n",
- "2001 25.75\n",
- "2002 27.29\n",
- "2003 21.98\n",
- "2004 15.48\n",
- "2005 12.81\n",
- "2006 12.81\n",
- "2007 17.54\n",
- "2008 32.70\n",
- "2009 31.48\n",
- "2010 22.55\n",
- "2011 24.20\n",
- "2012 17.80\n",
- "2013 14.23\n",
- "2014 14.18\n",
- "2015 16.67\n",
- "2016 15.83\n",
- "2017 11.09\n",
- "2018 16.64\n",
- "2019 15.39\n",
- "2020 29.25\n",
- "2021 19.66\n",
- "2022 25.64\n",
- "2023 16.85\n",
- "2024 13.62\n",
- "Name: CLOSE, dtype: float64"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Calculamos el promedio de la columna 'Close' para cada 'Year'\n",
- "average_close_by_year = df.groupby('Year')['CLOSE'].mean().round(2)\n",
- "average_close_by_year"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Level\n",
- "Low 5276\n",
- "Medium 2582\n",
- "High 720\n",
- "dtype: int64"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Contamos el número de días con niveles 'High', 'Medium' y 'Low'\n",
- "count_levels = df.groupby('Level').size()\n",
- "count_levels"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "hide_input": false,
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.4"
- },
- "toc": {
- "base_numbering": 1,
- "nav_menu": {},
- "number_sections": true,
- "sideBar": true,
- "skip_h1_title": false,
- "title_cell": "Table of Contents",
- "title_sidebar": "Contents",
- "toc_cell": false,
- "toc_position": {},
- "toc_section_display": true,
- "toc_window_display": false
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
From 3bdcd70c8dab8a28462b49753ba17aac03bf869c Mon Sep 17 00:00:00 2001
From: JamesMVa <111825034+JamesMVa@users.noreply.github.com>
Date: Sat, 6 Jan 2024 22:21:09 -0500
Subject: [PATCH 06/14] Delete
assignments/assignment_2/group_7_ass_2_2024.ipynb
---
.../assignment_2/group_7_ass_2_2024.ipynb | 1479 -----------------
1 file changed, 1479 deletions(-)
delete mode 100644 assignments/assignment_2/group_7_ass_2_2024.ipynb
diff --git a/assignments/assignment_2/group_7_ass_2_2024.ipynb b/assignments/assignment_2/group_7_ass_2_2024.ipynb
deleted file mode 100644
index 731a3c64..00000000
--- a/assignments/assignment_2/group_7_ass_2_2024.ipynb
+++ /dev/null
@@ -1,1479 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 1 Assignment 2\n",
- "\n",
- "Using any kind of loop is strictly prohibited. You can use Stack Overflow. If you copy code from existing answers, explain each step. No explanation means `0 points`."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.2 Lists\n",
- "1. Show the indices of the `np.nan` values in the `f_list` list. We want to see this output: `The indices 0, 1, 4, 7 have np.nan values.` **Hint: Use print function and [f-strings](https://realpython.com/python-f-strings/) to insert the indices values.**
\n",
- "\n",
- "2. Replicate 4 times the values of the list `p2_list`. We expect an output like this: `[ 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5]`. **Hint: Use multiplication function in `lists` and see the output.**
\n",
- "3. Print the length of `f_list`. **Hint: Length function**
\n",
- "4. Print `My teacher assistant is so boring.` using `text1` list. **Hint: Use the [`join` function](https://stackoverflow.com/questions/493819/why-is-it-string-joinlist-instead-of-list-joinstring)**
\n",
- "5. Print `My TA is so boring, but is very funny.` using `text1` list.**Hint: Use the `join` function, and `extend` method.**
\n",
- "6. Print
\n",
- "`The max value of values1 is 86 and is located in the 0 index. `
\n",
- "`The min value of values1 is 0 and is located in the 7 index. `
**Hint: Use the `f-string`, `min`, and `max` functions.**\n",
- "
\n",
- "7. Get two lists: `names` and `last_names` using `last_and_name` list. **Hint: Use `map` and `split`.**\n",
- "
\n",
- "8. Give only the last names of the students who do not have an email. Use the `emails` and `last_names` lists. **Hint: Use `map` and `split`.**\n",
- "
"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 33,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 34,
- "metadata": {},
- "outputs": [],
- "source": [
- "f_list = [np.nan , np.nan, \"Austria\", \"Germany\", np.nan, \"Pakistan\", \"np.nan\", np.nan ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 35,
- "metadata": {},
- "outputs": [],
- "source": [
- "text1 = ['My', 'teacher', 'assistant', 'is', 'so', 'boring.']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 36,
- "metadata": {},
- "outputs": [],
- "source": [
- "p2_list = [ 2 , 3, 4, 5 ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 37,
- "metadata": {},
- "outputs": [],
- "source": [
- "values1 = [ 86, 86, 85, 85, 85, 83, 23, 0, 84, 1 ] "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 38,
- "metadata": {
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "# These two lists a\n",
- "last_and_name = [ \"CORNEJO SANCHEZ, CHRISTIAN SANTOS\", \"ORELLANA QUISPE, CRISTIAN NASSER\", \"MORALES CHOQUEHUANCA, ANGELICA KARINA\", \"GUIMARAY RIBEYRO, JOSE ROBERTO\", \"CAMACHO GAVIDIA, ABEL FERNANDO\", \"TINTAYA ORIHUELA, MEIR ALVARO\", \"CHAVEZ MARTINEZ, JOSELIN ALEXANDRA\", \"FIGUEROA MURO, LEONEL ARTURO\", \"GOMEZ CRIBILLERO, JOSE FELIPE\", \"PALOMINO SEGUÍN, AFRANIA\", \"LUZON CUEVA, BIANCA MARIETTE\", \"SUAÑA ZEGARRA, ADRIAN ANDRE\", \"SOTO POMACHAGUA, DORKAS YOMIRA JHERMY\", \"FIORENTINO MARTINEZ, LADY ALY\", \"LAMA MAVILA, HECTOR ANDRE\", \"MEZA HINOJO, GUSTAVO\", \"LOZADA MURILLO, PERSEO MARCELO\", \"ZAMBRANO JIMENEZ, MIGUEL ALONZO\", \"JACOBS LUQUE, NICOLAS\", \"VIDAL VIDAL, ROCIO GABRIELA\", \"TORRES ANICAMA, JANE CAMILA\", \"LOPEZ ESTRADA, MARIA ELISA\", \"BOYCO ORAMS, ALEJANDRO\", \"DIAZ BERROSPI, KARLINE ROSMELI\", \"RIEGA ESCALANTE, STEPHY ROSARIO\", \"LEVANO TORRES, VALERIA CECILIA\", \"ESQUIVES BRAVO, SEBASTIAN RENATO\", \"PEREZ GONZALES, JUAN CARLOS\", \"OTERO MAGUIÑA, MARIANA\", \"CLAVO CAMPOS, ANDREA BRIZETH\", \"AGUILAR GARCIA, ERICK JOSUE\", \"CALDAS VELASQUEZ, JOSUE DANIEL\", \"SALAS NUÑEZ BORJA, FABIO MANUEL\", \"PIZARRO VILLANES, FERNANDA NICOLLE\", \"QUILLATUPA MORALES, ANGELA ADELINA\", \"HUANCAYA IDONE, CESAR DANTE\", \"CALVO PORTOCARRERO, GABRIELA ISABEL\", \"IBAÑEZ ABANTO, ANGEL MAURICIO\", \"MELÉNDEZ APONTE, JUAN DIEGO\", \"CRISTIAN SERRANO, ARONE\", \"HINOJOSA CAHUANA, PERCY ALBERTH\", \"ANGLAS GARCÍA, KEVIN ARTURO\", \"ALDAVE ACOSTA, CESAR ERNESTO\", \"NÚÑEZ HUAMÁN, CÉSAR AGUSTO\", \"OBREGON HUAMAN, DIANA EDITH\", \"SOTO PACHERRES, RODRIGO FRANCO\", \"INGARUCA RIVERA, GRETTEL ALEXANDRA\", \"ROJAS HUAMAN, ROSA ANGELA\", \"NEYRA SALAS, DANTE OMAR\", \"HUERTA ESPINOZA, YAJAIRA ALEXANDRA\", \"HUANCA MARTINEZ, JORGE ALBERTO\", \"FLORES CADILLO, ALEXIS\" ]\n",
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\", \"\", \"\", \"\", \"\", \"\", \"f0873079@pucp.edu.pe\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"flores.alexis@pucp.edu.pe\", ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 39,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The indices [0, 1, 4, 7] have np.nan values.\n"
- ]
- }
- ],
- "source": [
- " # Answer 1\n",
- "\n",
- "nan_values = [i for i, value in enumerate(f_list) if value is np.nan]\n",
- "\n",
- "print(f\"The indices {nan_values} have np.nan values.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 40,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5]\n"
- ]
- }
- ],
- "source": [
- " # Answer 2\n",
- "\n",
- "p2_list_2 = p2_list * 4\n",
- "\n",
- "print(p2_list_2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 41,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "8\n"
- ]
- }
- ],
- "source": [
- " # Answer 3\n",
- "\n",
- "print(len(f_list))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'My teacher assistant is so boring.'"
- ]
- },
- "execution_count": 42,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- " # Answer 4\n",
- "\n",
- "answer_4 = ' '.join(text1)\n",
- "answer_4"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 43,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'My TA My teacher assistant is so boring but is very funny'"
- ]
- },
- "execution_count": 43,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- " # Answer 5\n",
- " \n",
- "text1[-1] = text1[-1].rstrip('.')\n",
- " \n",
- "text_answer = ['My', 'TA']\n",
- "\n",
- "text_answer_b = ['but', 'is', 'very', 'funny']\n",
- "\n",
- "text_answer.extend(text1 + text_answer_b)\n",
- "\n",
- "answer_5 = ' '.join(text_answer)\n",
- "answer_5"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 44,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The max value of values1 is 86 and is located in the 0 index.\n"
- ]
- }
- ],
- "source": [
- " # Answer 6.1\n",
- " \n",
- "max_index = values1.index(max(values1))\n",
- " \n",
- "print(f\"The max value of values1 is {max(values1)} and is located in the {max_index} index.\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 45,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The min value of values1 is 0 and is located in the 7 index.\n"
- ]
- }
- ],
- "source": [
- " # Answer 6.2\n",
- " \n",
- "min_index = values1.index(min(values1))\n",
- " \n",
- "print(f\"The min value of values1 is {min(values1)} and is located in the {min_index} index.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 46,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Names: ('CHRISTIAN SANTOS', 'CRISTIAN NASSER', 'ANGELICA KARINA', 'JOSE ROBERTO', 'ABEL FERNANDO', 'MEIR ALVARO', 'JOSELIN ALEXANDRA', 'LEONEL ARTURO', 'JOSE FELIPE', 'AFRANIA', 'BIANCA MARIETTE', 'ADRIAN ANDRE', 'DORKAS YOMIRA JHERMY', 'LADY ALY', 'HECTOR ANDRE', 'GUSTAVO', 'PERSEO MARCELO', 'MIGUEL ALONZO', 'NICOLAS', 'ROCIO GABRIELA', 'JANE CAMILA', 'MARIA ELISA', 'ALEJANDRO', 'KARLINE ROSMELI', 'STEPHY ROSARIO', 'VALERIA CECILIA', 'SEBASTIAN RENATO', 'JUAN CARLOS', 'MARIANA', 'ANDREA BRIZETH', 'ERICK JOSUE', 'JOSUE DANIEL', 'FABIO MANUEL', 'FERNANDA NICOLLE', 'ANGELA ADELINA', 'CESAR DANTE', 'GABRIELA ISABEL', 'ANGEL MAURICIO', 'JUAN DIEGO', 'ARONE', 'PERCY ALBERTH', 'KEVIN ARTURO', 'CESAR ERNESTO', 'CÉSAR AGUSTO', 'DIANA EDITH', 'RODRIGO FRANCO', 'GRETTEL ALEXANDRA', 'ROSA ANGELA', 'DANTE OMAR', 'YAJAIRA ALEXANDRA', 'JORGE ALBERTO', 'ALEXIS')\n",
- "Last Names: ('CORNEJO SANCHEZ', 'ORELLANA QUISPE', 'MORALES CHOQUEHUANCA', 'GUIMARAY RIBEYRO', 'CAMACHO GAVIDIA', 'TINTAYA ORIHUELA', 'CHAVEZ MARTINEZ', 'FIGUEROA MURO', 'GOMEZ CRIBILLERO', 'PALOMINO SEGUÍN', 'LUZON CUEVA', 'SUAÑA ZEGARRA', 'SOTO POMACHAGUA', 'FIORENTINO MARTINEZ', 'LAMA MAVILA', 'MEZA HINOJO', 'LOZADA MURILLO', 'ZAMBRANO JIMENEZ', 'JACOBS LUQUE', 'VIDAL VIDAL', 'TORRES ANICAMA', 'LOPEZ ESTRADA', 'BOYCO ORAMS', 'DIAZ BERROSPI', 'RIEGA ESCALANTE', 'LEVANO TORRES', 'ESQUIVES BRAVO', 'PEREZ GONZALES', 'OTERO MAGUIÑA', 'CLAVO CAMPOS', 'AGUILAR GARCIA', 'CALDAS VELASQUEZ', 'SALAS NUÑEZ BORJA', 'PIZARRO VILLANES', 'QUILLATUPA MORALES', 'HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'HINOJOSA CAHUANA', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ', 'FLORES CADILLO')\n"
- ]
- }
- ],
- "source": [
- " # Answer 7\n",
- "\n",
- "last_names, names = zip(*map(lambda x: x.split(', '), last_and_name))\n",
- " # the lambda function using split divides each name from each last name\n",
- " # the function zip takes the values and puts them in tuples\n",
- " # \"last_names, names\" indicates the code to split the tuples in two separate lists\n",
- "print(\"Names:\", names)\n",
- "print(\"Last Names:\", last_names)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 47,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ']\n"
- ]
- }
- ],
- "source": [
- " # Answer 8\n",
- " \n",
- "answer_8 = [last_name for last_name, email in zip(last_names, emails) if not email]\n",
- " # zip function pairs last names with its corresponding emails into tuples\n",
- " # \"last_name for last_name, email in ... if not email\" indicates the last name for each pair if email is empty\n",
- "print(answer_8)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.3 Strings\n",
- "\n",
- "\n",
- "1. Drop the duplicated blank spaces in `str1`. Do not use regular expressions. Do not use the code shown in class. Explain your steps. **Hint: Use `split` method and `join` function.**\n",
- "\n",
- "2. Get the number of letters in the string `str1`. **Hint: Use the `len` function.**\n",
- "\n",
- "3. Get the number of blank spaces (all of them) in the string `str1`. **Hint: Use the `len` function.**\n",
- "\n",
- "4. Get the position of `@` in each string in the `emails` list. **Hint: Use the `map` function and the `find` method.**\n",
- "\n",
- "5. Identify whether `.edu.` exists in each string in the `emails` list. Get a list of Booleans. **Hint: Use the `map` function and the `find` method.**\n",
- "\n",
- "6. Get all the strings before the first dot `.` in each string in the `emails` list. Identify how many of them have `@`. **Hint: Use the `map` function and the `find` method.**"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Dear professor, the reason why I created this new branch is to correct the fact that I mistakenly edited the main branch of my group when I was writing my part. So, I was the one in charge to complete the \"Strings\" part, and here it is. I hope you can understand this."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 48,
- "metadata": {},
- "outputs": [],
- "source": [
- "str1 = 'I am too old'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 49,
- "metadata": {},
- "outputs": [],
- "source": [
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 50,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "I am too old\n"
- ]
- }
- ],
- "source": [
- "# 1. Drop the duplicated blank spaces in str1. Do not use the regular expresions. Do not use the code shown in class. Explain your steps.Hint: Use split method and join function.\n",
- "\n",
- "# First, we split the string into a list of words\n",
- "list_words = str1.split()\n",
- "\n",
- "# Then, we remove the empty strings from the list\n",
- "non_empty_words = [word for word in list_words if word]\n",
- "\n",
- "# Lastly, we join the non-empty words back into a string\n",
- "result_str = ' '.join(non_empty_words)\n",
- "\n",
- "print(result_str)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 51,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Number of letters: 9\n"
- ]
- }
- ],
- "source": [
- "# 2. Get the number of letters in the string .Hint: Use lenfunction.\n",
- "\n",
- "# To count the letters, we keep only the alphabetic characters of str1: the comprehension's\n",
- "# `if` clause checks each character with the isalpha() method,\n",
- "# and then the len function gives the length of the filtered list.\n",
- "num_letters = len([char for char in str1 if char.isalpha()])\n",
- "\n",
- "# This is a better way to print the answer, including the string value \"Number of letters\"\n",
- "print(\"Number of letters:\", num_letters)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 52,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Number of blank spaces: 85\n"
- ]
- }
- ],
- "source": [
- "# 3. Get the number of blank spaces (all of them) in the string .Hint: Use lenfunction.\n",
- "\n",
- "# First, we create a list with the blank spaces, using the \"isspace\" method\n",
- "blank_space = [char for char in str1 if char.isspace()]\n",
- "\n",
- "# Then we apply the function len() to the list to get the total number of blank spaces\n",
- "num_blank_spaces = len(blank_space)\n",
- "\n",
- "# Finally, we print the result\n",
- "print(\"Number of blank spaces:\", num_blank_spaces)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 53,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Positions of '@': [9, 11, 14, 9, 12, 8, 14, 9, 8, 16, 12, 13, 6, 9, 10, 12, 8, 11, 14, 6, 11, 7, 15, 9, 12, 8, 9, 11, 13, 6, 9, 12, 11, 16, 11]\n"
- ]
- }
- ],
- "source": [
- "# 4. Get the position of @ in each string in the emails list. Hint: Use mapfunction and find method.\n",
- "\n",
- "# First, we use the map() function to apply the lambda function to each element in the emails list.\n",
- "# The lambda function takes an email address (email) and finds the position of the '@' symbol using the find() method.\n",
- "# Finally, list(...) converts the result into a list.\n",
- "positions_at = list(map(lambda email: email.find('@'), emails))\n",
- "\n",
- "# Finally, we print the results\n",
- "print(\"Positions of '@':\", positions_at)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 54,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Contains '.edu.': [True, True, True, False, False, True, True, False, False, False, False, False, True, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False]\n"
- ]
- }
- ],
- "source": [
- "# 5. Identifies if exists .edu. in each string in the emails list. Get a list of Booleans. Hint: Use mapfunction and find method. # \n",
- "\n",
- "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
- "# The lambda function checks if '.edu.' is present in each email address.\n",
- "contains_edu_lambda = lambda email: '.edu.' in email\n",
- "mapped_result = map(contains_edu_lambda, emails)\n",
- "\n",
- "# Then, we convert the mapped result into a list.\n",
- "contains_edu_list = list(mapped_result)\n",
- "\n",
- "# Finally we print the result\n",
- "print(\"Contains '.edu.':\", contains_edu_list)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 55,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Substrings before the first dot: ['cscornejo@pucp', 'orellana', 'karina', 'a20083223@pucp', 'abel', 'mtintaya@pucp', 'joselin', 'a20105737@pucp', 'jfgomezc@pucp', 'afrania', 'luzon', 'adrian', 'soto', 'a20132766@pucp', 'andre', 'gustavo', 'pmlozada@pucp', 'm', 'nicolas', 'gvidal@pucp', 'jane', 'm', 'alejandro', 'a20167070@pucp', 'riega', 'vlevanot@pucp', 'sesquives@pucp', 'perez', 'mariana', 'aclavo@pucp', 'a20182474@pucp', 'josue', 'fabio', 'fernanda', 'aquillatupa@pucp']\n",
- "Number of substrings containing '@': 14\n"
- ]
- }
- ],
- "source": [
- "# 6. Get all the strings before the first dot '.' in each string in the emails list.\n",
- "# Then, identify how many of them contain '@'.\n",
- "\n",
- "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
- "# The lambda function extracts the substring before the first dot '.' by splitting on '.' and keeping the first piece.\n",
- "substring_before_dot_lambda = lambda email: email.split('.')[0]\n",
- "mapped_result = map(substring_before_dot_lambda, emails)\n",
- "\n",
- "# After that, we convert the mapped result into a list.\n",
- "substring_before_dot_list = list(mapped_result)\n",
- "\n",
- "# Also, we count how many substrings contain '@' and print the results\n",
- "count_with_at = sum('@' in substring for substring in substring_before_dot_list)\n",
- "\n",
- "print(\"Substrings before the first dot:\", substring_before_dot_list)\n",
- "print(\"Number of substrings containing '@':\", count_with_at)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.4 Pandas\n",
- "\n",
- "You can and should always ask ChatGPT, BARD, Bing, etc.\n",
- "In this exercise, you will work with financial data. Follow the steps below:\n",
- "\n",
- "1. Load Data:\n",
- " Load the financial data available at this URL: https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\n",
- " Use `pd.read_csv()` and the link to load the data into a DataFrame.\n",
- "\n",
- "2. Explore Data:\n",
- " a) Display the first five rows of the DataFrame.\n",
- " b) Display the summary statistics of the DataFrame.\n",
- "\n",
- "3. Add Columns:\n",
- " a) Add a new column 'Level' that categorizes the 'Close' column values into 'Low' (< 20), 'Medium' (20-30), and 'High' (>30). You need to make a filter to make this categorization. You can see below some suggestions how to do it.\n",
- " b) Add a new column 'Year' extracted from the 'Date' column. You need to work with dates. You can do it either by working directly with the column and splitting it, or by converting it to a date format and following the suggestion below.\n",
- "\n",
- "4. Add Rows:\n",
- " a) Add a new row with a date of your choice and fill the other columns with appropriate values. \n",
- "\n",
- "5. Analysis:\n",
- " a) Calculate the average of the column 'Close' for each 'Year'. You need to group for this task.\n",
- " b) Count the number of 'High', 'Medium', and 'Low' Level days in the data.\n",
- "\n",
- "Suggestions:\n",
- " - Use `pd.cut()` for categorizing 'Close' values.\n",
- " - Use `pd.to_datetime()` and `dt.year` to extract the year from a date.\n",
- " - Use `DataFrame.append()` or `pd.concat()` to add rows.\n",
- " - Use `groupby()` for aggregation tasks."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 56,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 1\n",
- "import pandas as pd\n",
- "\n",
- "# This code loads the data from the provided URL.\n",
- "url = \"https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\"\n",
- "df = pd.read_csv(url)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 57,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8572 | \n",
- " 12/29/2023 | \n",
- " 12.55 | \n",
- " 13.19 | \n",
- " 12.36 | \n",
- " 12.45 | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8577 rows × 5 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26\n",
- "... ... ... ... ... ...\n",
- "8572 12/29/2023 12.55 13.19 12.36 12.45\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35\n",
- "\n",
- "[8577 rows x 5 columns]"
- ]
- },
- "execution_count": 57,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# We check the data assigned to the variable 'df'.\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 58,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Primeras cinco filas del DataFrame:\n",
- " DATE OPEN HIGH LOW CLOSE\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26\n",
- "\n",
- "Estadísticas resumidas del DataFrame:\n",
- " OPEN HIGH LOW CLOSE\n",
- "count 8577.000000 8577.000000 8577.000000 8577.000000\n",
- "mean 19.666454 20.474364 18.914884 19.580374\n",
- "std 7.979066 8.439927 7.469827 7.906214\n",
- "min 9.010000 9.310000 8.560000 9.140000\n",
- "25% 13.940000 14.540000 13.400000 13.880000\n",
- "50% 17.790000 18.470000 17.220000 17.760000\n",
- "75% 23.100000 23.960000 22.320000 22.990000\n",
- "max 82.690000 89.530000 72.760000 82.690000\n"
- ]
- }
- ],
- "source": [
- "# 2\n",
- "# We display the first five rows of the DataFrame.\n",
- "print(\"Primeras cinco filas del DataFrame:\")\n",
- "print(df.head())\n",
- "\n",
- "# We display the summarized statistics of the DataFrame.\n",
- "print(\"\\nEstadísticas resumidas del DataFrame:\")\n",
- "print(df.describe())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 59,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- " Level | \n",
- " Year | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8572 | \n",
- " 12/29/2023 | \n",
- " 12.55 | \n",
- " 13.19 | \n",
- " 12.36 | \n",
- " 12.45 | \n",
- " Low | \n",
- " 2023 | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8577 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE Level Year\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
- "... ... ... ... ... ... ... ...\n",
- "8572 12/29/2023 12.55 13.19 12.36 12.45 Low 2023\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
- "\n",
- "[8577 rows x 7 columns]"
- ]
- },
- "execution_count": 59,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 3\n",
- "# We add a 'Level' column to categorize the 'CLOSE' values\n",
- "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
- " labels=['Low', 'Medium', 'High'])\n",
- "\n",
- "# We add the 'Year' column extracted from the 'DATE' column: 'DATE' is first parsed with pd.to_datetime,\n",
- "# and then only the year is kept through the dt.year accessor.\n",
- "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
- "\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 60,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- " Level | \n",
- " Year | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8577 | \n",
- " 01/06/2024 | \n",
- " 14.27 | \n",
- " 14.61 | \n",
- " 13.32 | \n",
- " 13.38 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8578 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE Level Year\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
- "... ... ... ... ... ... ... ...\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
- "8577 01/06/2024 14.27 14.61 13.32 13.38 Low 2024\n",
- "\n",
- "[8578 rows x 7 columns]"
- ]
- },
- "execution_count": 60,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 4\n",
- "# Temporalmente eliminamos las columnas 'Level' y 'Year' para evitar NaN en el df\n",
- "df = df.drop(['Level', 'Year'], axis=1)\n",
- "\n",
- "# Agregamos la nueva fila solicitada\n",
- "new_row = pd.DataFrame({'DATE': ['01/06/2024'], \n",
- " 'OPEN': [14.27], \n",
- " 'HIGH': [14.61], \n",
- " 'LOW': [13.32], \n",
- " 'CLOSE': [13.38]}) \n",
- "\n",
- "# Agregamos la nueva fila al DataFrame existente\n",
- "df = pd.concat([df, new_row], ignore_index=True)\n",
- "\n",
- "# Recategorizamos 'Level' y 'Year' después de agregar la nueva fila\n",
- "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
- " labels=['Low', 'Medium', 'High'])\n",
- "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 61,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Year | \n",
- " Average Close | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 1990 | \n",
- " 23.06 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 1991 | \n",
- " 18.37 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 1992 | \n",
- " 15.45 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 1993 | \n",
- " 12.69 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 1994 | \n",
- " 13.93 | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " 1995 | \n",
- " 12.39 | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " 1996 | \n",
- " 16.44 | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " 1997 | \n",
- " 22.36 | \n",
- "
\n",
- " \n",
- " | 8 | \n",
- " 1998 | \n",
- " 25.60 | \n",
- "
\n",
- " \n",
- " | 9 | \n",
- " 1999 | \n",
- " 24.37 | \n",
- "
\n",
- " \n",
- " | 10 | \n",
- " 2000 | \n",
- " 23.32 | \n",
- "
\n",
- " \n",
- " | 11 | \n",
- " 2001 | \n",
- " 25.75 | \n",
- "
\n",
- " \n",
- " | 12 | \n",
- " 2002 | \n",
- " 27.29 | \n",
- "
\n",
- " \n",
- " | 13 | \n",
- " 2003 | \n",
- " 21.98 | \n",
- "
\n",
- " \n",
- " | 14 | \n",
- " 2004 | \n",
- " 15.48 | \n",
- "
\n",
- " \n",
- " | 15 | \n",
- " 2005 | \n",
- " 12.81 | \n",
- "
\n",
- " \n",
- " | 16 | \n",
- " 2006 | \n",
- " 12.81 | \n",
- "
\n",
- " \n",
- " | 17 | \n",
- " 2007 | \n",
- " 17.54 | \n",
- "
\n",
- " \n",
- " | 18 | \n",
- " 2008 | \n",
- " 32.70 | \n",
- "
\n",
- " \n",
- " | 19 | \n",
- " 2009 | \n",
- " 31.48 | \n",
- "
\n",
- " \n",
- " | 20 | \n",
- " 2010 | \n",
- " 22.55 | \n",
- "
\n",
- " \n",
- " | 21 | \n",
- " 2011 | \n",
- " 24.20 | \n",
- "
\n",
- " \n",
- " | 22 | \n",
- " 2012 | \n",
- " 17.80 | \n",
- "
\n",
- " \n",
- " | 23 | \n",
- " 2013 | \n",
- " 14.23 | \n",
- "
\n",
- " \n",
- " | 24 | \n",
- " 2014 | \n",
- " 14.18 | \n",
- "
\n",
- " \n",
- " | 25 | \n",
- " 2015 | \n",
- " 16.67 | \n",
- "
\n",
- " \n",
- " | 26 | \n",
- " 2016 | \n",
- " 15.83 | \n",
- "
\n",
- " \n",
- " | 27 | \n",
- " 2017 | \n",
- " 11.09 | \n",
- "
\n",
- " \n",
- " | 28 | \n",
- " 2018 | \n",
- " 16.64 | \n",
- "
\n",
- " \n",
- " | 29 | \n",
- " 2019 | \n",
- " 15.39 | \n",
- "
\n",
- " \n",
- " | 30 | \n",
- " 2020 | \n",
- " 29.25 | \n",
- "
\n",
- " \n",
- " | 31 | \n",
- " 2021 | \n",
- " 19.66 | \n",
- "
\n",
- " \n",
- " | 32 | \n",
- " 2022 | \n",
- " 25.64 | \n",
- "
\n",
- " \n",
- " | 33 | \n",
- " 2023 | \n",
- " 16.85 | \n",
- "
\n",
- " \n",
- " | 34 | \n",
- " 2024 | \n",
- " 13.62 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Year Average Close\n",
- "0 1990 23.06\n",
- "1 1991 18.37\n",
- "2 1992 15.45\n",
- "3 1993 12.69\n",
- "4 1994 13.93\n",
- "5 1995 12.39\n",
- "6 1996 16.44\n",
- "7 1997 22.36\n",
- "8 1998 25.60\n",
- "9 1999 24.37\n",
- "10 2000 23.32\n",
- "11 2001 25.75\n",
- "12 2002 27.29\n",
- "13 2003 21.98\n",
- "14 2004 15.48\n",
- "15 2005 12.81\n",
- "16 2006 12.81\n",
- "17 2007 17.54\n",
- "18 2008 32.70\n",
- "19 2009 31.48\n",
- "20 2010 22.55\n",
- "21 2011 24.20\n",
- "22 2012 17.80\n",
- "23 2013 14.23\n",
- "24 2014 14.18\n",
- "25 2015 16.67\n",
- "26 2016 15.83\n",
- "27 2017 11.09\n",
- "28 2018 16.64\n",
- "29 2019 15.39\n",
- "30 2020 29.25\n",
- "31 2021 19.66\n",
- "32 2022 25.64\n",
- "33 2023 16.85\n",
- "34 2024 13.62"
- ]
- },
- "execution_count": 61,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 5\n",
- "# Agrupa por 'Year' y calcula el promedio de 'Close' para cada año\n",
- "average_close_by_year = df.groupby('Year')['CLOSE'].mean()\n",
- "\n",
- "# Redondea los valores a tres decimales\n",
- "average_close_by_year = average_close_by_year.round(2)\n",
- "\n",
- "# Crea un nuevo DataFrame con los resultados\n",
- "df_average_year = pd.DataFrame({'Year': average_close_by_year.index, 'Average Close': average_close_by_year.values})\n",
- "df_average_year"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 62,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Low 5276\n",
- "Medium 2582\n",
- "High 720\n",
- "Name: Level, dtype: int64"
- ]
- },
- "execution_count": 62,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Count the number of 'High', 'Medium', and 'Low' Level days in the data\n",
- "df = df['Level'].value_counts()\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "hide_input": false,
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.7"
- },
- "toc": {
- "base_numbering": 1,
- "nav_menu": {},
- "number_sections": true,
- "sideBar": true,
- "skip_h1_title": false,
- "title_cell": "Table of Contents",
- "title_sidebar": "Contents",
- "toc_cell": false,
- "toc_position": {},
- "toc_section_display": true,
- "toc_window_display": false
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
From 1a0d67bfed8ffc738a21ffa2331616e4c95acaf8 Mon Sep 17 00:00:00 2001
From: JamesMVa <111825034+JamesMVa@users.noreply.github.com>
Date: Sat, 6 Jan 2024 22:21:30 -0500
Subject: [PATCH 07/14] #33
---
.../assignment_2/group_7_ass_2_2024.ipynb | 1479 +++++++++++++++++
1 file changed, 1479 insertions(+)
create mode 100644 assignments/assignment_2/group_7_ass_2_2024.ipynb
diff --git a/assignments/assignment_2/group_7_ass_2_2024.ipynb b/assignments/assignment_2/group_7_ass_2_2024.ipynb
new file mode 100644
index 00000000..731a3c64
--- /dev/null
+++ b/assignments/assignment_2/group_7_ass_2_2024.ipynb
@@ -0,0 +1,1479 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 1 Assignment 2\n",
+ "\n",
+ "It is totally prohibited to use any kind of loop. You can use stackoverflow. If you copy codes from previous answers, explain each step. No explanation is `0 points`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.2 Lists\n",
+ "1. Show the indices of the `np.nan` values in the `f_list` list. We want to see this output: `The indices 0, 1, 4, 7 have np.nan values.` **Hint: Use print function and [f-strings](https://realpython.com/python-f-strings/) to insert the indices values.**
\n",
+ "\n",
+ "2. Replicate 4 times the values of the list `p2_list`. We expect an output like this: `[ 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5]`. **Hint: Use multiplication function in `lists` and see the output.**
\n",
+ "3. Print the length of `f_list`. **Hint: Length function**
\n",
+ "4. Print `My teacher assistant is so boring.` using `text1` list. **Hint: Use the [`join` function](https://stackoverflow.com/questions/493819/why-is-it-string-joinlist-instead-of-list-joinstring)**
\n",
+ "5. Print `My TA is so boring, but is very funny.` using the `text1` list. **Hint: Use the `join` function and the `extend` method.**
\n",
+ "6. Print
\n",
+ "`The max value of values1 is 86 and is located in the 0 index. `
\n",
+ "`The min value of values1 is 0 and is located in the 7 index. `
**Hint: Use the `f-string`, `min`, and `max` functions.**\n",
+ "
\n",
+ "7. Get two lists: `names` and `last_names` using `last_and_name` list. **Hint: Use `map` and `split`.**\n",
+ "
\n",
+ "8. Give only the last names of students who do not have an email. Use the `emails` and `last_names` lists. **Hint: Use `map` and `split`.**\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "f_list = [np.nan , np.nan, \"Austria\", \"Germany\", np.nan, \"Pakistan\", \"np.nan\", np.nan ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "text1 = ['My', 'teacher', 'assistant', 'is', 'so', 'boring.']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "p2_list = [ 2 , 3, 4, 5 ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "values1 = [ 86, 86, 85, 85, 85, 83, 23, 0, 84, 1 ] "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# These two lists a\n",
+ "last_and_name = [ \"CORNEJO SANCHEZ, CHRISTIAN SANTOS\", \"ORELLANA QUISPE, CRISTIAN NASSER\", \"MORALES CHOQUEHUANCA, ANGELICA KARINA\", \"GUIMARAY RIBEYRO, JOSE ROBERTO\", \"CAMACHO GAVIDIA, ABEL FERNANDO\", \"TINTAYA ORIHUELA, MEIR ALVARO\", \"CHAVEZ MARTINEZ, JOSELIN ALEXANDRA\", \"FIGUEROA MURO, LEONEL ARTURO\", \"GOMEZ CRIBILLERO, JOSE FELIPE\", \"PALOMINO SEGUÍN, AFRANIA\", \"LUZON CUEVA, BIANCA MARIETTE\", \"SUAÑA ZEGARRA, ADRIAN ANDRE\", \"SOTO POMACHAGUA, DORKAS YOMIRA JHERMY\", \"FIORENTINO MARTINEZ, LADY ALY\", \"LAMA MAVILA, HECTOR ANDRE\", \"MEZA HINOJO, GUSTAVO\", \"LOZADA MURILLO, PERSEO MARCELO\", \"ZAMBRANO JIMENEZ, MIGUEL ALONZO\", \"JACOBS LUQUE, NICOLAS\", \"VIDAL VIDAL, ROCIO GABRIELA\", \"TORRES ANICAMA, JANE CAMILA\", \"LOPEZ ESTRADA, MARIA ELISA\", \"BOYCO ORAMS, ALEJANDRO\", \"DIAZ BERROSPI, KARLINE ROSMELI\", \"RIEGA ESCALANTE, STEPHY ROSARIO\", \"LEVANO TORRES, VALERIA CECILIA\", \"ESQUIVES BRAVO, SEBASTIAN RENATO\", \"PEREZ GONZALES, JUAN CARLOS\", \"OTERO MAGUIÑA, MARIANA\", \"CLAVO CAMPOS, ANDREA BRIZETH\", \"AGUILAR GARCIA, ERICK JOSUE\", \"CALDAS VELASQUEZ, JOSUE DANIEL\", \"SALAS NUÑEZ BORJA, FABIO MANUEL\", \"PIZARRO VILLANES, FERNANDA NICOLLE\", \"QUILLATUPA MORALES, ANGELA ADELINA\", \"HUANCAYA IDONE, CESAR DANTE\", \"CALVO PORTOCARRERO, GABRIELA ISABEL\", \"IBAÑEZ ABANTO, ANGEL MAURICIO\", \"MELÉNDEZ APONTE, JUAN DIEGO\", \"CRISTIAN SERRANO, ARONE\", \"HINOJOSA CAHUANA, PERCY ALBERTH\", \"ANGLAS GARCÍA, KEVIN ARTURO\", \"ALDAVE ACOSTA, CESAR ERNESTO\", \"NÚÑEZ HUAMÁN, CÉSAR AGUSTO\", \"OBREGON HUAMAN, DIANA EDITH\", \"SOTO PACHERRES, RODRIGO FRANCO\", \"INGARUCA RIVERA, GRETTEL ALEXANDRA\", \"ROJAS HUAMAN, ROSA ANGELA\", \"NEYRA SALAS, DANTE OMAR\", \"HUERTA ESPINOZA, YAJAIRA ALEXANDRA\", \"HUANCA MARTINEZ, JORGE ALBERTO\", \"FLORES CADILLO, ALEXIS\" ]\n",
+ "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\", \"\", \"\", \"\", \"\", \"\", \"f0873079@pucp.edu.pe\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"flores.alexis@pucp.edu.pe\", ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The indices [0, 1, 4, 7] have np.nan values.\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 1\n",
+ "\n",
+ "nan_values = [i for i, value in enumerate(f_list) if value is np.nan]\n",
+ "\n",
+ "print(f\"The indices {nan_values} have np.nan values.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5]\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 2\n",
+ "\n",
+ "p2_list_2 = p2_list * 4\n",
+ "\n",
+ "print(p2_list_2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "8\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 3\n",
+ "\n",
+ "print(len(f_list))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'My teacher assistant is so boring.'"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ " # Answer 4\n",
+ "\n",
+ "answer_4 = ' '.join(text1)\n",
+ "answer_4"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'My TA My teacher assistant is so boring but is very funny'"
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ " # Answer 5\n",
+ " \n",
+ "text1[-1] = text1[-1].rstrip('.')\n",
+ " \n",
+ "text_answer = ['My', 'TA']\n",
+ "\n",
+ "text_answer_b = ['but', 'is', 'very', 'funny']\n",
+ "\n",
+ "text_answer.extend(text1 + text_answer_b)\n",
+ "\n",
+ "answer_5 = ' '.join(text_answer)\n",
+ "answer_5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The max value of values1 is 86 and is located in the 0 index.\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 6.1\n",
+ " \n",
+ "max_index = values1.index(max(values1))\n",
+ " \n",
+ "print(f\"The max value of values1 is {max(values1)} and is located in the {max_index} index.\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The min value of values1 is 0 and is located in the 7 index.\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 6.2\n",
+ " \n",
+ "min_index = values1.index(min(values1))\n",
+ " \n",
+ "print(f\"The min value of values1 is {min(values1)} and is located in the {min_index} index.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Names: ('CHRISTIAN SANTOS', 'CRISTIAN NASSER', 'ANGELICA KARINA', 'JOSE ROBERTO', 'ABEL FERNANDO', 'MEIR ALVARO', 'JOSELIN ALEXANDRA', 'LEONEL ARTURO', 'JOSE FELIPE', 'AFRANIA', 'BIANCA MARIETTE', 'ADRIAN ANDRE', 'DORKAS YOMIRA JHERMY', 'LADY ALY', 'HECTOR ANDRE', 'GUSTAVO', 'PERSEO MARCELO', 'MIGUEL ALONZO', 'NICOLAS', 'ROCIO GABRIELA', 'JANE CAMILA', 'MARIA ELISA', 'ALEJANDRO', 'KARLINE ROSMELI', 'STEPHY ROSARIO', 'VALERIA CECILIA', 'SEBASTIAN RENATO', 'JUAN CARLOS', 'MARIANA', 'ANDREA BRIZETH', 'ERICK JOSUE', 'JOSUE DANIEL', 'FABIO MANUEL', 'FERNANDA NICOLLE', 'ANGELA ADELINA', 'CESAR DANTE', 'GABRIELA ISABEL', 'ANGEL MAURICIO', 'JUAN DIEGO', 'ARONE', 'PERCY ALBERTH', 'KEVIN ARTURO', 'CESAR ERNESTO', 'CÉSAR AGUSTO', 'DIANA EDITH', 'RODRIGO FRANCO', 'GRETTEL ALEXANDRA', 'ROSA ANGELA', 'DANTE OMAR', 'YAJAIRA ALEXANDRA', 'JORGE ALBERTO', 'ALEXIS')\n",
+ "Last Names: ('CORNEJO SANCHEZ', 'ORELLANA QUISPE', 'MORALES CHOQUEHUANCA', 'GUIMARAY RIBEYRO', 'CAMACHO GAVIDIA', 'TINTAYA ORIHUELA', 'CHAVEZ MARTINEZ', 'FIGUEROA MURO', 'GOMEZ CRIBILLERO', 'PALOMINO SEGUÍN', 'LUZON CUEVA', 'SUAÑA ZEGARRA', 'SOTO POMACHAGUA', 'FIORENTINO MARTINEZ', 'LAMA MAVILA', 'MEZA HINOJO', 'LOZADA MURILLO', 'ZAMBRANO JIMENEZ', 'JACOBS LUQUE', 'VIDAL VIDAL', 'TORRES ANICAMA', 'LOPEZ ESTRADA', 'BOYCO ORAMS', 'DIAZ BERROSPI', 'RIEGA ESCALANTE', 'LEVANO TORRES', 'ESQUIVES BRAVO', 'PEREZ GONZALES', 'OTERO MAGUIÑA', 'CLAVO CAMPOS', 'AGUILAR GARCIA', 'CALDAS VELASQUEZ', 'SALAS NUÑEZ BORJA', 'PIZARRO VILLANES', 'QUILLATUPA MORALES', 'HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'HINOJOSA CAHUANA', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ', 'FLORES CADILLO')\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 7\n",
+ "\n",
+ "last_names, names = zip(*map(lambda x: x.split(', '), last_and_name))\n",
+ " # the lambda function using split divides each name from each last name\n",
+ " # the function zip takes the values and puts them in tuples\n",
+ " # \"last_names, names\" indicates the code to split the tuples in two separate lists\n",
+ "print(\"Names:\", names)\n",
+ "print(\"Last Names:\", last_names)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ']\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 8\n",
+ " \n",
+ "answer_8 = [last_name for last_name, email in zip(last_names, emails) if not email]\n",
+ " # zip function pairs last names with its corresponding emails into tuples\n",
+ " # \"last_name for last_name, email in ... if not email\" indicates the last name for each pair if email is empty\n",
+ "print(answer_8)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.3 Strings\n",
+ "\n",
+ "\n",
+ "1. Drop the duplicated blank spaces in `str1`. Do not use regular expressions. Do not use the code shown in class. Explain your steps. **Hint: Use `split` method and `join` function.**\n",
+ "\n",
+ "2. Get the number of letters in the string. **Hint: Use the `len` function.**\n",
+ "\n",
+ "3. Get the number of blank spaces (all of them) in the string. **Hint: Use the `len` function.**\n",
+ "\n",
+ "4. Get the position of `@` in each string in the `emails` list. **Hint: Use the `map` function and the `find` method.**\n",
+ "\n",
+ "5. Identify whether `.edu.` exists in each string in the `emails` list. Get a list of Booleans. **Hint: Use the `map` function and the `find` method.**\n",
+ "\n",
+ "6. Get all the strings before the first dot `.` in each string in the `emails` list. Identify how many of them have `@`. **Hint: Use the `map` function and the `find` method.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Dear professor, the reason why I created this new branch is to correct the fact that I mistakenly edited the main branch of my group when I was writing my part. So, I was the one in charge to complete the \"Strings\" part, and here it is. I hope you can understand this."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "str1 = 'I am too old'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I am too old\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 1. Drop the duplicated blank spaces in str1. Do not use the regular expresions. Do not use the code shown in class. Explain your steps.Hint: Use split method and join function.\n",
+ "\n",
+ "# First, we split the string into a list of words\n",
+ "list_words = str1.split()\n",
+ "\n",
+ "# Then, we remove the empty strings from the list\n",
+ "non_empty_words = [word for word in list_words if word]\n",
+ "\n",
+ "# Lastly, we join the non-empty words back into a string\n",
+ "result_str = ' '.join(non_empty_words)\n",
+ "\n",
+ "print(result_str)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of letters: 9\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 2. Get the number of letters in the string .Hint: Use lenfunction.\n",
+ "\n",
+ "# To count the letters, we keep only the alphabetic characters of str1,\n",
+ "# checking each character with the isalpha() method inside a list comprehension,\n",
+ "# and then apply len() to the filtered list to get the count.\n",
+ "num_letters = len([char for char in str1 if char.isalpha()])\n",
+ "\n",
+ "# This is a better way to print the answer, including the string value \"Number of letters\"\n",
+ "print(\"Number of letters:\", num_letters)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of blank spaces: 85\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 3. Get the number of blank spaces (all of them) in the string .Hint: Use lenfunction.\n",
+ "\n",
+ "# First, we create a list with the blank spaces, using the \"isspace\" method\n",
+ "blank_space = [char for char in str1 if char.isspace()]\n",
+ "\n",
+ "# Then we apply the function len() to the list to get the total number of blank spaces\n",
+ "num_blank_spaces = len(blank_space)\n",
+ "\n",
+ "# Finally, we print the result\n",
+ "print(\"Number of blank spaces:\", num_blank_spaces)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Positions of '@': [9, 11, 14, 9, 12, 8, 14, 9, 8, 16, 12, 13, 6, 9, 10, 12, 8, 11, 14, 6, 11, 7, 15, 9, 12, 8, 9, 11, 13, 6, 9, 12, 11, 16, 11]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 4. Get the position of @ in each string in the emails list. Hint: Use mapfunction and find method.\n",
+ "\n",
+ "# First we use the map() function to apply the specified lambda function to each element in the emails list. \n",
+ "# Then, with the lambda function takes an email address (email) and finds the position of the '@' symbol using the find() method.\n",
+ "# Also, list(...) Converts the result into a list.\n",
+ "positions_at = list(map(lambda email: email.find('@'), emails))\n",
+ "\n",
+ "# Finally, we print the results\n",
+ "print(\"Positions of '@':\", positions_at)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Contains '.edu.': [True, True, True, False, False, True, True, False, False, False, False, False, True, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 5. Identifies if exists .edu. in each string in the emails list. Get a list of Booleans. Hint: Use mapfunction and find method. # \n",
+ "\n",
+ "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
+ "# The lambda function checks if '.edu.' is present in each email address.\n",
+ "contains_edu_lambda = lambda email: '.edu.' in email\n",
+ "mapped_result = map(contains_edu_lambda, emails)\n",
+ "\n",
+ "# Then, we convert the mapped result into a list.\n",
+ "contains_edu_list = list(mapped_result)\n",
+ "\n",
+ "# Finally we print the result\n",
+ "print(\"Contains '.edu.':\", contains_edu_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Substrings before the first dot: ['cscornejo@pucp', 'orellana', 'karina', 'a20083223@pucp', 'abel', 'mtintaya@pucp', 'joselin', 'a20105737@pucp', 'jfgomezc@pucp', 'afrania', 'luzon', 'adrian', 'soto', 'a20132766@pucp', 'andre', 'gustavo', 'pmlozada@pucp', 'm', 'nicolas', 'gvidal@pucp', 'jane', 'm', 'alejandro', 'a20167070@pucp', 'riega', 'vlevanot@pucp', 'sesquives@pucp', 'perez', 'mariana', 'aclavo@pucp', 'a20182474@pucp', 'josue', 'fabio', 'fernanda', 'aquillatupa@pucp']\n",
+ "Number of substrings containing '@': 14\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 6. Get all the strings before the first dot '.' in each string in the emails list.\n",
+ "# 6. Get all the strings before the first dot '.' in each string in the emails list.\n",
+ "# Then identify how many of them have '@'.\n",
+ "\n",
+ "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
+ "# The lambda function extracts the substring before the first dot '.'; the '@' check is done afterwards.\n",
+ "substring_before_dot_lambda = lambda email: email.split('.')[0]\n",
+ "mapped_result = map(substring_before_dot_lambda, emails)\n",
+ "\n",
+ "# After that, we convert the mapped result into a list.\n",
+ "substring_before_dot_list = list(mapped_result)\n",
+ "\n",
+ "# Also, we count how many substrings contain '@' and print the results\n",
+ "count_with_at = sum('@' in substring for substring in substring_before_dot_list)\n",
+ "\n",
+ "print(\"Substrings before the first dot:\", substring_before_dot_list)\n",
+ "print(\"Number of substrings containing '@':\", count_with_at)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.4 Pandas\n",
+ "\n",
+ "You can and should always ask ChatGPT, BARD, Bing, etc.\n",
+ "In this exercise, you will work with financial data. Follow the steps below:\n",
+ "\n",
+ "1. Load Data:\n",
+ " Load the financial data available at this URL: https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\n",
+ " Use `pd.read_csv()` and the link to load the data into a DataFrame.\n",
+ "\n",
+ "2. Explore Data:\n",
+ " a) Display the first five rows of the DataFrame.\n",
+ " b) Display the summary statistics of the DataFrame.\n",
+ "\n",
+ "3. Add Columns:\n",
+ " a) Add a new column 'Level' that categorizes the 'Close' column values into 'Low' (< 20), 'Medium' (20-30), and 'High' (>30). You need to make a filter to make this categorization. You can see below some suggestions how to do it.\n",
+ " b) Add a new column 'Year' extracted from the 'Date' column. You need to work with dates. You can do it by working directly with the column and splitting it, or by changing it to date format and following the suggestion below. \n",
+ "\n",
+ "4. Add Rows:\n",
+ " a) Add a new row with a date of your choice and fill the other columns with appropriate values. \n",
+ "\n",
+ "5. Analysis:\n",
+ " a) Calculate the average of the column 'Close' for each 'Year'. You need to group for this task.\n",
+ " b) Count the number of 'High', 'Medium', and 'Low' Level days in the data.\n",
+ "\n",
+ "Suggestions:\n",
+ " - Use `pd.cut()` for categorizing 'Close' values.\n",
+ " - Use `pd.to_datetime()` and `dt.year` to extract the year from a date.\n",
+ " - Use `DataFrame.append()` or `pd.concat()` to add rows.\n",
+ " - Use `groupby()` for aggregation tasks."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1\n",
+ "import pandas as pd\n",
+ "\n",
+ "# This code loads the data from the provided URL.\n",
+ "url = \"https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\"\n",
+ "df = pd.read_csv(url)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE | \n",
+ " OPEN | \n",
+ " HIGH | \n",
+ " LOW | \n",
+ " CLOSE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 01/02/1990 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 01/03/1990 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 01/04/1990 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 01/05/1990 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 01/08/1990 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 8572 | \n",
+ " 12/29/2023 | \n",
+ " 12.55 | \n",
+ " 13.19 | \n",
+ " 12.36 | \n",
+ " 12.45 | \n",
+ "
\n",
+ " \n",
+ " | 8573 | \n",
+ " 01/02/2024 | \n",
+ " 13.22 | \n",
+ " 14.23 | \n",
+ " 13.10 | \n",
+ " 13.20 | \n",
+ "
\n",
+ " \n",
+ " | 8574 | \n",
+ " 01/03/2024 | \n",
+ " 13.35 | \n",
+ " 14.22 | \n",
+ " 13.33 | \n",
+ " 14.04 | \n",
+ "
\n",
+ " \n",
+ " | 8575 | \n",
+ " 01/04/2024 | \n",
+ " 13.93 | \n",
+ " 14.20 | \n",
+ " 13.64 | \n",
+ " 14.13 | \n",
+ "
\n",
+ " \n",
+ " | 8576 | \n",
+ " 01/05/2024 | \n",
+ " 14.24 | \n",
+ " 14.58 | \n",
+ " 13.29 | \n",
+ " 13.35 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8577 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE OPEN HIGH LOW CLOSE\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26\n",
+ "... ... ... ... ... ...\n",
+ "8572 12/29/2023 12.55 13.19 12.36 12.45\n",
+ "8573 01/02/2024 13.22 14.23 13.10 13.20\n",
+ "8574 01/03/2024 13.35 14.22 13.33 14.04\n",
+ "8575 01/04/2024 13.93 14.20 13.64 14.13\n",
+ "8576 01/05/2024 14.24 14.58 13.29 13.35\n",
+ "\n",
+ "[8577 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# We check the data assigned to the variable 'df'.\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Primeras cinco filas del DataFrame:\n",
+ " DATE OPEN HIGH LOW CLOSE\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26\n",
+ "\n",
+ "Estadísticas resumidas del DataFrame:\n",
+ " OPEN HIGH LOW CLOSE\n",
+ "count 8577.000000 8577.000000 8577.000000 8577.000000\n",
+ "mean 19.666454 20.474364 18.914884 19.580374\n",
+ "std 7.979066 8.439927 7.469827 7.906214\n",
+ "min 9.010000 9.310000 8.560000 9.140000\n",
+ "25% 13.940000 14.540000 13.400000 13.880000\n",
+ "50% 17.790000 18.470000 17.220000 17.760000\n",
+ "75% 23.100000 23.960000 22.320000 22.990000\n",
+ "max 82.690000 89.530000 72.760000 82.690000\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 2\n",
+ "# We display the first five rows of the DataFrame.\n",
+ "print(\"Primeras cinco filas del DataFrame:\")\n",
+ "print(df.head())\n",
+ "\n",
+ "# We display the summarized statistics of the DataFrame.\n",
+ "print(\"\\nEstadísticas resumidas del DataFrame:\")\n",
+ "print(df.describe())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE | \n",
+ " OPEN | \n",
+ " HIGH | \n",
+ " LOW | \n",
+ " CLOSE | \n",
+ " Level | \n",
+ " Year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 01/02/1990 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 01/03/1990 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 01/04/1990 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 01/05/1990 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 01/08/1990 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 8572 | \n",
+ " 12/29/2023 | \n",
+ " 12.55 | \n",
+ " 13.19 | \n",
+ " 12.36 | \n",
+ " 12.45 | \n",
+ " Low | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " | 8573 | \n",
+ " 01/02/2024 | \n",
+ " 13.22 | \n",
+ " 14.23 | \n",
+ " 13.10 | \n",
+ " 13.20 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8574 | \n",
+ " 01/03/2024 | \n",
+ " 13.35 | \n",
+ " 14.22 | \n",
+ " 13.33 | \n",
+ " 14.04 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8575 | \n",
+ " 01/04/2024 | \n",
+ " 13.93 | \n",
+ " 14.20 | \n",
+ " 13.64 | \n",
+ " 14.13 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8576 | \n",
+ " 01/05/2024 | \n",
+ " 14.24 | \n",
+ " 14.58 | \n",
+ " 13.29 | \n",
+ " 13.35 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8577 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE OPEN HIGH LOW CLOSE Level Year\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
+ "... ... ... ... ... ... ... ...\n",
+ "8572 12/29/2023 12.55 13.19 12.36 12.45 Low 2023\n",
+ "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
+ "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
+ "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
+ "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
+ "\n",
+ "[8577 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 3\n",
+ "# We add a 'Level' column to categorize the 'CLOSE' values\n",
+ "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
+ " labels=['Low', 'Medium', 'High'])\n",
+ "\n",
+ "# We add the 'Year' column extracted from the 'DATE' column, keeping only the year using the dt.year function.\n",
+ "# Firstly, the 'DATE' variable was categorized as datetime.\n",
+ "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
+ "\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE | \n",
+ " OPEN | \n",
+ " HIGH | \n",
+ " LOW | \n",
+ " CLOSE | \n",
+ " Level | \n",
+ " Year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 01/02/1990 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 01/03/1990 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 01/04/1990 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 01/05/1990 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 01/08/1990 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 8573 | \n",
+ " 01/02/2024 | \n",
+ " 13.22 | \n",
+ " 14.23 | \n",
+ " 13.10 | \n",
+ " 13.20 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8574 | \n",
+ " 01/03/2024 | \n",
+ " 13.35 | \n",
+ " 14.22 | \n",
+ " 13.33 | \n",
+ " 14.04 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8575 | \n",
+ " 01/04/2024 | \n",
+ " 13.93 | \n",
+ " 14.20 | \n",
+ " 13.64 | \n",
+ " 14.13 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8576 | \n",
+ " 01/05/2024 | \n",
+ " 14.24 | \n",
+ " 14.58 | \n",
+ " 13.29 | \n",
+ " 13.35 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8577 | \n",
+ " 01/06/2024 | \n",
+ " 14.27 | \n",
+ " 14.61 | \n",
+ " 13.32 | \n",
+ " 13.38 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8578 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE OPEN HIGH LOW CLOSE Level Year\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
+ "... ... ... ... ... ... ... ...\n",
+ "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
+ "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
+ "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
+ "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
+ "8577 01/06/2024 14.27 14.61 13.32 13.38 Low 2024\n",
+ "\n",
+ "[8578 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 4\n",
+ "# Temporalmente eliminamos las columnas 'Level' y 'Year' para evitar NaN en el df\n",
+ "df = df.drop(['Level', 'Year'], axis=1)\n",
+ "\n",
+ "# Agregamos la nueva fila solicitada\n",
+ "new_row = pd.DataFrame({'DATE': ['01/06/2024'], \n",
+ " 'OPEN': [14.27], \n",
+ " 'HIGH': [14.61], \n",
+ " 'LOW': [13.32], \n",
+ " 'CLOSE': [13.38]}) \n",
+ "\n",
+ "# Agregamos la nueva fila al DataFrame existente\n",
+ "df = pd.concat([df, new_row], ignore_index=True)\n",
+ "\n",
+ "# Recategorizamos 'Level' y 'Year' después de agregar la nueva fila\n",
+ "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
+ " labels=['Low', 'Medium', 'High'])\n",
+ "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " Average Close | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1990 | \n",
+ " 23.06 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1991 | \n",
+ " 18.37 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1992 | \n",
+ " 15.45 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1993 | \n",
+ " 12.69 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1994 | \n",
+ " 13.93 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 1995 | \n",
+ " 12.39 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 1996 | \n",
+ " 16.44 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 1997 | \n",
+ " 22.36 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 1998 | \n",
+ " 25.60 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 1999 | \n",
+ " 24.37 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 2000 | \n",
+ " 23.32 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 2001 | \n",
+ " 25.75 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 2002 | \n",
+ " 27.29 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 2003 | \n",
+ " 21.98 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 2004 | \n",
+ " 15.48 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 2005 | \n",
+ " 12.81 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 2006 | \n",
+ " 12.81 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 2007 | \n",
+ " 17.54 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 2008 | \n",
+ " 32.70 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 2009 | \n",
+ " 31.48 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 2010 | \n",
+ " 22.55 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 2011 | \n",
+ " 24.20 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 2012 | \n",
+ " 17.80 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 2013 | \n",
+ " 14.23 | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 2014 | \n",
+ " 14.18 | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " 2015 | \n",
+ " 16.67 | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " 2016 | \n",
+ " 15.83 | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " 2017 | \n",
+ " 11.09 | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " 2018 | \n",
+ " 16.64 | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " 2019 | \n",
+ " 15.39 | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " 2020 | \n",
+ " 29.25 | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " 2021 | \n",
+ " 19.66 | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " 2022 | \n",
+ " 25.64 | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " 2023 | \n",
+ " 16.85 | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " 2024 | \n",
+ " 13.62 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Year Average Close\n",
+ "0 1990 23.06\n",
+ "1 1991 18.37\n",
+ "2 1992 15.45\n",
+ "3 1993 12.69\n",
+ "4 1994 13.93\n",
+ "5 1995 12.39\n",
+ "6 1996 16.44\n",
+ "7 1997 22.36\n",
+ "8 1998 25.60\n",
+ "9 1999 24.37\n",
+ "10 2000 23.32\n",
+ "11 2001 25.75\n",
+ "12 2002 27.29\n",
+ "13 2003 21.98\n",
+ "14 2004 15.48\n",
+ "15 2005 12.81\n",
+ "16 2006 12.81\n",
+ "17 2007 17.54\n",
+ "18 2008 32.70\n",
+ "19 2009 31.48\n",
+ "20 2010 22.55\n",
+ "21 2011 24.20\n",
+ "22 2012 17.80\n",
+ "23 2013 14.23\n",
+ "24 2014 14.18\n",
+ "25 2015 16.67\n",
+ "26 2016 15.83\n",
+ "27 2017 11.09\n",
+ "28 2018 16.64\n",
+ "29 2019 15.39\n",
+ "30 2020 29.25\n",
+ "31 2021 19.66\n",
+ "32 2022 25.64\n",
+ "33 2023 16.85\n",
+ "34 2024 13.62"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 5\n",
+ "# Agrupa por 'Year' y calcula el promedio de 'Close' para cada año\n",
+ "average_close_by_year = df.groupby('Year')['CLOSE'].mean()\n",
+ "\n",
+ "# Round the values to two decimal places\n",
+ "average_close_by_year = average_close_by_year.round(2)\n",
+ "\n",
+ "# Crea un nuevo DataFrame con los resultados\n",
+ "df_average_year = pd.DataFrame({'Year': average_close_by_year.index, 'Average Close': average_close_by_year.values})\n",
+ "df_average_year"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Low 5276\n",
+ "Medium 2582\n",
+ "High 720\n",
+ "Name: Level, dtype: int64"
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Count the number of 'High', 'Medium', and 'Low' Level days in the data\n",
+ "df = df['Level'].value_counts()\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "hide_input": false,
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
From d63662777b3bf3ccfb519aa4e203fb282d52612f Mon Sep 17 00:00:00 2001
From: Rafael Vargas
Date: Mon, 8 Jan 2024 20:35:16 -0500
Subject: [PATCH 08/14] Added group_7_ass_2_2024_re file
---
.../group_7_ass_2_2024_re-checkpoint.ipynb | 105 ++++++++++++++++++
.../assignment_2/group_7_ass_2_2024_re.ipynb | 105 ++++++++++++++++++
2 files changed, 210 insertions(+)
create mode 100644 assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
create mode 100644 assignments/assignment_2/group_7_ass_2_2024_re.ipynb
diff --git a/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb b/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
new file mode 100644
index 00000000..d4072634
--- /dev/null
+++ b/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
@@ -0,0 +1,105 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "bc677f0d",
+ "metadata": {},
+ "source": [
+ "## 1.2 Lists\n",
+ "\n",
+ "1 - Show the indices of the np.nan values in the f_list list. We want to see this output: The indices 0, 1, 4, 7 have np.nan values. Hint: Use print function and f-strings to insert the indices values.\n",
+ "\n",
+ "\n",
+ "2 - Replicate 4 times the values of the list p2_list. We expect an output like this: [ 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5]. Hint: Use the multiplication operator on lists and see the output.\n",
+ "\n",
+ "\n",
+ "3 - Print the length of f_list. Hint: Length function\n",
+ "\n",
+ "\n",
+ "4 - Print My teacher assistant is so boring. using text1 list. Hint: Use the join function\n",
+ "\n",
+ "\n",
+ "5 - Print My TA is so boring, but is very funny. using text1 list. Hint: Use the join function, and extend method.\n",
+ "\n",
+ "\n",
+ "6 - Print\n",
+ "\n",
+ "The max value of values1 is 86 and is located in the 0 index.\n",
+ "The min value of values1 is 0 and is located in the 7 index.\n",
+ "Hint: Use the f-string, min, and max functions.\n",
+ "\n",
+ "7 - Get two lists: `names` and `last_names` using `last_and_name` list. **Hint: Use `map` and `split`.**\n",
+ "\n",
+ "8 - Give only the last names of students who do not have email. Use the `emails` and `last_names` lists. **Hint: Use `map` and `split`.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "29504dc0",
+ "metadata": {},
+ "source": [
+ "## 1.3 Strings\n",
+ "\n",
+ "1 - Drop the duplicated blank spaces in str1. Do not use regular expressions. Do not use the code shown in class. Explain your steps. Hint: Use the split method and the join function.\n",
+ "\n",
+ "2 - Get the number of letters in the string str1. Hint: Use the len function.\n",
+ "\n",
+ "3 - Get the number of blank spaces (all of them) in the string str1. Hint: Use the len function.\n",
+ "\n",
+ "4 - Get the position of @ in each string in the emails list. Hint: Use the map function and the find method.\n",
+ "\n",
+ "5 - Identify whether .edu. exists in each string in the emails list. Get a list of Booleans. Hint: Use the map function and the find method.\n",
+ "\n",
+ "6 - Get all the strings before the first dot .in each string in the emails list. Identifies how many of them has @. Hint: Use mapfunction and find method."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c40b32db",
+ "metadata": {},
+ "source": [
+ "## 1.4 Pandas\n",
+ "\n",
+ "You can and should always ask ChatGPT, BARD, Bing, etc. In this exercise, you will work with financial data. Follow the steps below:\n",
+ "\n",
+ "1 - Load Data: Load the financial data available at this URL: https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv Use pd.read_csv() and the link to load the data into a DataFrame.\n",
+ "\n",
+ "2 - Explore Data: a) Display the first five rows of the DataFrame. b) Display the summary statistics of the DataFrame.\n",
+ "\n",
+ "3 - Add Columns: a) Add a new column 'Level' that categorizes the 'Close' column values into 'Low' (< 20), 'Medium' (20-30), and 'High' (>30). You need to make a filter to make this categorization. You can see some suggestions on how to do it below. b) Add a new column 'Year' extracted from the 'Date' column. You need to work with dates. You can do it either by working directly with the column and splitting it, or by changing it to date format and following the suggestion below.\n",
+ "\n",
+ "4 - Add Rows: a) Add a new row with a date of your choice and fill the other columns with appropriate values.\n",
+ "\n",
+ "5 - Analysis: a) Calculate the average of the column 'Close' for each 'Year'. You need to group for this task. b) Count the number of 'High', 'Medium', and 'Low' Level days in the data.\n",
+ "\n",
+ "Suggestions:\n",
+ "\n",
+ "Use pd.cut() for categorizing 'Close' values.\n",
+ "Use pd.to_datetime() and dt.year to extract the year from a date.\n",
+ "Use DataFrame.append() or pd.concat() to add rows.\n",
+ "Use groupby() for aggregation tasks."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/assignments/assignment_2/group_7_ass_2_2024_re.ipynb b/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
new file mode 100644
index 00000000..d4072634
--- /dev/null
+++ b/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
@@ -0,0 +1,105 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "bc677f0d",
+ "metadata": {},
+ "source": [
+ "## 1.2 Lists\n",
+ "\n",
+ "1 - Show the indices of the np.nan values in the f_list list. We want to see this output: The indices 0, 1, 4, 7 have np.nan values. Hint: Use print function and f-strings to insert the indices values.\n",
+ "\n",
+ "\n",
+ "2 - Replicate 4 times the values of the list p2_list. We expect an output like this: [ 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5]. Hint: Use the multiplication operator on lists and see the output.\n",
+ "\n",
+ "\n",
+ "3 - Print the length of f_list. Hint: Length function\n",
+ "\n",
+ "\n",
+ "4 - Print My teacher assistant is so boring. using text1 list. Hint: Use the join function\n",
+ "\n",
+ "\n",
+ "5 - Print My TA is so boring, but is very funny. using text1 list. Hint: Use the join function, and extend method.\n",
+ "\n",
+ "\n",
+ "6 - Print\n",
+ "\n",
+ "The max value of values1 is 86 and is located in the 0 index.\n",
+ "The min value of values1 is 0 and is located in the 7 index.\n",
+ "Hint: Use the f-string, min, and max functions.\n",
+ "\n",
+ "7 - Get two lists: `names` and `last_names` using `last_and_name` list. **Hint: Use `map` and `split`.**\n",
+ "\n",
+ "8 - Give only the last names of students who do not have email. Use the `emails` and `last_names` lists. **Hint: Use `map` and `split`.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "29504dc0",
+ "metadata": {},
+ "source": [
+ "## 1.3 Strings\n",
+ "\n",
+ "1 - Drop the duplicated blank spaces in str1. Do not use regular expressions. Do not use the code shown in class. Explain your steps. Hint: Use the split method and the join function.\n",
+ "\n",
+ "2 - Get the number of letters in the string str1. Hint: Use the len function.\n",
+ "\n",
+ "3 - Get the number of blank spaces (all of them) in the string str1. Hint: Use the len function.\n",
+ "\n",
+ "4 - Get the position of @ in each string in the emails list. Hint: Use the map function and the find method.\n",
+ "\n",
+ "5 - Identify whether .edu. exists in each string in the emails list. Get a list of Booleans. Hint: Use the map function and the find method.\n",
+ "\n",
+ "6 - Get all the strings before the first dot .in each string in the emails list. Identifies how many of them has @. Hint: Use mapfunction and find method."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c40b32db",
+ "metadata": {},
+ "source": [
+ "## 1.4 Pandas\n",
+ "\n",
+ "You can and should always ask ChatGPT, BARD, Bing, etc. In this exercise, you will work with financial data. Follow the steps below:\n",
+ "\n",
+ "1 - Load Data: Load the financial data available at this URL: https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv Use pd.read_csv() and the link to load the data into a DataFrame.\n",
+ "\n",
+ "2 - Explore Data: a) Display the first five rows of the DataFrame. b) Display the summary statistics of the DataFrame.\n",
+ "\n",
+ "3 - Add Columns: a) Add a new column 'Level' that categorizes the 'Close' column values into 'Low' (< 20), 'Medium' (20-30), and 'High' (>30). You need to make a filter to make this categorization. You can see some suggestions on how to do it below. b) Add a new column 'Year' extracted from the 'Date' column. You need to work with dates. You can do it either by working directly with the column and splitting it, or by changing it to date format and following the suggestion below.\n",
+ "\n",
+ "4 - Add Rows: a) Add a new row with a date of your choice and fill the other columns with appropriate values.\n",
+ "\n",
+ "5 - Analysis: a) Calculate the average of the column 'Close' for each 'Year'. You need to group for this task. b) Count the number of 'High', 'Medium', and 'Low' Level days in the data.\n",
+ "\n",
+ "Suggestions:\n",
+ "\n",
+ "Use pd.cut() for categorizing 'Close' values.\n",
+ "Use pd.to_datetime() and dt.year to extract the year from a date.\n",
+ "Use DataFrame.append() or pd.concat() to add rows.\n",
+ "Use groupby() for aggregation tasks."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
From c3f1b2f99c428ae921c132cae32278a4d84102dc Mon Sep 17 00:00:00 2001
From: Analudrs
Date: Mon, 8 Jan 2024 20:54:37 -0500
Subject: [PATCH 09/14] Added information about strings
---
.../group_7_ass_2_2024_re-checkpoint.ipynb | 192 +++++++++++++++++-
.../assignment_2/group_7_ass_2_2024.ipynb | 2 +-
.../assignment_2/group_7_ass_2_2024_re.ipynb | 192 +++++++++++++++++-
3 files changed, 383 insertions(+), 3 deletions(-)
diff --git a/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb b/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
index d4072634..1768e20d 100644
--- a/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
+++ b/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
@@ -53,6 +53,196 @@
"6 - Get all the strings before the first dot .in each string in the emails list. Identifies how many of them has @. Hint: Use mapfunction and find method."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "045944b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "str1 = 'I am too old'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "a53dd7b1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "68fe62ee",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I am too old\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 1. Drop the duplicated blank spaces in str1. Do not use the regular expresions. Do not use the code shown in class. Explain your steps.Hint: Use split method and join function.\n",
+ "\n",
+ "# First, we split the string into a list of words\n",
+ "list_words = str1.split()\n",
+ "\n",
+ "# Then, we remove the empty strings from the list\n",
+ "non_empty_words = [word for word in list_words if word]\n",
+ "\n",
+ "# Lastly, we join the non-empty words back into a string\n",
+ "result_str = ' '.join(non_empty_words)\n",
+ "\n",
+ "print(result_str)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "b9a72a9c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of letters: 9\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 2. Get the number of letters in the string .Hint: Use lenfunction.\n",
+ "\n",
+ "# To count the letters, we keep only the alphabetic characters (checked with char.isalpha()\n",
+ "# inside the comprehension's if clause)\n",
+ "# and then apply the len function to get the length of the resulting list.\n",
+ "num_letters = len([char for char in str1 if char.isalpha()])\n",
+ "# This is a better way to print the answer, including the string value \"Number of letters\"\n",
+ "print(\"Number of letters:\", num_letters)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "5a939c7f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of blank spaces: 85\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 3. Get the number of blank spaces (all of them) in the string .Hint: Use lenfunction.\n",
+ "\n",
+ "# First, we create a list with the blank spaces, using the \"isspace\" method\n",
+ "blank_space = [char for char in str1 if char.isspace()]\n",
+ "\n",
+ "# Then we apply the function len() to the list to get the total number of blank spaces\n",
+ "num_blank_spaces = len(blank_space)\n",
+ "\n",
+ "# Finally, we print the result\n",
+ "print(\"Number of blank spaces:\", num_blank_spaces)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "f6566bef",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Positions of '@': [9, 11, 14, 9, 12, 8, 14, 9, 8, 16, 12, 13, 6, 9, 10, 12, 8, 11, 14, 6, 11, 7, 15, 9, 12, 8, 9, 11, 13, 6, 9, 12, 11, 16, 11]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 4. Get the position of @ in each string in the emails list. Hint: Use mapfunction and find method.\n",
+ "\n",
+ "# First we use the map() function to apply the specified lambda function to each element in the emails list. \n",
+ "# The lambda function takes an email address (email) and finds the position of the '@' symbol using the find() method.\n",
+ "# Finally, list(...) converts the result into a list.\n",
+ "positions_at = list(map(lambda email: email.find('@'), emails))\n",
+ "\n",
+ "# Finally, we print the results\n",
+ "print(\"Positions of '@':\",positions_at)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "d5bb16d7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Contains '.edu.': [True, True, True, False, False, True, True, False, False, False, False, False, True, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 5. Identifies if exists .edu. in each string in the emails list. Get a list of Booleans. Hint: Use mapfunction and find method. # \n",
+ "\n",
+ "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
+ "# The lambda function checks if '.edu.' is present in each email address.\n",
+ "contains_edu_lambda = lambda email: '.edu.' in email\n",
+ "mapped_result = map(contains_edu_lambda, emails)\n",
+ "\n",
+ "# Then, we convert the mapped result into a list.\n",
+ "contains_edu_list = list(mapped_result)\n",
+ "\n",
+ "# Finally we print the result\n",
+ "print(\"Contains '.edu.':\", contains_edu_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "0fbd7c03",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Substrings before the first dot: ['cscornejo@pucp', 'orellana', 'karina', 'a20083223@pucp', 'abel', 'mtintaya@pucp', 'joselin', 'a20105737@pucp', 'jfgomezc@pucp', 'afrania', 'luzon', 'adrian', 'soto', 'a20132766@pucp', 'andre', 'gustavo', 'pmlozada@pucp', 'm', 'nicolas', 'gvidal@pucp', 'jane', 'm', 'alejandro', 'a20167070@pucp', 'riega', 'vlevanot@pucp', 'sesquives@pucp', 'perez', 'mariana', 'aclavo@pucp', 'a20182474@pucp', 'josue', 'fabio', 'fernanda', 'aquillatupa@pucp']\n",
+ "Number of substrings containing '@': 14\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 6. Get all the strings before the first dot '.' in each string in the emails list.\n",
+ "# Then, identify how many of them have '@'.\n",
+ "\n",
+ "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
+ "# The lambda function extracts the substring before the first dot '.'.\n",
+ "substring_before_dot_lambda = lambda email: email.split('.')[0]\n",
+ "mapped_result = map(substring_before_dot_lambda, emails)\n",
+ "\n",
+ "# After that, we convert the mapped result into a list.\n",
+ "substring_before_dot_list = list(mapped_result)\n",
+ "\n",
+ "# Also, we count how many substrings contain '@' and print the results\n",
+ "count_with_at = sum('@' in substring for substring in substring_before_dot_list)\n",
+ "\n",
+ "print(\"Substrings before the first dot:\", substring_before_dot_list)\n",
+ "print(\"Number of substrings containing '@':\", count_with_at)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "c40b32db",
@@ -97,7 +287,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.11.4"
}
},
"nbformat": 4,
diff --git a/assignments/assignment_2/group_7_ass_2_2024.ipynb b/assignments/assignment_2/group_7_ass_2_2024.ipynb
index 731a3c64..007f4559 100644
--- a/assignments/assignment_2/group_7_ass_2_2024.ipynb
+++ b/assignments/assignment_2/group_7_ass_2_2024.ipynb
@@ -1458,7 +1458,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.7"
+ "version": "3.11.4"
},
"toc": {
"base_numbering": 1,
diff --git a/assignments/assignment_2/group_7_ass_2_2024_re.ipynb b/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
index d4072634..1768e20d 100644
--- a/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
+++ b/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
@@ -53,6 +53,196 @@
"6 - Get all the strings before the first dot .in each string in the emails list. Identifies how many of them has @. Hint: Use mapfunction and find method."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "045944b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "str1 = 'I am too old'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "a53dd7b1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "68fe62ee",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I am too old\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 1. Drop the duplicated blank spaces in str1. Do not use the regular expresions. Do not use the code shown in class. Explain your steps.Hint: Use split method and join function.\n",
+ "\n",
+ "# First, we split the string into a list of words\n",
+ "list_words = str1.split()\n",
+ "\n",
+ "# Then, we remove the empty strings from the list\n",
+ "non_empty_words = [word for word in list_words if word]\n",
+ "\n",
+ "# Lastly, we join the non-empty words back into a string\n",
+ "result_str = ' '.join(non_empty_words)\n",
+ "\n",
+ "print(result_str)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "b9a72a9c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of letters: 9\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 2. Get the number of letters in the string .Hint: Use lenfunction.\n",
+ "\n",
+ "# To count only the letters, we first filter out the non-alphabetic characters: the if clause of the\n",
+ "# list comprehension keeps a character only when isalpha() is True (i.e. it is a letter),\n",
+ "# and then the len function returns the length of the resulting list.\n",
+ "num_letters = len([char for char in str1 if char.isalpha()])\n",
+ "# This is a better way to print the answer, including the string value \"Number of letters\"\n",
+ "print(\"Number of letters:\", num_letters)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "5a939c7f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of blank spaces: 85\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 3. Get the number of blank spaces (all of them) in the string .Hint: Use lenfunction.\n",
+ "\n",
+ "# First, we create a list with the blank spaces, using the \"isspace\" method\n",
+ "blank_space = [char for char in str1 if char.isspace()]\n",
+ "\n",
+ "# Then we apply the function len() to the list to get the total number of blank spaces\n",
+ "num_blank_spaces = len(blank_space)\n",
+ "\n",
+ "# Finally, we print the result\n",
+ "print(\"Number of blank spaces:\", num_blank_spaces)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "f6566bef",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Positions of '@': [9, 11, 14, 9, 12, 8, 14, 9, 8, 16, 12, 13, 6, 9, 10, 12, 8, 11, 14, 6, 11, 7, 15, 9, 12, 8, 9, 11, 13, 6, 9, 12, 11, 16, 11]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 4. Get the position of @ in each string in the emails list. Hint: Use mapfunction and find method.\n",
+ "\n",
+ "# First we use the map() function to apply the specified lambda function to each element in the emails list. \n",
+ "# The lambda function takes an email address (email) and finds the position of the '@' symbol using the find() method.\n",
+ "# Finally, list(...) converts the result into a list.\n",
+ "positions_at = list(map(lambda email: email.find('@'), emails))\n",
+ "\n",
+ "# Finally, we print the results\n",
+ "print(\"Positions of '@':\",positions_at)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "d5bb16d7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Contains '.edu.': [True, True, True, False, False, True, True, False, False, False, False, False, True, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 5. Identifies if exists .edu. in each string in the emails list. Get a list of Booleans. Hint: Use mapfunction and find method. # \n",
+ "\n",
+ "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
+ "# The lambda function checks if '.edu.' is present in each email address.\n",
+ "contains_edu_lambda = lambda email: '.edu.' in email\n",
+ "mapped_result = map(contains_edu_lambda, emails)\n",
+ "\n",
+ "# Then, we convert the mapped result into a list.\n",
+ "contains_edu_list = list(mapped_result)\n",
+ "\n",
+ "# Finally we print the result\n",
+ "print(\"Contains '.edu.':\", contains_edu_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "0fbd7c03",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Substrings before the first dot: ['cscornejo@pucp', 'orellana', 'karina', 'a20083223@pucp', 'abel', 'mtintaya@pucp', 'joselin', 'a20105737@pucp', 'jfgomezc@pucp', 'afrania', 'luzon', 'adrian', 'soto', 'a20132766@pucp', 'andre', 'gustavo', 'pmlozada@pucp', 'm', 'nicolas', 'gvidal@pucp', 'jane', 'm', 'alejandro', 'a20167070@pucp', 'riega', 'vlevanot@pucp', 'sesquives@pucp', 'perez', 'mariana', 'aclavo@pucp', 'a20182474@pucp', 'josue', 'fabio', 'fernanda', 'aquillatupa@pucp']\n",
+ "Number of substrings containing '@': 14\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 6. Get all the strings before the first dot '.' in each string in the emails list.\n",
+ "# Then, we identify how many of them have '@'.\n",
+ "\n",
+ "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
+ "# The lambda function extracts the substring before the first dot '.'; the '@' check is done afterwards.\n",
+ "substring_before_dot_lambda = lambda email: email.split('.')[0]\n",
+ "mapped_result = map(substring_before_dot_lambda, emails)\n",
+ "\n",
+ "# After that, we convert the mapped result into a list.\n",
+ "substring_before_dot_list = list(mapped_result)\n",
+ "\n",
+ "# Also, we count how many substrings contain '@' and print the results\n",
+ "count_with_at = sum('@' in substring for substring in substring_before_dot_list)\n",
+ "\n",
+ "print(\"Substrings before the first dot:\", substring_before_dot_list)\n",
+ "print(\"Number of substrings containing '@':\", count_with_at)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "c40b32db",
@@ -97,7 +287,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.11.4"
}
},
"nbformat": 4,
From 29e6bac297f67cbf50b6d4115afe1fac8f2ce5ef Mon Sep 17 00:00:00 2001
From: Rafael Vargas
Date: Mon, 8 Jan 2024 21:05:39 -0500
Subject: [PATCH 10/14] Added answers for 1.2 List
---
.../group_7_ass_2_2024_re-checkpoint.ipynb | 218 +++++++++++++++++-
.../assignment_2/group_7_ass_2_2024_re.ipynb | 218 +++++++++++++++++-
2 files changed, 430 insertions(+), 6 deletions(-)
diff --git a/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb b/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
index d4072634..605777fe 100644
--- a/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
+++ b/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "bc677f0d",
+ "id": "f72900b3",
"metadata": {},
"source": [
"## 1.2 Lists\n",
@@ -33,9 +33,221 @@
"8 - Give only the last names of students who do not have email. Use the `emails` and `last_names` listt. **Hint: Use `map` and `split`.**"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "ab6ac49c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "f_list = [np.nan , np.nan, \"Austria\", \"Germany\", np.nan, \"Pakistan\", \"np.nan\", np.nan ]\n",
+ "\n",
+ "text1 = ['My', 'teacher', 'assistant', 'is', 'so', 'boring.']\n",
+ "\n",
+ "p2_list = [ 2 , 3, 4, 5 ]\n",
+ "\n",
+ "values1 = [ 86, 86, 85, 85, 85, 83, 23, 0, 84, 1 ] \n",
+ "\n",
+ "last_and_name = [ \"CORNEJO SANCHEZ, CHRISTIAN SANTOS\", \"ORELLANA QUISPE, CRISTIAN NASSER\", \"MORALES CHOQUEHUANCA, ANGELICA KARINA\", \"GUIMARAY RIBEYRO, JOSE ROBERTO\", \"CAMACHO GAVIDIA, ABEL FERNANDO\", \"TINTAYA ORIHUELA, MEIR ALVARO\", \"CHAVEZ MARTINEZ, JOSELIN ALEXANDRA\", \"FIGUEROA MURO, LEONEL ARTURO\", \"GOMEZ CRIBILLERO, JOSE FELIPE\", \"PALOMINO SEGUÍN, AFRANIA\", \"LUZON CUEVA, BIANCA MARIETTE\", \"SUAÑA ZEGARRA, ADRIAN ANDRE\", \"SOTO POMACHAGUA, DORKAS YOMIRA JHERMY\", \"FIORENTINO MARTINEZ, LADY ALY\", \"LAMA MAVILA, HECTOR ANDRE\", \"MEZA HINOJO, GUSTAVO\", \"LOZADA MURILLO, PERSEO MARCELO\", \"ZAMBRANO JIMENEZ, MIGUEL ALONZO\", \"JACOBS LUQUE, NICOLAS\", \"VIDAL VIDAL, ROCIO GABRIELA\", \"TORRES ANICAMA, JANE CAMILA\", \"LOPEZ ESTRADA, MARIA ELISA\", \"BOYCO ORAMS, ALEJANDRO\", \"DIAZ BERROSPI, KARLINE ROSMELI\", \"RIEGA ESCALANTE, STEPHY ROSARIO\", \"LEVANO TORRES, VALERIA CECILIA\", \"ESQUIVES BRAVO, SEBASTIAN RENATO\", \"PEREZ GONZALES, JUAN CARLOS\", \"OTERO MAGUIÑA, MARIANA\", \"CLAVO CAMPOS, ANDREA BRIZETH\", \"AGUILAR GARCIA, ERICK JOSUE\", \"CALDAS VELASQUEZ, JOSUE DANIEL\", \"SALAS NUÑEZ BORJA, FABIO MANUEL\", \"PIZARRO VILLANES, FERNANDA NICOLLE\", \"QUILLATUPA MORALES, ANGELA ADELINA\", \"HUANCAYA IDONE, CESAR DANTE\", \"CALVO PORTOCARRERO, GABRIELA ISABEL\", \"IBAÑEZ ABANTO, ANGEL MAURICIO\", \"MELÉNDEZ APONTE, JUAN DIEGO\", \"CRISTIAN SERRANO, ARONE\", \"HINOJOSA CAHUANA, PERCY ALBERTH\", \"ANGLAS GARCÍA, KEVIN ARTURO\", \"ALDAVE ACOSTA, CESAR ERNESTO\", \"NÚÑEZ HUAMÁN, CÉSAR AGUSTO\", \"OBREGON HUAMAN, DIANA EDITH\", \"SOTO PACHERRES, RODRIGO FRANCO\", \"INGARUCA RIVERA, GRETTEL ALEXANDRA\", \"ROJAS HUAMAN, ROSA ANGELA\", \"NEYRA SALAS, DANTE OMAR\", \"HUERTA ESPINOZA, YAJAIRA ALEXANDRA\", \"HUANCA MARTINEZ, JORGE ALBERTO\", \"FLORES CADILLO, ALEXIS\" ]\n",
+ "\n",
+ "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\", \"\", \"\", \"\", \"\", \"\", \"f0873079@pucp.edu.pe\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"flores.alexis@pucp.edu.pe\", ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "9d8cec88",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The indices [0, 1, 4, 7] have np.nan values.\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 1\n",
+ "\n",
+ "nan_values = [i for i, value in enumerate(f_list) if value is np.nan]\n",
+ "\n",
+ "print(f\"The indices {nan_values} have np.nan values.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "fbe22089",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5]\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 2\n",
+ "\n",
+ "p2_list_2 = p2_list * 4\n",
+ "\n",
+ "print(p2_list_2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "396e9b4d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "8\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 3\n",
+ "\n",
+ "print(len(f_list))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "160cab6a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'My teacher assistant is so boring.'"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ " # Answer 4\n",
+ "\n",
+ "answer_4 = ' '.join(text1)\n",
+ "answer_4"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "29b4805e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'My TA My teacher assistant is so boring but is very funny'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ " # Answer 5\n",
+ " \n",
+ "text1[-1] = text1[-1].rstrip('.')\n",
+ " \n",
+ "text_answer = ['My', 'TA']\n",
+ "\n",
+ "text_answer_b = ['but', 'is', 'very', 'funny']\n",
+ "\n",
+ "text_answer.extend(text1 + text_answer_b)\n",
+ "\n",
+ "answer_5 = ' '.join(text_answer)\n",
+ "answer_5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "e9ff96e3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The max value of values1 is 86 and is located in the 0 index.\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 6.1\n",
+ " \n",
+ "max_index = values1.index(max(values1))\n",
+ " \n",
+ "print(f\"The max value of values1 is {max(values1)} and is located in the {max_index} index.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "7e7147b8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The min value of values1 is 0 and is located in the 7 index.\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 6.2\n",
+ " \n",
+ "min_index = values1.index(min(values1))\n",
+ " \n",
+ "print(f\"The min value of values1 is {min(values1)} and is located in the {min_index} index.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "ae532dfb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Names: ('CHRISTIAN SANTOS', 'CRISTIAN NASSER', 'ANGELICA KARINA', 'JOSE ROBERTO', 'ABEL FERNANDO', 'MEIR ALVARO', 'JOSELIN ALEXANDRA', 'LEONEL ARTURO', 'JOSE FELIPE', 'AFRANIA', 'BIANCA MARIETTE', 'ADRIAN ANDRE', 'DORKAS YOMIRA JHERMY', 'LADY ALY', 'HECTOR ANDRE', 'GUSTAVO', 'PERSEO MARCELO', 'MIGUEL ALONZO', 'NICOLAS', 'ROCIO GABRIELA', 'JANE CAMILA', 'MARIA ELISA', 'ALEJANDRO', 'KARLINE ROSMELI', 'STEPHY ROSARIO', 'VALERIA CECILIA', 'SEBASTIAN RENATO', 'JUAN CARLOS', 'MARIANA', 'ANDREA BRIZETH', 'ERICK JOSUE', 'JOSUE DANIEL', 'FABIO MANUEL', 'FERNANDA NICOLLE', 'ANGELA ADELINA', 'CESAR DANTE', 'GABRIELA ISABEL', 'ANGEL MAURICIO', 'JUAN DIEGO', 'ARONE', 'PERCY ALBERTH', 'KEVIN ARTURO', 'CESAR ERNESTO', 'CÉSAR AGUSTO', 'DIANA EDITH', 'RODRIGO FRANCO', 'GRETTEL ALEXANDRA', 'ROSA ANGELA', 'DANTE OMAR', 'YAJAIRA ALEXANDRA', 'JORGE ALBERTO', 'ALEXIS')\n",
+ "Last Names: ('CORNEJO SANCHEZ', 'ORELLANA QUISPE', 'MORALES CHOQUEHUANCA', 'GUIMARAY RIBEYRO', 'CAMACHO GAVIDIA', 'TINTAYA ORIHUELA', 'CHAVEZ MARTINEZ', 'FIGUEROA MURO', 'GOMEZ CRIBILLERO', 'PALOMINO SEGUÍN', 'LUZON CUEVA', 'SUAÑA ZEGARRA', 'SOTO POMACHAGUA', 'FIORENTINO MARTINEZ', 'LAMA MAVILA', 'MEZA HINOJO', 'LOZADA MURILLO', 'ZAMBRANO JIMENEZ', 'JACOBS LUQUE', 'VIDAL VIDAL', 'TORRES ANICAMA', 'LOPEZ ESTRADA', 'BOYCO ORAMS', 'DIAZ BERROSPI', 'RIEGA ESCALANTE', 'LEVANO TORRES', 'ESQUIVES BRAVO', 'PEREZ GONZALES', 'OTERO MAGUIÑA', 'CLAVO CAMPOS', 'AGUILAR GARCIA', 'CALDAS VELASQUEZ', 'SALAS NUÑEZ BORJA', 'PIZARRO VILLANES', 'QUILLATUPA MORALES', 'HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'HINOJOSA CAHUANA', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ', 'FLORES CADILLO')\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 7\n",
+ "\n",
+ "last_names, names = zip(*map(lambda x: x.split(', '), last_and_name))\n",
+ " # the lambda function uses split(', ') to separate each last name from each name\n",
+ " # zip(*...) regroups those pairs into two tuples: all the last names and all the names\n",
+ " # \"last_names, names\" unpacks those two tuples into separate variables (they are tuples, not lists)\n",
+ "print(\"Names:\", names)\n",
+ "print(\"Last Names:\", last_names)"
+ ]
+ },
{
"cell_type": "markdown",
- "id": "29504dc0",
+ "id": "d6cdd96e",
"metadata": {},
"source": [
"## 1.3 Strings\n",
@@ -55,7 +267,7 @@
},
{
"cell_type": "markdown",
- "id": "c40b32db",
+ "id": "4cdc1b63",
"metadata": {},
"source": [
"## 1.4 Pandas\n",
diff --git a/assignments/assignment_2/group_7_ass_2_2024_re.ipynb b/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
index d4072634..605777fe 100644
--- a/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
+++ b/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "bc677f0d",
+ "id": "f72900b3",
"metadata": {},
"source": [
"## 1.2 Lists\n",
@@ -33,9 +33,221 @@
"8 - Give only the last names of students who do not have email. Use the `emails` and `last_names` listt. **Hint: Use `map` and `split`.**"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "ab6ac49c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "f_list = [np.nan , np.nan, \"Austria\", \"Germany\", np.nan, \"Pakistan\", \"np.nan\", np.nan ]\n",
+ "\n",
+ "text1 = ['My', 'teacher', 'assistant', 'is', 'so', 'boring.']\n",
+ "\n",
+ "p2_list = [ 2 , 3, 4, 5 ]\n",
+ "\n",
+ "values1 = [ 86, 86, 85, 85, 85, 83, 23, 0, 84, 1 ] \n",
+ "\n",
+ "last_and_name = [ \"CORNEJO SANCHEZ, CHRISTIAN SANTOS\", \"ORELLANA QUISPE, CRISTIAN NASSER\", \"MORALES CHOQUEHUANCA, ANGELICA KARINA\", \"GUIMARAY RIBEYRO, JOSE ROBERTO\", \"CAMACHO GAVIDIA, ABEL FERNANDO\", \"TINTAYA ORIHUELA, MEIR ALVARO\", \"CHAVEZ MARTINEZ, JOSELIN ALEXANDRA\", \"FIGUEROA MURO, LEONEL ARTURO\", \"GOMEZ CRIBILLERO, JOSE FELIPE\", \"PALOMINO SEGUÍN, AFRANIA\", \"LUZON CUEVA, BIANCA MARIETTE\", \"SUAÑA ZEGARRA, ADRIAN ANDRE\", \"SOTO POMACHAGUA, DORKAS YOMIRA JHERMY\", \"FIORENTINO MARTINEZ, LADY ALY\", \"LAMA MAVILA, HECTOR ANDRE\", \"MEZA HINOJO, GUSTAVO\", \"LOZADA MURILLO, PERSEO MARCELO\", \"ZAMBRANO JIMENEZ, MIGUEL ALONZO\", \"JACOBS LUQUE, NICOLAS\", \"VIDAL VIDAL, ROCIO GABRIELA\", \"TORRES ANICAMA, JANE CAMILA\", \"LOPEZ ESTRADA, MARIA ELISA\", \"BOYCO ORAMS, ALEJANDRO\", \"DIAZ BERROSPI, KARLINE ROSMELI\", \"RIEGA ESCALANTE, STEPHY ROSARIO\", \"LEVANO TORRES, VALERIA CECILIA\", \"ESQUIVES BRAVO, SEBASTIAN RENATO\", \"PEREZ GONZALES, JUAN CARLOS\", \"OTERO MAGUIÑA, MARIANA\", \"CLAVO CAMPOS, ANDREA BRIZETH\", \"AGUILAR GARCIA, ERICK JOSUE\", \"CALDAS VELASQUEZ, JOSUE DANIEL\", \"SALAS NUÑEZ BORJA, FABIO MANUEL\", \"PIZARRO VILLANES, FERNANDA NICOLLE\", \"QUILLATUPA MORALES, ANGELA ADELINA\", \"HUANCAYA IDONE, CESAR DANTE\", \"CALVO PORTOCARRERO, GABRIELA ISABEL\", \"IBAÑEZ ABANTO, ANGEL MAURICIO\", \"MELÉNDEZ APONTE, JUAN DIEGO\", \"CRISTIAN SERRANO, ARONE\", \"HINOJOSA CAHUANA, PERCY ALBERTH\", \"ANGLAS GARCÍA, KEVIN ARTURO\", \"ALDAVE ACOSTA, CESAR ERNESTO\", \"NÚÑEZ HUAMÁN, CÉSAR AGUSTO\", \"OBREGON HUAMAN, DIANA EDITH\", \"SOTO PACHERRES, RODRIGO FRANCO\", \"INGARUCA RIVERA, GRETTEL ALEXANDRA\", \"ROJAS HUAMAN, ROSA ANGELA\", \"NEYRA SALAS, DANTE OMAR\", \"HUERTA ESPINOZA, YAJAIRA ALEXANDRA\", \"HUANCA MARTINEZ, JORGE ALBERTO\", \"FLORES CADILLO, ALEXIS\" ]\n",
+ "\n",
+ "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\", \"\", \"\", \"\", \"\", \"\", \"f0873079@pucp.edu.pe\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"flores.alexis@pucp.edu.pe\", ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "9d8cec88",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The indices [0, 1, 4, 7] have np.nan values.\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 1\n",
+ "\n",
+ "nan_values = [i for i, value in enumerate(f_list) if value is np.nan]\n",
+ "\n",
+ "print(f\"The indices {nan_values} have np.nan values.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "fbe22089",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5]\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 2\n",
+ "\n",
+ "p2_list_2 = p2_list * 4\n",
+ "\n",
+ "print(p2_list_2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "396e9b4d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "8\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 3\n",
+ "\n",
+ "print(len(f_list))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "160cab6a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'My teacher assistant is so boring.'"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ " # Answer 4\n",
+ "\n",
+ "answer_4 = ' '.join(text1)\n",
+ "answer_4"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "29b4805e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'My TA My teacher assistant is so boring but is very funny'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ " # Answer 5\n",
+ " \n",
+ "text1[-1] = text1[-1].rstrip('.')\n",
+ " \n",
+ "text_answer = ['My', 'TA']\n",
+ "\n",
+ "text_answer_b = ['but', 'is', 'very', 'funny']\n",
+ "\n",
+ "text_answer.extend(text1 + text_answer_b)\n",
+ "\n",
+ "answer_5 = ' '.join(text_answer)\n",
+ "answer_5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "e9ff96e3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The max value of values1 is 86 and is located in the 0 index.\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 6.1\n",
+ " \n",
+ "max_index = values1.index(max(values1))\n",
+ " \n",
+ "print(f\"The max value of values1 is {max(values1)} and is located in the {max_index} index.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "7e7147b8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The min value of values1 is 0 and is located in the 7 index.\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 6.2\n",
+ " \n",
+ "min_index = values1.index(min(values1))\n",
+ " \n",
+ "print(f\"The min value of values1 is {min(values1)} and is located in the {min_index} index.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "ae532dfb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Names: ('CHRISTIAN SANTOS', 'CRISTIAN NASSER', 'ANGELICA KARINA', 'JOSE ROBERTO', 'ABEL FERNANDO', 'MEIR ALVARO', 'JOSELIN ALEXANDRA', 'LEONEL ARTURO', 'JOSE FELIPE', 'AFRANIA', 'BIANCA MARIETTE', 'ADRIAN ANDRE', 'DORKAS YOMIRA JHERMY', 'LADY ALY', 'HECTOR ANDRE', 'GUSTAVO', 'PERSEO MARCELO', 'MIGUEL ALONZO', 'NICOLAS', 'ROCIO GABRIELA', 'JANE CAMILA', 'MARIA ELISA', 'ALEJANDRO', 'KARLINE ROSMELI', 'STEPHY ROSARIO', 'VALERIA CECILIA', 'SEBASTIAN RENATO', 'JUAN CARLOS', 'MARIANA', 'ANDREA BRIZETH', 'ERICK JOSUE', 'JOSUE DANIEL', 'FABIO MANUEL', 'FERNANDA NICOLLE', 'ANGELA ADELINA', 'CESAR DANTE', 'GABRIELA ISABEL', 'ANGEL MAURICIO', 'JUAN DIEGO', 'ARONE', 'PERCY ALBERTH', 'KEVIN ARTURO', 'CESAR ERNESTO', 'CÉSAR AGUSTO', 'DIANA EDITH', 'RODRIGO FRANCO', 'GRETTEL ALEXANDRA', 'ROSA ANGELA', 'DANTE OMAR', 'YAJAIRA ALEXANDRA', 'JORGE ALBERTO', 'ALEXIS')\n",
+ "Last Names: ('CORNEJO SANCHEZ', 'ORELLANA QUISPE', 'MORALES CHOQUEHUANCA', 'GUIMARAY RIBEYRO', 'CAMACHO GAVIDIA', 'TINTAYA ORIHUELA', 'CHAVEZ MARTINEZ', 'FIGUEROA MURO', 'GOMEZ CRIBILLERO', 'PALOMINO SEGUÍN', 'LUZON CUEVA', 'SUAÑA ZEGARRA', 'SOTO POMACHAGUA', 'FIORENTINO MARTINEZ', 'LAMA MAVILA', 'MEZA HINOJO', 'LOZADA MURILLO', 'ZAMBRANO JIMENEZ', 'JACOBS LUQUE', 'VIDAL VIDAL', 'TORRES ANICAMA', 'LOPEZ ESTRADA', 'BOYCO ORAMS', 'DIAZ BERROSPI', 'RIEGA ESCALANTE', 'LEVANO TORRES', 'ESQUIVES BRAVO', 'PEREZ GONZALES', 'OTERO MAGUIÑA', 'CLAVO CAMPOS', 'AGUILAR GARCIA', 'CALDAS VELASQUEZ', 'SALAS NUÑEZ BORJA', 'PIZARRO VILLANES', 'QUILLATUPA MORALES', 'HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'HINOJOSA CAHUANA', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ', 'FLORES CADILLO')\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Answer 7\n",
+ "\n",
+ "last_names, names = zip(*map(lambda x: x.split(', '), last_and_name))\n",
+ " # the lambda function uses split(', ') to separate each last name from each name\n",
+ " # zip(*...) regroups those pairs into two tuples: all the last names and all the names\n",
+ " # \"last_names, names\" unpacks those two tuples into separate variables (they are tuples, not lists)\n",
+ "print(\"Names:\", names)\n",
+ "print(\"Last Names:\", last_names)"
+ ]
+ },
{
"cell_type": "markdown",
- "id": "29504dc0",
+ "id": "d6cdd96e",
"metadata": {},
"source": [
"## 1.3 Strings\n",
@@ -55,7 +267,7 @@
},
{
"cell_type": "markdown",
- "id": "c40b32db",
+ "id": "4cdc1b63",
"metadata": {},
"source": [
"## 1.4 Pandas\n",
From 08d00224351b1aa40fa97ace27419e7254954601 Mon Sep 17 00:00:00 2001
From: Ilenia Alejandra <90498196+ILe2014@users.noreply.github.com>
Date: Mon, 8 Jan 2024 21:24:08 -0500
Subject: [PATCH 11/14] Added information
---
.../group_7_ass_2_2024_re-checkpoint.ipynb | 61 ++++++++++++++++++-
.../assignment_2/group_7_ass_2_2024_re.ipynb | 61 ++++++++++++++++++-
2 files changed, 120 insertions(+), 2 deletions(-)
diff --git a/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb b/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
index d4072634..ffdfed5f 100644
--- a/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
+++ b/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
@@ -79,6 +79,65 @@
"Use DataFrame.append() or pd.concat() to add rows.\n",
"Use groupby() for aggregation tasks."
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f865ded4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 4\n",
+ "# Temporalmente eliminamos las columnas 'Level' y 'Year' para evitar NaN en el df\n",
+ "df = df.drop(['Level', 'Year'], axis=1)\n",
+ "\n",
+ "# Agregamos la nueva fila solicitada\n",
+ "new_row = pd.DataFrame({'DATE': ['01/06/2024'], \n",
+ " 'OPEN': [14.27], \n",
+ " 'HIGH': [14.61], \n",
+ " 'LOW': [13.32], \n",
+ " 'CLOSE': [13.38]}) \n",
+ "\n",
+ "# Agregamos la nueva fila al DataFrame existente\n",
+ "df = pd.concat([df, new_row], ignore_index=True)\n",
+ "\n",
+ "# Recategorizamos 'Level' y 'Year' después de agregar la nueva fila\n",
+ "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
+ " labels=['Low', 'Medium', 'High'])\n",
+ "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7aae8eb9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 5\n",
+ "# Agrupa por 'Year' y calcula el promedio de 'Close' para cada año\n",
+ "average_close_by_year = df.groupby('Year')['CLOSE'].mean()\n",
+ "\n",
+ "# Round the values to two decimal places\n",
+ "average_close_by_year = average_close_by_year.round(2)\n",
+ "\n",
+ "# Crea un nuevo DataFrame con los resultados\n",
+ "df_average_year = pd.DataFrame({'Year': average_close_by_year.index, 'Average Close': average_close_by_year.values})\n",
+ "df_average_year"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1a698ed6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Count the number of 'High', 'Medium', and 'Low' Level days in the data\n",
+ "df = df['Level'].value_counts()\n",
+ "df"
+ ]
}
],
"metadata": {
@@ -97,7 +156,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.11.4"
}
},
"nbformat": 4,
diff --git a/assignments/assignment_2/group_7_ass_2_2024_re.ipynb b/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
index d4072634..ffdfed5f 100644
--- a/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
+++ b/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
@@ -79,6 +79,65 @@
"Use DataFrame.append() or pd.concat() to add rows.\n",
"Use groupby() for aggregation tasks."
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f865ded4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 4\n",
+ "# Temporalmente eliminamos las columnas 'Level' y 'Year' para evitar NaN en el df\n",
+ "df = df.drop(['Level', 'Year'], axis=1)\n",
+ "\n",
+ "# Agregamos la nueva fila solicitada\n",
+ "new_row = pd.DataFrame({'DATE': ['01/06/2024'], \n",
+ " 'OPEN': [14.27], \n",
+ " 'HIGH': [14.61], \n",
+ " 'LOW': [13.32], \n",
+ " 'CLOSE': [13.38]}) \n",
+ "\n",
+ "# Agregamos la nueva fila al DataFrame existente\n",
+ "df = pd.concat([df, new_row], ignore_index=True)\n",
+ "\n",
+ "# Recategorizamos 'Level' y 'Year' después de agregar la nueva fila\n",
+ "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
+ " labels=['Low', 'Medium', 'High'])\n",
+ "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7aae8eb9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 5\n",
+ "# Agrupa por 'Year' y calcula el promedio de 'Close' para cada año\n",
+ "average_close_by_year = df.groupby('Year')['CLOSE'].mean()\n",
+ "\n",
+ "# Redondea los valores a dos decimales\n",
+ "average_close_by_year = average_close_by_year.round(2)\n",
+ "\n",
+ "# Crea un nuevo DataFrame con los resultados\n",
+ "df_average_year = pd.DataFrame({'Year': average_close_by_year.index, 'Average Close': average_close_by_year.values})\n",
+ "df_average_year"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1a698ed6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Count the number of 'High', 'Medium', and 'Low' Level days in the data\n",
+ "df = df['Level'].value_counts()\n",
+ "df"
+ ]
}
],
"metadata": {
@@ -97,7 +156,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.11.4"
}
},
"nbformat": 4,
From 618d385061ead36f0a54e6fbed7f52d6738142f2 Mon Sep 17 00:00:00 2001
From: JamesMVa <111825034+JamesMVa@users.noreply.github.com>
Date: Mon, 8 Jan 2024 21:39:43 -0500
Subject: [PATCH 12/14] James code 1.4
---
.../group_7_ass_2_2024_re-checkpoint.ipynb | 873 +++++++++++++++++-
.../assignment_2/group_7_ass_2_2024_re.ipynb | 873 +++++++++++++++++-
2 files changed, 1694 insertions(+), 52 deletions(-)
diff --git a/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb b/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
index 42b89b0b..917b1210 100644
--- a/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
+++ b/assignments/assignment_2/.ipynb_checkpoints/group_7_ass_2_2024_re-checkpoint.ipynb
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 1,
"id": "ab6ac49c",
"metadata": {},
"outputs": [],
@@ -57,7 +57,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 2,
"id": "9d8cec88",
"metadata": {},
"outputs": [
@@ -79,7 +79,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 3,
"id": "fbe22089",
"metadata": {},
"outputs": [
@@ -101,7 +101,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 4,
"id": "396e9b4d",
"metadata": {},
"outputs": [
@@ -121,7 +121,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 5,
"id": "160cab6a",
"metadata": {},
"outputs": [
@@ -131,7 +131,7 @@
"'My teacher assistant is so boring.'"
]
},
- "execution_count": 7,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -145,7 +145,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 6,
"id": "29b4805e",
"metadata": {},
"outputs": [
@@ -155,7 +155,7 @@
"'My TA My teacher assistant is so boring but is very funny'"
]
},
- "execution_count": 8,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -177,7 +177,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 7,
"id": "e9ff96e3",
"metadata": {},
"outputs": [
@@ -199,7 +199,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 8,
"id": "7e7147b8",
"metadata": {},
"outputs": [
@@ -221,7 +221,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 9,
"id": "ae532dfb",
"metadata": {},
"outputs": [
@@ -267,7 +267,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 10,
"id": "045944b7",
"metadata": {},
"outputs": [],
@@ -277,7 +277,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"id": "a53dd7b1",
"metadata": {},
"outputs": [],
@@ -287,7 +287,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 12,
"id": "68fe62ee",
"metadata": {},
"outputs": [
@@ -316,7 +316,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 13,
"id": "b9a72a9c",
"metadata": {},
"outputs": [
@@ -341,7 +341,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 14,
"id": "5a939c7f",
"metadata": {},
"outputs": [
@@ -368,7 +368,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 15,
"id": "f6566bef",
"metadata": {},
"outputs": [
@@ -394,7 +394,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 16,
"id": "d5bb16d7",
"metadata": {},
"outputs": [
@@ -423,7 +423,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 17,
"id": "0fbd7c03",
"metadata": {},
"outputs": [
@@ -484,10 +484,568 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
+ "id": "9c2bd811",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE | \n",
+ " OPEN | \n",
+ " HIGH | \n",
+ " LOW | \n",
+ " CLOSE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 01/02/1990 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 01/03/1990 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 01/04/1990 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 01/05/1990 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 01/08/1990 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 8572 | \n",
+ " 12/29/2023 | \n",
+ " 12.55 | \n",
+ " 13.19 | \n",
+ " 12.36 | \n",
+ " 12.45 | \n",
+ "
\n",
+ " \n",
+ " | 8573 | \n",
+ " 01/02/2024 | \n",
+ " 13.22 | \n",
+ " 14.23 | \n",
+ " 13.10 | \n",
+ " 13.20 | \n",
+ "
\n",
+ " \n",
+ " | 8574 | \n",
+ " 01/03/2024 | \n",
+ " 13.35 | \n",
+ " 14.22 | \n",
+ " 13.33 | \n",
+ " 14.04 | \n",
+ "
\n",
+ " \n",
+ " | 8575 | \n",
+ " 01/04/2024 | \n",
+ " 13.93 | \n",
+ " 14.20 | \n",
+ " 13.64 | \n",
+ " 14.13 | \n",
+ "
\n",
+ " \n",
+ " | 8576 | \n",
+ " 01/05/2024 | \n",
+ " 14.24 | \n",
+ " 14.58 | \n",
+ " 13.29 | \n",
+ " 13.35 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8577 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE OPEN HIGH LOW CLOSE\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26\n",
+ "... ... ... ... ... ...\n",
+ "8572 12/29/2023 12.55 13.19 12.36 12.45\n",
+ "8573 01/02/2024 13.22 14.23 13.10 13.20\n",
+ "8574 01/03/2024 13.35 14.22 13.33 14.04\n",
+ "8575 01/04/2024 13.93 14.20 13.64 14.13\n",
+ "8576 01/05/2024 14.24 14.58 13.29 13.35\n",
+ "\n",
+ "[8577 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 1\n",
+ "import pandas as pd\n",
+ "\n",
+ "# This code loads the data from the provided URL.\n",
+ "url = \"https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\"\n",
+ "df = pd.read_csv(url)\n",
+ "# We check the data assigned to the variable 'df'.\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "594fd5af",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Primeras cinco filas del DataFrame:\n",
+ " DATE OPEN HIGH LOW CLOSE\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26\n",
+ "\n",
+ "Estadísticas resumidas del DataFrame:\n",
+ " OPEN HIGH LOW CLOSE\n",
+ "count 8577.000000 8577.000000 8577.000000 8577.000000\n",
+ "mean 19.666454 20.474364 18.914884 19.580374\n",
+ "std 7.979066 8.439927 7.469827 7.906214\n",
+ "min 9.010000 9.310000 8.560000 9.140000\n",
+ "25% 13.940000 14.540000 13.400000 13.880000\n",
+ "50% 17.790000 18.470000 17.220000 17.760000\n",
+ "75% 23.100000 23.960000 22.320000 22.990000\n",
+ "max 82.690000 89.530000 72.760000 82.690000\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 2\n",
+ "# We display the first five rows of the DataFrame.\n",
+ "print(\"Primeras cinco filas del DataFrame:\")\n",
+ "print(df.head())\n",
+ "\n",
+ "# We display the summary statistics of the DataFrame.\n",
+ "print(\"\\nEstadísticas resumidas del DataFrame:\")\n",
+ "print(df.describe())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "97f1a822",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE | \n",
+ " OPEN | \n",
+ " HIGH | \n",
+ " LOW | \n",
+ " CLOSE | \n",
+ " Level | \n",
+ " Year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 01/02/1990 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 01/03/1990 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 01/04/1990 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 01/05/1990 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 01/08/1990 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 8572 | \n",
+ " 12/29/2023 | \n",
+ " 12.55 | \n",
+ " 13.19 | \n",
+ " 12.36 | \n",
+ " 12.45 | \n",
+ " Low | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " | 8573 | \n",
+ " 01/02/2024 | \n",
+ " 13.22 | \n",
+ " 14.23 | \n",
+ " 13.10 | \n",
+ " 13.20 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8574 | \n",
+ " 01/03/2024 | \n",
+ " 13.35 | \n",
+ " 14.22 | \n",
+ " 13.33 | \n",
+ " 14.04 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8575 | \n",
+ " 01/04/2024 | \n",
+ " 13.93 | \n",
+ " 14.20 | \n",
+ " 13.64 | \n",
+ " 14.13 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8576 | \n",
+ " 01/05/2024 | \n",
+ " 14.24 | \n",
+ " 14.58 | \n",
+ " 13.29 | \n",
+ " 13.35 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8577 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE OPEN HIGH LOW CLOSE Level Year\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
+ "... ... ... ... ... ... ... ...\n",
+ "8572 12/29/2023 12.55 13.19 12.36 12.45 Low 2023\n",
+ "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
+ "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
+ "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
+ "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
+ "\n",
+ "[8577 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 3\n",
+ "# We add a 'Level' column to categorize the 'CLOSE' values\n",
+ "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
+ " labels=['Low', 'Medium', 'High'])\n",
+ "\n",
+ "# We add the 'Year' column extracted from the 'DATE' column, keeping only the year via the .dt.year accessor.\n",
+ "# First, the 'DATE' column is converted to datetime with pd.to_datetime.\n",
+ "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
+ "\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
"id": "f865ded4",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE | \n",
+ " OPEN | \n",
+ " HIGH | \n",
+ " LOW | \n",
+ " CLOSE | \n",
+ " Level | \n",
+ " Year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 01/02/1990 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 01/03/1990 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 01/04/1990 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 01/05/1990 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 01/08/1990 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 8573 | \n",
+ " 01/02/2024 | \n",
+ " 13.22 | \n",
+ " 14.23 | \n",
+ " 13.10 | \n",
+ " 13.20 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8574 | \n",
+ " 01/03/2024 | \n",
+ " 13.35 | \n",
+ " 14.22 | \n",
+ " 13.33 | \n",
+ " 14.04 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8575 | \n",
+ " 01/04/2024 | \n",
+ " 13.93 | \n",
+ " 14.20 | \n",
+ " 13.64 | \n",
+ " 14.13 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8576 | \n",
+ " 01/05/2024 | \n",
+ " 14.24 | \n",
+ " 14.58 | \n",
+ " 13.29 | \n",
+ " 13.35 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8577 | \n",
+ " 01/06/2024 | \n",
+ " 14.27 | \n",
+ " 14.61 | \n",
+ " 13.32 | \n",
+ " 13.38 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8578 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE OPEN HIGH LOW CLOSE Level Year\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
+ "... ... ... ... ... ... ... ...\n",
+ "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
+ "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
+ "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
+ "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
+ "8577 01/06/2024 14.27 14.61 13.32 13.38 Low 2024\n",
+ "\n",
+ "[8578 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# 4\n",
"# Temporalmente eliminamos las columnas 'Level' y 'Year' para evitar NaN en el df\n",
@@ -512,10 +1070,259 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"id": "7aae8eb9",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " Average Close | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1990 | \n",
+ " 23.06 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1991 | \n",
+ " 18.37 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1992 | \n",
+ " 15.45 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1993 | \n",
+ " 12.69 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1994 | \n",
+ " 13.93 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 1995 | \n",
+ " 12.39 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 1996 | \n",
+ " 16.44 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 1997 | \n",
+ " 22.36 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 1998 | \n",
+ " 25.60 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 1999 | \n",
+ " 24.37 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 2000 | \n",
+ " 23.32 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 2001 | \n",
+ " 25.75 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 2002 | \n",
+ " 27.29 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 2003 | \n",
+ " 21.98 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 2004 | \n",
+ " 15.48 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 2005 | \n",
+ " 12.81 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 2006 | \n",
+ " 12.81 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 2007 | \n",
+ " 17.54 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 2008 | \n",
+ " 32.70 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 2009 | \n",
+ " 31.48 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 2010 | \n",
+ " 22.55 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 2011 | \n",
+ " 24.20 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 2012 | \n",
+ " 17.80 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 2013 | \n",
+ " 14.23 | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 2014 | \n",
+ " 14.18 | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " 2015 | \n",
+ " 16.67 | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " 2016 | \n",
+ " 15.83 | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " 2017 | \n",
+ " 11.09 | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " 2018 | \n",
+ " 16.64 | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " 2019 | \n",
+ " 15.39 | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " 2020 | \n",
+ " 29.25 | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " 2021 | \n",
+ " 19.66 | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " 2022 | \n",
+ " 25.64 | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " 2023 | \n",
+ " 16.85 | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " 2024 | \n",
+ " 13.62 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Year Average Close\n",
+ "0 1990 23.06\n",
+ "1 1991 18.37\n",
+ "2 1992 15.45\n",
+ "3 1993 12.69\n",
+ "4 1994 13.93\n",
+ "5 1995 12.39\n",
+ "6 1996 16.44\n",
+ "7 1997 22.36\n",
+ "8 1998 25.60\n",
+ "9 1999 24.37\n",
+ "10 2000 23.32\n",
+ "11 2001 25.75\n",
+ "12 2002 27.29\n",
+ "13 2003 21.98\n",
+ "14 2004 15.48\n",
+ "15 2005 12.81\n",
+ "16 2006 12.81\n",
+ "17 2007 17.54\n",
+ "18 2008 32.70\n",
+ "19 2009 31.48\n",
+ "20 2010 22.55\n",
+ "21 2011 24.20\n",
+ "22 2012 17.80\n",
+ "23 2013 14.23\n",
+ "24 2014 14.18\n",
+ "25 2015 16.67\n",
+ "26 2016 15.83\n",
+ "27 2017 11.09\n",
+ "28 2018 16.64\n",
+ "29 2019 15.39\n",
+ "30 2020 29.25\n",
+ "31 2021 19.66\n",
+ "32 2022 25.64\n",
+ "33 2023 16.85\n",
+ "34 2024 13.62"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# 5\n",
"# Agrupa por 'Year' y calcula el promedio de 'Close' para cada año\n",
@@ -531,10 +1338,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"id": "1a698ed6",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Low 5276\n",
+ "Medium 2582\n",
+ "High 720\n",
+ "Name: Level, dtype: int64"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Count the number of 'High', 'Medium', and 'Low' Level days in the data\n",
"df = df['Level'].value_counts()\n",
@@ -558,7 +1379,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.4"
+ "version": "3.9.7"
}
},
"nbformat": 4,
diff --git a/assignments/assignment_2/group_7_ass_2_2024_re.ipynb b/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
index 42b89b0b..917b1210 100644
--- a/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
+++ b/assignments/assignment_2/group_7_ass_2_2024_re.ipynb
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 1,
"id": "ab6ac49c",
"metadata": {},
"outputs": [],
@@ -57,7 +57,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 2,
"id": "9d8cec88",
"metadata": {},
"outputs": [
@@ -79,7 +79,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 3,
"id": "fbe22089",
"metadata": {},
"outputs": [
@@ -101,7 +101,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 4,
"id": "396e9b4d",
"metadata": {},
"outputs": [
@@ -121,7 +121,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 5,
"id": "160cab6a",
"metadata": {},
"outputs": [
@@ -131,7 +131,7 @@
"'My teacher assistant is so boring.'"
]
},
- "execution_count": 7,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -145,7 +145,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 6,
"id": "29b4805e",
"metadata": {},
"outputs": [
@@ -155,7 +155,7 @@
"'My TA My teacher assistant is so boring but is very funny'"
]
},
- "execution_count": 8,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -177,7 +177,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 7,
"id": "e9ff96e3",
"metadata": {},
"outputs": [
@@ -199,7 +199,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 8,
"id": "7e7147b8",
"metadata": {},
"outputs": [
@@ -221,7 +221,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 9,
"id": "ae532dfb",
"metadata": {},
"outputs": [
@@ -267,7 +267,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 10,
"id": "045944b7",
"metadata": {},
"outputs": [],
@@ -277,7 +277,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"id": "a53dd7b1",
"metadata": {},
"outputs": [],
@@ -287,7 +287,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 12,
"id": "68fe62ee",
"metadata": {},
"outputs": [
@@ -316,7 +316,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 13,
"id": "b9a72a9c",
"metadata": {},
"outputs": [
@@ -341,7 +341,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 14,
"id": "5a939c7f",
"metadata": {},
"outputs": [
@@ -368,7 +368,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 15,
"id": "f6566bef",
"metadata": {},
"outputs": [
@@ -394,7 +394,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 16,
"id": "d5bb16d7",
"metadata": {},
"outputs": [
@@ -423,7 +423,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 17,
"id": "0fbd7c03",
"metadata": {},
"outputs": [
@@ -484,10 +484,568 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
+ "id": "9c2bd811",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE | \n",
+ " OPEN | \n",
+ " HIGH | \n",
+ " LOW | \n",
+ " CLOSE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 01/02/1990 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 01/03/1990 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 01/04/1990 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 01/05/1990 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 01/08/1990 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 8572 | \n",
+ " 12/29/2023 | \n",
+ " 12.55 | \n",
+ " 13.19 | \n",
+ " 12.36 | \n",
+ " 12.45 | \n",
+ "
\n",
+ " \n",
+ " | 8573 | \n",
+ " 01/02/2024 | \n",
+ " 13.22 | \n",
+ " 14.23 | \n",
+ " 13.10 | \n",
+ " 13.20 | \n",
+ "
\n",
+ " \n",
+ " | 8574 | \n",
+ " 01/03/2024 | \n",
+ " 13.35 | \n",
+ " 14.22 | \n",
+ " 13.33 | \n",
+ " 14.04 | \n",
+ "
\n",
+ " \n",
+ " | 8575 | \n",
+ " 01/04/2024 | \n",
+ " 13.93 | \n",
+ " 14.20 | \n",
+ " 13.64 | \n",
+ " 14.13 | \n",
+ "
\n",
+ " \n",
+ " | 8576 | \n",
+ " 01/05/2024 | \n",
+ " 14.24 | \n",
+ " 14.58 | \n",
+ " 13.29 | \n",
+ " 13.35 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8577 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE OPEN HIGH LOW CLOSE\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26\n",
+ "... ... ... ... ... ...\n",
+ "8572 12/29/2023 12.55 13.19 12.36 12.45\n",
+ "8573 01/02/2024 13.22 14.23 13.10 13.20\n",
+ "8574 01/03/2024 13.35 14.22 13.33 14.04\n",
+ "8575 01/04/2024 13.93 14.20 13.64 14.13\n",
+ "8576 01/05/2024 14.24 14.58 13.29 13.35\n",
+ "\n",
+ "[8577 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 1\n",
+ "import pandas as pd\n",
+ "\n",
+ "# This code loads the data from the provided URL.\n",
+ "url = \"https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\"\n",
+ "df = pd.read_csv(url)\n",
+ "# We check the data assigned to the variable 'df'.\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "594fd5af",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Primeras cinco filas del DataFrame:\n",
+ " DATE OPEN HIGH LOW CLOSE\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26\n",
+ "\n",
+ "Estadísticas resumidas del DataFrame:\n",
+ " OPEN HIGH LOW CLOSE\n",
+ "count 8577.000000 8577.000000 8577.000000 8577.000000\n",
+ "mean 19.666454 20.474364 18.914884 19.580374\n",
+ "std 7.979066 8.439927 7.469827 7.906214\n",
+ "min 9.010000 9.310000 8.560000 9.140000\n",
+ "25% 13.940000 14.540000 13.400000 13.880000\n",
+ "50% 17.790000 18.470000 17.220000 17.760000\n",
+ "75% 23.100000 23.960000 22.320000 22.990000\n",
+ "max 82.690000 89.530000 72.760000 82.690000\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 2\n",
+ "# We display the first five rows of the DataFrame.\n",
+ "print(\"Primeras cinco filas del DataFrame:\")\n",
+ "print(df.head())\n",
+ "\n",
+ "# We display the summary statistics of the DataFrame.\n",
+ "print(\"\\nEstadísticas resumidas del DataFrame:\")\n",
+ "print(df.describe())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "97f1a822",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE | \n",
+ " OPEN | \n",
+ " HIGH | \n",
+ " LOW | \n",
+ " CLOSE | \n",
+ " Level | \n",
+ " Year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 01/02/1990 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 01/03/1990 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 01/04/1990 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 01/05/1990 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 01/08/1990 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 8572 | \n",
+ " 12/29/2023 | \n",
+ " 12.55 | \n",
+ " 13.19 | \n",
+ " 12.36 | \n",
+ " 12.45 | \n",
+ " Low | \n",
+ " 2023 | \n",
+ "
\n",
+ " \n",
+ " | 8573 | \n",
+ " 01/02/2024 | \n",
+ " 13.22 | \n",
+ " 14.23 | \n",
+ " 13.10 | \n",
+ " 13.20 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8574 | \n",
+ " 01/03/2024 | \n",
+ " 13.35 | \n",
+ " 14.22 | \n",
+ " 13.33 | \n",
+ " 14.04 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8575 | \n",
+ " 01/04/2024 | \n",
+ " 13.93 | \n",
+ " 14.20 | \n",
+ " 13.64 | \n",
+ " 14.13 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8576 | \n",
+ " 01/05/2024 | \n",
+ " 14.24 | \n",
+ " 14.58 | \n",
+ " 13.29 | \n",
+ " 13.35 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8577 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE OPEN HIGH LOW CLOSE Level Year\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
+ "... ... ... ... ... ... ... ...\n",
+ "8572 12/29/2023 12.55 13.19 12.36 12.45 Low 2023\n",
+ "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
+ "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
+ "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
+ "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
+ "\n",
+ "[8577 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 3\n",
+ "# We add a 'Level' column to categorize the 'CLOSE' values\n",
+ "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
+ " labels=['Low', 'Medium', 'High'])\n",
+ "\n",
+ "# We add the 'Year' column extracted from the 'DATE' column, keeping only the year via the .dt.year accessor.\n",
+ "# First, the 'DATE' column is converted to datetime with pd.to_datetime.\n",
+ "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
+ "\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
"id": "f865ded4",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE | \n",
+ " OPEN | \n",
+ " HIGH | \n",
+ " LOW | \n",
+ " CLOSE | \n",
+ " Level | \n",
+ " Year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 01/02/1990 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " 17.24 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 01/03/1990 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " 18.19 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 01/04/1990 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " 19.22 | \n",
+ " Low | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 01/05/1990 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " 20.11 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 01/08/1990 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " 20.26 | \n",
+ " Medium | \n",
+ " 1990 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 8573 | \n",
+ " 01/02/2024 | \n",
+ " 13.22 | \n",
+ " 14.23 | \n",
+ " 13.10 | \n",
+ " 13.20 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8574 | \n",
+ " 01/03/2024 | \n",
+ " 13.35 | \n",
+ " 14.22 | \n",
+ " 13.33 | \n",
+ " 14.04 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8575 | \n",
+ " 01/04/2024 | \n",
+ " 13.93 | \n",
+ " 14.20 | \n",
+ " 13.64 | \n",
+ " 14.13 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8576 | \n",
+ " 01/05/2024 | \n",
+ " 14.24 | \n",
+ " 14.58 | \n",
+ " 13.29 | \n",
+ " 13.35 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ " | 8577 | \n",
+ " 01/06/2024 | \n",
+ " 14.27 | \n",
+ " 14.61 | \n",
+ " 13.32 | \n",
+ " 13.38 | \n",
+ " Low | \n",
+ " 2024 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8578 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE OPEN HIGH LOW CLOSE Level Year\n",
+ "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
+ "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
+ "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
+ "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
+ "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
+ "... ... ... ... ... ... ... ...\n",
+ "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
+ "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
+ "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
+ "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
+ "8577 01/06/2024 14.27 14.61 13.32 13.38 Low 2024\n",
+ "\n",
+ "[8578 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# 4\n",
"# Temporalmente eliminamos las columnas 'Level' y 'Year' para evitar NaN en el df\n",
@@ -512,10 +1070,259 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"id": "7aae8eb9",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " Average Close | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1990 | \n",
+ " 23.06 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1991 | \n",
+ " 18.37 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1992 | \n",
+ " 15.45 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1993 | \n",
+ " 12.69 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1994 | \n",
+ " 13.93 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 1995 | \n",
+ " 12.39 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 1996 | \n",
+ " 16.44 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 1997 | \n",
+ " 22.36 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 1998 | \n",
+ " 25.60 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 1999 | \n",
+ " 24.37 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 2000 | \n",
+ " 23.32 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 2001 | \n",
+ " 25.75 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 2002 | \n",
+ " 27.29 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 2003 | \n",
+ " 21.98 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 2004 | \n",
+ " 15.48 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 2005 | \n",
+ " 12.81 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 2006 | \n",
+ " 12.81 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 2007 | \n",
+ " 17.54 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 2008 | \n",
+ " 32.70 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 2009 | \n",
+ " 31.48 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 2010 | \n",
+ " 22.55 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 2011 | \n",
+ " 24.20 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 2012 | \n",
+ " 17.80 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 2013 | \n",
+ " 14.23 | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 2014 | \n",
+ " 14.18 | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " 2015 | \n",
+ " 16.67 | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " 2016 | \n",
+ " 15.83 | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " 2017 | \n",
+ " 11.09 | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " 2018 | \n",
+ " 16.64 | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " 2019 | \n",
+ " 15.39 | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " 2020 | \n",
+ " 29.25 | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " 2021 | \n",
+ " 19.66 | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " 2022 | \n",
+ " 25.64 | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " 2023 | \n",
+ " 16.85 | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " 2024 | \n",
+ " 13.62 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Year Average Close\n",
+ "0 1990 23.06\n",
+ "1 1991 18.37\n",
+ "2 1992 15.45\n",
+ "3 1993 12.69\n",
+ "4 1994 13.93\n",
+ "5 1995 12.39\n",
+ "6 1996 16.44\n",
+ "7 1997 22.36\n",
+ "8 1998 25.60\n",
+ "9 1999 24.37\n",
+ "10 2000 23.32\n",
+ "11 2001 25.75\n",
+ "12 2002 27.29\n",
+ "13 2003 21.98\n",
+ "14 2004 15.48\n",
+ "15 2005 12.81\n",
+ "16 2006 12.81\n",
+ "17 2007 17.54\n",
+ "18 2008 32.70\n",
+ "19 2009 31.48\n",
+ "20 2010 22.55\n",
+ "21 2011 24.20\n",
+ "22 2012 17.80\n",
+ "23 2013 14.23\n",
+ "24 2014 14.18\n",
+ "25 2015 16.67\n",
+ "26 2016 15.83\n",
+ "27 2017 11.09\n",
+ "28 2018 16.64\n",
+ "29 2019 15.39\n",
+ "30 2020 29.25\n",
+ "31 2021 19.66\n",
+ "32 2022 25.64\n",
+ "33 2023 16.85\n",
+ "34 2024 13.62"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# 5\n",
"# Agrupa por 'Year' y calcula el promedio de 'Close' para cada año\n",
@@ -531,10 +1338,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"id": "1a698ed6",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Low 5276\n",
+ "Medium 2582\n",
+ "High 720\n",
+ "Name: Level, dtype: int64"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Count the number of 'High', 'Medium', and 'Low' Level days in the data\n",
"df = df['Level'].value_counts()\n",
@@ -558,7 +1379,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.4"
+ "version": "3.9.7"
}
},
"nbformat": 4,
From d5d3082f0525a18a305793b018891c12284c9662 Mon Sep 17 00:00:00 2001
From: Rafael Vargas <90073975+rafa0303@users.noreply.github.com>
Date: Mon, 8 Jan 2024 21:43:19 -0500
Subject: [PATCH 13/14] Delete
assignments/assignment_2/group_7_ass_2_2024.ipynb
---
.../assignment_2/group_7_ass_2_2024.ipynb | 1479 -----------------
1 file changed, 1479 deletions(-)
delete mode 100644 assignments/assignment_2/group_7_ass_2_2024.ipynb
diff --git a/assignments/assignment_2/group_7_ass_2_2024.ipynb b/assignments/assignment_2/group_7_ass_2_2024.ipynb
deleted file mode 100644
index 007f4559..00000000
--- a/assignments/assignment_2/group_7_ass_2_2024.ipynb
+++ /dev/null
@@ -1,1479 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 1 Assignment 2\n",
- "\n",
- "It is totally prohibited to use any kind of loop. You can use stackoverflow. If you copy codes from previous answers, explain each step. No explanation is `0 points`."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.2 Lists\n",
- "1. Show the indices of the `np.nan` values in the `f_list` list. We want to see this output: `The indices 0, 1, 4, 7 have np.nan values.` **Hint: Use print function and [f-strings](https://realpython.com/python-f-strings/) to insert the indices values.**
\n",
- "\n",
- "2. Replicate the values of the list `p2_list` 4 times. We expect an output like this: `[ 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5, 2 , 3, 4, 5]`. **Hint: Use the multiplication operator on `lists` and see the output.**
\n",
- "3. Print the length of `f_list`. **Hint: Length function**
\n",
- "4. Print `My teacher assistant is so boring.` using `text1` list. **Hint: Use the [`join` function](https://stackoverflow.com/questions/493819/why-is-it-string-joinlist-instead-of-list-joinstring)**
\n",
- "5. Print `My TA is so boring, but is very funny.` using `text1` list.**Hint: Use the `join` function, and `extend` method.**
\n",
- "6. Print
\n",
- "`The max value of values1 is 86 and is located in the 0 index. `
\n",
- "`The min value of values1 is 0 and is located in the 7 index. `
**Hint: Use the `f-string`, `min`, and `max` functions.**\n",
- "
\n",
- "7. Get two lists: `names` and `last_names` using `last_and_name` list. **Hint: Use `map` and `split`.**\n",
- "
\n",
- "8. Give only the last names of students who do not have an email. Use the `emails` and `last_names` lists. **Hint: Use `map` and `split`.**\n",
- "
"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 33,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 34,
- "metadata": {},
- "outputs": [],
- "source": [
- "f_list = [np.nan , np.nan, \"Austria\", \"Germany\", np.nan, \"Pakistan\", \"np.nan\", np.nan ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 35,
- "metadata": {},
- "outputs": [],
- "source": [
- "text1 = ['My', 'teacher', 'assistant', 'is', 'so', 'boring.']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 36,
- "metadata": {},
- "outputs": [],
- "source": [
- "p2_list = [ 2 , 3, 4, 5 ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 37,
- "metadata": {},
- "outputs": [],
- "source": [
- "values1 = [ 86, 86, 85, 85, 85, 83, 23, 0, 84, 1 ] "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 38,
- "metadata": {
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "# These two lists are used in exercises 7 and 8\n",
- "last_and_name = [ \"CORNEJO SANCHEZ, CHRISTIAN SANTOS\", \"ORELLANA QUISPE, CRISTIAN NASSER\", \"MORALES CHOQUEHUANCA, ANGELICA KARINA\", \"GUIMARAY RIBEYRO, JOSE ROBERTO\", \"CAMACHO GAVIDIA, ABEL FERNANDO\", \"TINTAYA ORIHUELA, MEIR ALVARO\", \"CHAVEZ MARTINEZ, JOSELIN ALEXANDRA\", \"FIGUEROA MURO, LEONEL ARTURO\", \"GOMEZ CRIBILLERO, JOSE FELIPE\", \"PALOMINO SEGUÍN, AFRANIA\", \"LUZON CUEVA, BIANCA MARIETTE\", \"SUAÑA ZEGARRA, ADRIAN ANDRE\", \"SOTO POMACHAGUA, DORKAS YOMIRA JHERMY\", \"FIORENTINO MARTINEZ, LADY ALY\", \"LAMA MAVILA, HECTOR ANDRE\", \"MEZA HINOJO, GUSTAVO\", \"LOZADA MURILLO, PERSEO MARCELO\", \"ZAMBRANO JIMENEZ, MIGUEL ALONZO\", \"JACOBS LUQUE, NICOLAS\", \"VIDAL VIDAL, ROCIO GABRIELA\", \"TORRES ANICAMA, JANE CAMILA\", \"LOPEZ ESTRADA, MARIA ELISA\", \"BOYCO ORAMS, ALEJANDRO\", \"DIAZ BERROSPI, KARLINE ROSMELI\", \"RIEGA ESCALANTE, STEPHY ROSARIO\", \"LEVANO TORRES, VALERIA CECILIA\", \"ESQUIVES BRAVO, SEBASTIAN RENATO\", \"PEREZ GONZALES, JUAN CARLOS\", \"OTERO MAGUIÑA, MARIANA\", \"CLAVO CAMPOS, ANDREA BRIZETH\", \"AGUILAR GARCIA, ERICK JOSUE\", \"CALDAS VELASQUEZ, JOSUE DANIEL\", \"SALAS NUÑEZ BORJA, FABIO MANUEL\", \"PIZARRO VILLANES, FERNANDA NICOLLE\", \"QUILLATUPA MORALES, ANGELA ADELINA\", \"HUANCAYA IDONE, CESAR DANTE\", \"CALVO PORTOCARRERO, GABRIELA ISABEL\", \"IBAÑEZ ABANTO, ANGEL MAURICIO\", \"MELÉNDEZ APONTE, JUAN DIEGO\", \"CRISTIAN SERRANO, ARONE\", \"HINOJOSA CAHUANA, PERCY ALBERTH\", \"ANGLAS GARCÍA, KEVIN ARTURO\", \"ALDAVE ACOSTA, CESAR ERNESTO\", \"NÚÑEZ HUAMÁN, CÉSAR AGUSTO\", \"OBREGON HUAMAN, DIANA EDITH\", \"SOTO PACHERRES, RODRIGO FRANCO\", \"INGARUCA RIVERA, GRETTEL ALEXANDRA\", \"ROJAS HUAMAN, ROSA ANGELA\", \"NEYRA SALAS, DANTE OMAR\", \"HUERTA ESPINOZA, YAJAIRA ALEXANDRA\", \"HUANCA MARTINEZ, JORGE ALBERTO\", \"FLORES CADILLO, ALEXIS\" ]\n",
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\", \"\", \"\", \"\", \"\", \"\", \"f0873079@pucp.edu.pe\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"\", \"flores.alexis@pucp.edu.pe\", ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 39,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The indices [0, 1, 4, 7] have np.nan values.\n"
- ]
- }
- ],
- "source": [
- " # Answer 1\n",
- "\n",
- "nan_values = [i for i, value in enumerate(f_list) if value is np.nan]\n",
- "\n",
- "print(f\"The indices {nan_values} have np.nan values.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 40,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5]\n"
- ]
- }
- ],
- "source": [
- " # Answer 2\n",
- "\n",
- "p2_list_2 = p2_list * 4\n",
- "\n",
- "print(p2_list_2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 41,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "8\n"
- ]
- }
- ],
- "source": [
- " # Answer 3\n",
- "\n",
- "print(len(f_list))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'My teacher assistant is so boring.'"
- ]
- },
- "execution_count": 42,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- " # Answer 4\n",
- "\n",
- "answer_4 = ' '.join(text1)\n",
- "answer_4"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 43,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'My TA My teacher assistant is so boring but is very funny'"
- ]
- },
- "execution_count": 43,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- " # Answer 5\n",
- " \n",
- "text1[-1] = text1[-1].rstrip('.')\n",
- " \n",
- "text_answer = ['My', 'TA']\n",
- "\n",
- "text_answer_b = ['but', 'is', 'very', 'funny']\n",
- "\n",
- "text_answer.extend(text1 + text_answer_b)\n",
- "\n",
- "answer_5 = ' '.join(text_answer)\n",
- "answer_5"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 44,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The max value of values1 is 86 and is located in the 0 index.\n"
- ]
- }
- ],
- "source": [
- " # Answer 6.1\n",
- " \n",
- "max_index = values1.index(max(values1))\n",
- " \n",
- "print(f\"The max value of values1 is {max(values1)} and is located in the {max_index} index.\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 45,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The min value of values1 is 0 and is located in the 7 index.\n"
- ]
- }
- ],
- "source": [
- " # Answer 6.2\n",
- " \n",
- "min_index = values1.index(min(values1))\n",
- " \n",
- "print(f\"The min value of values1 is {min(values1)} and is located in the {min_index} index.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 46,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Names: ('CHRISTIAN SANTOS', 'CRISTIAN NASSER', 'ANGELICA KARINA', 'JOSE ROBERTO', 'ABEL FERNANDO', 'MEIR ALVARO', 'JOSELIN ALEXANDRA', 'LEONEL ARTURO', 'JOSE FELIPE', 'AFRANIA', 'BIANCA MARIETTE', 'ADRIAN ANDRE', 'DORKAS YOMIRA JHERMY', 'LADY ALY', 'HECTOR ANDRE', 'GUSTAVO', 'PERSEO MARCELO', 'MIGUEL ALONZO', 'NICOLAS', 'ROCIO GABRIELA', 'JANE CAMILA', 'MARIA ELISA', 'ALEJANDRO', 'KARLINE ROSMELI', 'STEPHY ROSARIO', 'VALERIA CECILIA', 'SEBASTIAN RENATO', 'JUAN CARLOS', 'MARIANA', 'ANDREA BRIZETH', 'ERICK JOSUE', 'JOSUE DANIEL', 'FABIO MANUEL', 'FERNANDA NICOLLE', 'ANGELA ADELINA', 'CESAR DANTE', 'GABRIELA ISABEL', 'ANGEL MAURICIO', 'JUAN DIEGO', 'ARONE', 'PERCY ALBERTH', 'KEVIN ARTURO', 'CESAR ERNESTO', 'CÉSAR AGUSTO', 'DIANA EDITH', 'RODRIGO FRANCO', 'GRETTEL ALEXANDRA', 'ROSA ANGELA', 'DANTE OMAR', 'YAJAIRA ALEXANDRA', 'JORGE ALBERTO', 'ALEXIS')\n",
- "Last Names: ('CORNEJO SANCHEZ', 'ORELLANA QUISPE', 'MORALES CHOQUEHUANCA', 'GUIMARAY RIBEYRO', 'CAMACHO GAVIDIA', 'TINTAYA ORIHUELA', 'CHAVEZ MARTINEZ', 'FIGUEROA MURO', 'GOMEZ CRIBILLERO', 'PALOMINO SEGUÍN', 'LUZON CUEVA', 'SUAÑA ZEGARRA', 'SOTO POMACHAGUA', 'FIORENTINO MARTINEZ', 'LAMA MAVILA', 'MEZA HINOJO', 'LOZADA MURILLO', 'ZAMBRANO JIMENEZ', 'JACOBS LUQUE', 'VIDAL VIDAL', 'TORRES ANICAMA', 'LOPEZ ESTRADA', 'BOYCO ORAMS', 'DIAZ BERROSPI', 'RIEGA ESCALANTE', 'LEVANO TORRES', 'ESQUIVES BRAVO', 'PEREZ GONZALES', 'OTERO MAGUIÑA', 'CLAVO CAMPOS', 'AGUILAR GARCIA', 'CALDAS VELASQUEZ', 'SALAS NUÑEZ BORJA', 'PIZARRO VILLANES', 'QUILLATUPA MORALES', 'HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'HINOJOSA CAHUANA', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ', 'FLORES CADILLO')\n"
- ]
- }
- ],
- "source": [
- " # Answer 7\n",
- "\n",
- "last_names, names = zip(*map(lambda x: x.split(', '), last_and_name))\n",
- " # the lambda function using split divides each name from each last name\n",
- " # the function zip takes the values and puts them in tuples\n",
- " # \"last_names, names\" indicates the code to split the tuples in two separate lists\n",
- "print(\"Names:\", names)\n",
- "print(\"Last Names:\", last_names)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 47,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['HUANCAYA IDONE', 'CALVO PORTOCARRERO', 'IBAÑEZ ABANTO', 'MELÉNDEZ APONTE', 'CRISTIAN SERRANO', 'ANGLAS GARCÍA', 'ALDAVE ACOSTA', 'NÚÑEZ HUAMÁN', 'OBREGON HUAMAN', 'SOTO PACHERRES', 'INGARUCA RIVERA', 'ROJAS HUAMAN', 'NEYRA SALAS', 'HUERTA ESPINOZA', 'HUANCA MARTINEZ']\n"
- ]
- }
- ],
- "source": [
- " # Answer 8\n",
- " \n",
- "answer_8 = [last_name for last_name, email in zip(last_names, emails) if not email]\n",
- " # zip function pairs last names with its corresponding emails into tuples\n",
- " # \"last_name for last_name, email in ... if not email\" indicates the last name for each pair if email is empty\n",
- "print(answer_8)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.3 Strings\n",
- "\n",
- "\n",
- "1. Drop the duplicated blank spaces in `str1`. Do not use regular expressions. Do not use the code shown in class. Explain your steps. **Hint: Use the `split` method and the `join` function.**\n",
- "\n",
- "2. Get the number of letters in the string `str1`. **Hint: Use the `len` function.**\n",
- "\n",
- "3. Get the number of blank spaces (all of them) in the string `str1`. **Hint: Use the `len` function.**\n",
- "\n",
- "4. Get the position of `@` in each string in the emails list. **Hint: Use `map`function and `find` method.**\n",
- "\n",
- "5. Identify whether `.edu.` exists in each string in the `emails` list. Get a list of Booleans. **Hint: Use the `map` function and the `find` method.**\n",
- "\n",
- "6. Get all the strings before the first dot `.` in each string in the `emails` list. Identify how many of them contain `@`. **Hint: Use the `map` function and the `find` method.**"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Dear professor, the reason why I created this new branch is to correct the fact that I mistakenly edited the main branch of my group when I was writing my part. So, I was the one in charge to complete the \"Strings\" part, and here it is. I hope you can understand this."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 48,
- "metadata": {},
- "outputs": [],
- "source": [
- "str1 = 'I am too old'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 49,
- "metadata": {},
- "outputs": [],
- "source": [
- "emails = [\"cscornejo@pucp.edu.pe\", \"orellana.cn@pucp.edu.pe\", \"karina.morales@pucp.edu.pe\", \"a20083223@pucp.pe\", \"abel.camacho@pucp.pe\", \"mtintaya@pucp.edu.pe\", \"joselin.chavez@pucp.edu.pe\", \"a20105737@pucp.pe\", \"jfgomezc@pucp.pe\", \"afrania.palomino@pucp.pe\", \"luzon.bianca@pucp.pe\", \"adrian.suanaz@pucp.pe\", \"soto.y@pucp.edu.pe\", \"a20132766@pucp.pe\", \"andre.lama@pucp.edu.pe\", \"gustavo.meza@pucp.edu.pe\", \"pmlozada@pucp.edu.pe\", \"m.zambranoj@pucp.edu.pe\", \"nicolas.jacobs@pucp.edu.pe\", \"gvidal@pucp.edu.pe\", \"jane.torres@pucp.edu.pe\", \"m.lopez@pucp.edu.pe\", \"alejandro.boyco@pucp.edu.pe\", \"a20167070@pucp.edu.pe\", \"riega.stephy@pucp.edu.pe\", \"vlevanot@pucp.edu.pe\", \"sesquives@pucp.edu.pe\", \"perez.juanc@pucp.edu.pe\", \"mariana.otero@pucp.edu.pe\", \"aclavo@pucp.edu.pe\", \"a20182474@pucp.edu.pe\", \"josue.caldas@pucp.edu.pe\", \"fabio.salas@pucp.edu.pe\", \"fernanda.pizarro@pucp.edu.pe\", \"aquillatupa@pucp.pe\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 50,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "I am too old\n"
- ]
- }
- ],
- "source": [
- "# 1. Drop the duplicated blank spaces in str1. Do not use the regular expresions. Do not use the code shown in class. Explain your steps.Hint: Use split method and join function.\n",
- "\n",
- "# First, we split the string into a list of words\n",
- "list_words = str1.split()\n",
- "\n",
- "# Then, we filter out empty strings (split() with no arguments already drops them, so this is only a safeguard)\n",
- "non_empty_words = [word for word in list_words if word]\n",
- "\n",
- "# Lastly, we join the non-empty words back into a string\n",
- "result_str = ' '.join(non_empty_words)\n",
- "\n",
- "print(result_str)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 51,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Number of letters: 9\n"
- ]
- }
- ],
- "source": [
- "# 2. Get the number of letters in the string .Hint: Use lenfunction.\n",
- "\n",
- "# To count the letters, we keep only the alphabetic characters: the comprehension's\n",
- "# `if` clause checks each character with the isalpha() method,\n",
- "# and then the len function gives the length of the resulting list.\n",
- "num_letters = len([char for char in str1 if char.isalpha()])\n",
- "\n",
- "# This is a better way to print the answer, including the string value \"Number of letters\"\n",
- "print(\"Number of letters:\", num_letters)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 52,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Number of blank spaces: 85\n"
- ]
- }
- ],
- "source": [
- "# 3. Get the number of blank spaces (all of them) in the string .Hint: Use lenfunction.\n",
- "\n",
- "# First, we create a list with the blank spaces, using the \"isspace\" method\n",
- "blank_space = [char for char in str1 if char.isspace()]\n",
- "\n",
- "# Then we apply the function len() to the list to get the total number of blank spaces\n",
- "num_blank_spaces = len(blank_space)\n",
- "\n",
- "# Finally, we print the result\n",
- "print(\"Number of blank spaces:\", num_blank_spaces)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 53,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Positions of '@': [9, 11, 14, 9, 12, 8, 14, 9, 8, 16, 12, 13, 6, 9, 10, 12, 8, 11, 14, 6, 11, 7, 15, 9, 12, 8, 9, 11, 13, 6, 9, 12, 11, 16, 11]\n"
- ]
- }
- ],
- "source": [
- "# 4. Get the position of @ in each string in the emails list. Hint: Use mapfunction and find method.\n",
- "\n",
- "# First we use the map() function to apply the specified lambda function to each element in the emails list. \n",
- "# Then, with the lambda function takes an email address (email) and finds the position of the '@' symbol using the find() method.\n",
- "# Also, list(...) Converts the result into a list.\n",
- "positions_at = list(map(lambda email: email.find('@'), emails))\n",
- "\n",
- "# Finally, we print the results\n",
- "print(\"Positions of '@':\", positions_at)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 54,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Contains '.edu.': [True, True, True, False, False, True, True, False, False, False, False, False, True, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False]\n"
- ]
- }
- ],
- "source": [
- "# 5. Identifies if exists .edu. in each string in the emails list. Get a list of Booleans. Hint: Use mapfunction and find method. # \n",
- "\n",
- "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
- "# The lambda function checks if '.edu.' is present in each email address.\n",
- "contains_edu_lambda = lambda email: '.edu.' in email\n",
- "mapped_result = map(contains_edu_lambda, emails)\n",
- "\n",
- "# Then, we convert the mapped result into a list.\n",
- "contains_edu_list = list(mapped_result)\n",
- "\n",
- "# Finally we print the result\n",
- "print(\"Contains '.edu.':\", contains_edu_list)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 55,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Substrings before the first dot: ['cscornejo@pucp', 'orellana', 'karina', 'a20083223@pucp', 'abel', 'mtintaya@pucp', 'joselin', 'a20105737@pucp', 'jfgomezc@pucp', 'afrania', 'luzon', 'adrian', 'soto', 'a20132766@pucp', 'andre', 'gustavo', 'pmlozada@pucp', 'm', 'nicolas', 'gvidal@pucp', 'jane', 'm', 'alejandro', 'a20167070@pucp', 'riega', 'vlevanot@pucp', 'sesquives@pucp', 'perez', 'mariana', 'aclavo@pucp', 'a20182474@pucp', 'josue', 'fabio', 'fernanda', 'aquillatupa@pucp']\n",
- "Number of substrings containing '@': 14\n"
- ]
- }
- ],
- "source": [
- "# 6. Get all the strings before the first dot '.' in each string in the emails list.\n",
- "# Identify how many of them have '@'.\n",
- "\n",
- "# First, we use the map function to apply a lambda function to each element in the emails list.\n",
- "# The lambda function only extracts the substring before the first dot '.'; the '@' check is done afterwards, when counting.\n",
- "substring_before_dot_lambda = lambda email: email.split('.')[0]\n",
- "mapped_result = map(substring_before_dot_lambda, emails)\n",
- "\n",
- "# After that, we convert the mapped result into a list.\n",
- "substring_before_dot_list = list(mapped_result)\n",
- "\n",
- "# Also, we count how many substrings contain '@' and print the results\n",
- "count_with_at = sum('@' in substring for substring in substring_before_dot_list)\n",
- "\n",
- "print(\"Substrings before the first dot:\", substring_before_dot_list)\n",
- "print(\"Number of substrings containing '@':\", count_with_at)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.4 Pandas\n",
- "\n",
- "You can and should always ask ChatGPT, BARD, Bing, etc.\n",
- "In this exercise, you will work with financial data. Follow the steps below:\n",
- "\n",
- "1. Load Data:\n",
- " Load the financial data available at this URL: https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\n",
- " Use `pd.read_csv()` and the link to load the data into a DataFrame.\n",
- "\n",
- "2. Explore Data:\n",
- " a) Display the first five rows of the DataFrame.\n",
- " b) Display the summary statistics of the DataFrame.\n",
- "\n",
- "3. Add Columns:\n",
- " a) Add a new column 'Level' that categorizes the 'Close' column values into 'Low' (< 20), 'Medium' (20-30), and 'High' (>30). You need to make a filter to make this categorization. You can see below some suggestions how to do it.\n",
- " b) Add a new column 'Year' extracted from the 'Date' column. You need to work with dates. You can do it either by working with the column directly and splitting it, or by converting it to a date format and following the suggestion below. \n",
- "\n",
- "4. Add Rows:\n",
- " a) Add a new row with a date of your choice and fill the other columns with appropriate values. \n",
- "\n",
- "5. Analysis:\n",
- " a) Calculate the average of the column 'Close' for each 'Year'. You need to group for this task.\n",
- " b) Count the number of 'High', 'Medium', and 'Low' Level days in the data.\n",
- "\n",
- "Suggestions:\n",
- " - Use `pd.cut()` for categorizing 'Close' values.\n",
- " - Use `pd.to_datetime()` and `dt.year` to extract the year from a date.\n",
- " - Use `DataFrame.append()` or `pd.concat()` to add rows.\n",
- " - Use `groupby()` for aggregation tasks."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 56,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 1\n",
- "import pandas as pd\n",
- "\n",
- "# This code loads the data from the provided URL.\n",
- "url = \"https://raw.githubusercontent.com/datasets/finance-vix/main/data/vix-daily.csv\"\n",
- "df = pd.read_csv(url)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 57,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8572 | \n",
- " 12/29/2023 | \n",
- " 12.55 | \n",
- " 13.19 | \n",
- " 12.36 | \n",
- " 12.45 | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8577 rows × 5 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26\n",
- "... ... ... ... ... ...\n",
- "8572 12/29/2023 12.55 13.19 12.36 12.45\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35\n",
- "\n",
- "[8577 rows x 5 columns]"
- ]
- },
- "execution_count": 57,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# We check the data assigned to the variable 'df'.\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 58,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Primeras cinco filas del DataFrame:\n",
- " DATE OPEN HIGH LOW CLOSE\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26\n",
- "\n",
- "Estadísticas resumidas del DataFrame:\n",
- " OPEN HIGH LOW CLOSE\n",
- "count 8577.000000 8577.000000 8577.000000 8577.000000\n",
- "mean 19.666454 20.474364 18.914884 19.580374\n",
- "std 7.979066 8.439927 7.469827 7.906214\n",
- "min 9.010000 9.310000 8.560000 9.140000\n",
- "25% 13.940000 14.540000 13.400000 13.880000\n",
- "50% 17.790000 18.470000 17.220000 17.760000\n",
- "75% 23.100000 23.960000 22.320000 22.990000\n",
- "max 82.690000 89.530000 72.760000 82.690000\n"
- ]
- }
- ],
- "source": [
- "# 2\n",
- "# We display the first five rows of the DataFrame.\n",
- "print(\"Primeras cinco filas del DataFrame:\")\n",
- "print(df.head())\n",
- "\n",
- "# We display the summarized statistics of the DataFrame.\n",
- "print(\"\\nEstadísticas resumidas del DataFrame:\")\n",
- "print(df.describe())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 59,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- " Level | \n",
- " Year | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8572 | \n",
- " 12/29/2023 | \n",
- " 12.55 | \n",
- " 13.19 | \n",
- " 12.36 | \n",
- " 12.45 | \n",
- " Low | \n",
- " 2023 | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8577 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE Level Year\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
- "... ... ... ... ... ... ... ...\n",
- "8572 12/29/2023 12.55 13.19 12.36 12.45 Low 2023\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
- "\n",
- "[8577 rows x 7 columns]"
- ]
- },
- "execution_count": 59,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 3\n",
- "# We add a 'Level' column to categorize the 'CLOSE' values\n",
- "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
- " labels=['Low', 'Medium', 'High'])\n",
- "\n",
- "# We add the 'Year' column extracted from the 'DATE' column, keeping only the year using the dt.year function.\n",
- "# Firstly, the 'DATE' variable was categorized as datetime.\n",
- "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
- "\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 60,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " DATE | \n",
- " OPEN | \n",
- " HIGH | \n",
- " LOW | \n",
- " CLOSE | \n",
- " Level | \n",
- " Year | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 01/02/1990 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " 17.24 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 01/03/1990 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " 18.19 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 01/04/1990 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " 19.22 | \n",
- " Low | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 01/05/1990 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " 20.11 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 01/08/1990 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " 20.26 | \n",
- " Medium | \n",
- " 1990 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 8573 | \n",
- " 01/02/2024 | \n",
- " 13.22 | \n",
- " 14.23 | \n",
- " 13.10 | \n",
- " 13.20 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8574 | \n",
- " 01/03/2024 | \n",
- " 13.35 | \n",
- " 14.22 | \n",
- " 13.33 | \n",
- " 14.04 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8575 | \n",
- " 01/04/2024 | \n",
- " 13.93 | \n",
- " 14.20 | \n",
- " 13.64 | \n",
- " 14.13 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8576 | \n",
- " 01/05/2024 | \n",
- " 14.24 | \n",
- " 14.58 | \n",
- " 13.29 | \n",
- " 13.35 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- " | 8577 | \n",
- " 01/06/2024 | \n",
- " 14.27 | \n",
- " 14.61 | \n",
- " 13.32 | \n",
- " 13.38 | \n",
- " Low | \n",
- " 2024 | \n",
- "
\n",
- " \n",
- "
\n",
- "
8578 rows × 7 columns
\n",
- "
"
- ],
- "text/plain": [
- " DATE OPEN HIGH LOW CLOSE Level Year\n",
- "0 01/02/1990 17.24 17.24 17.24 17.24 Low 1990\n",
- "1 01/03/1990 18.19 18.19 18.19 18.19 Low 1990\n",
- "2 01/04/1990 19.22 19.22 19.22 19.22 Low 1990\n",
- "3 01/05/1990 20.11 20.11 20.11 20.11 Medium 1990\n",
- "4 01/08/1990 20.26 20.26 20.26 20.26 Medium 1990\n",
- "... ... ... ... ... ... ... ...\n",
- "8573 01/02/2024 13.22 14.23 13.10 13.20 Low 2024\n",
- "8574 01/03/2024 13.35 14.22 13.33 14.04 Low 2024\n",
- "8575 01/04/2024 13.93 14.20 13.64 14.13 Low 2024\n",
- "8576 01/05/2024 14.24 14.58 13.29 13.35 Low 2024\n",
- "8577 01/06/2024 14.27 14.61 13.32 13.38 Low 2024\n",
- "\n",
- "[8578 rows x 7 columns]"
- ]
- },
- "execution_count": 60,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 4\n",
- "# Temporalmente eliminamos las columnas 'Level' y 'Year' para evitar NaN en el df\n",
- "df = df.drop(['Level', 'Year'], axis=1)\n",
- "\n",
- "# Agregamos la nueva fila solicitada\n",
- "new_row = pd.DataFrame({'DATE': ['01/06/2024'], \n",
- " 'OPEN': [14.27], \n",
- " 'HIGH': [14.61], \n",
- " 'LOW': [13.32], \n",
- " 'CLOSE': [13.38]}) \n",
- "\n",
- "# Agregamos la nueva fila al DataFrame existente\n",
- "df = pd.concat([df, new_row], ignore_index=True)\n",
- "\n",
- "# Recategorizamos 'Level' y 'Year' después de agregar la nueva fila\n",
- "df['Level'] = pd.cut(df['CLOSE'], bins=[-float('inf'), 20, 30, float('inf')],\n",
- " labels=['Low', 'Medium', 'High'])\n",
- "df['Year'] = pd.to_datetime(df['DATE']).dt.year\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 61,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Year | \n",
- " Average Close | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 1990 | \n",
- " 23.06 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 1991 | \n",
- " 18.37 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 1992 | \n",
- " 15.45 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 1993 | \n",
- " 12.69 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 1994 | \n",
- " 13.93 | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " 1995 | \n",
- " 12.39 | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " 1996 | \n",
- " 16.44 | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " 1997 | \n",
- " 22.36 | \n",
- "
\n",
- " \n",
- " | 8 | \n",
- " 1998 | \n",
- " 25.60 | \n",
- "
\n",
- " \n",
- " | 9 | \n",
- " 1999 | \n",
- " 24.37 | \n",
- "
\n",
- " \n",
- " | 10 | \n",
- " 2000 | \n",
- " 23.32 | \n",
- "
\n",
- " \n",
- " | 11 | \n",
- " 2001 | \n",
- " 25.75 | \n",
- "
\n",
- " \n",
- " | 12 | \n",
- " 2002 | \n",
- " 27.29 | \n",
- "
\n",
- " \n",
- " | 13 | \n",
- " 2003 | \n",
- " 21.98 | \n",
- "
\n",
- " \n",
- " | 14 | \n",
- " 2004 | \n",
- " 15.48 | \n",
- "
\n",
- " \n",
- " | 15 | \n",
- " 2005 | \n",
- " 12.81 | \n",
- "
\n",
- " \n",
- " | 16 | \n",
- " 2006 | \n",
- " 12.81 | \n",
- "
\n",
- " \n",
- " | 17 | \n",
- " 2007 | \n",
- " 17.54 | \n",
- "
\n",
- " \n",
- " | 18 | \n",
- " 2008 | \n",
- " 32.70 | \n",
- "
\n",
- " \n",
- " | 19 | \n",
- " 2009 | \n",
- " 31.48 | \n",
- "
\n",
- " \n",
- " | 20 | \n",
- " 2010 | \n",
- " 22.55 | \n",
- "
\n",
- " \n",
- " | 21 | \n",
- " 2011 | \n",
- " 24.20 | \n",
- "
\n",
- " \n",
- " | 22 | \n",
- " 2012 | \n",
- " 17.80 | \n",
- "
\n",
- " \n",
- " | 23 | \n",
- " 2013 | \n",
- " 14.23 | \n",
- "
\n",
- " \n",
- " | 24 | \n",
- " 2014 | \n",
- " 14.18 | \n",
- "
\n",
- " \n",
- " | 25 | \n",
- " 2015 | \n",
- " 16.67 | \n",
- "
\n",
- " \n",
- " | 26 | \n",
- " 2016 | \n",
- " 15.83 | \n",
- "
\n",
- " \n",
- " | 27 | \n",
- " 2017 | \n",
- " 11.09 | \n",
- "
\n",
- " \n",
- " | 28 | \n",
- " 2018 | \n",
- " 16.64 | \n",
- "
\n",
- " \n",
- " | 29 | \n",
- " 2019 | \n",
- " 15.39 | \n",
- "
\n",
- " \n",
- " | 30 | \n",
- " 2020 | \n",
- " 29.25 | \n",
- "
\n",
- " \n",
- " | 31 | \n",
- " 2021 | \n",
- " 19.66 | \n",
- "
\n",
- " \n",
- " | 32 | \n",
- " 2022 | \n",
- " 25.64 | \n",
- "
\n",
- " \n",
- " | 33 | \n",
- " 2023 | \n",
- " 16.85 | \n",
- "
\n",
- " \n",
- " | 34 | \n",
- " 2024 | \n",
- " 13.62 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Year Average Close\n",
- "0 1990 23.06\n",
- "1 1991 18.37\n",
- "2 1992 15.45\n",
- "3 1993 12.69\n",
- "4 1994 13.93\n",
- "5 1995 12.39\n",
- "6 1996 16.44\n",
- "7 1997 22.36\n",
- "8 1998 25.60\n",
- "9 1999 24.37\n",
- "10 2000 23.32\n",
- "11 2001 25.75\n",
- "12 2002 27.29\n",
- "13 2003 21.98\n",
- "14 2004 15.48\n",
- "15 2005 12.81\n",
- "16 2006 12.81\n",
- "17 2007 17.54\n",
- "18 2008 32.70\n",
- "19 2009 31.48\n",
- "20 2010 22.55\n",
- "21 2011 24.20\n",
- "22 2012 17.80\n",
- "23 2013 14.23\n",
- "24 2014 14.18\n",
- "25 2015 16.67\n",
- "26 2016 15.83\n",
- "27 2017 11.09\n",
- "28 2018 16.64\n",
- "29 2019 15.39\n",
- "30 2020 29.25\n",
- "31 2021 19.66\n",
- "32 2022 25.64\n",
- "33 2023 16.85\n",
- "34 2024 13.62"
- ]
- },
- "execution_count": 61,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 5\n",
- "# Agrupa por 'Year' y calcula el promedio de 'Close' para cada año\n",
- "average_close_by_year = df.groupby('Year')['CLOSE'].mean()\n",
- "\n",
- "# Redondea los valores a tres decimales\n",
- "average_close_by_year = average_close_by_year.round(2)\n",
- "\n",
- "# Crea un nuevo DataFrame con los resultados\n",
- "df_average_year = pd.DataFrame({'Year': average_close_by_year.index, 'Average Close': average_close_by_year.values})\n",
- "df_average_year"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 62,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Low 5276\n",
- "Medium 2582\n",
- "High 720\n",
- "Name: Level, dtype: int64"
- ]
- },
- "execution_count": 62,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Count the number of 'High', 'Medium', and 'Low' Level days in the data\n",
- "df = df['Level'].value_counts()\n",
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "hide_input": false,
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.4"
- },
- "toc": {
- "base_numbering": 1,
- "nav_menu": {},
- "number_sections": true,
- "sideBar": true,
- "skip_h1_title": false,
- "title_cell": "Table of Contents",
- "title_sidebar": "Contents",
- "toc_cell": false,
- "toc_position": {},
- "toc_section_display": true,
- "toc_window_display": false
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
From d7cf21f0b43c363d46fc4cbfc6a52536e504fc63 Mon Sep 17 00:00:00 2001
From: Rafael Vargas <90073975+rafa0303@users.noreply.github.com>
Date: Sun, 14 Jan 2024 21:41:05 -0500
Subject: [PATCH 14/14] Rename group_7_ass_2_2024_re.ipynb to
group_7_ass_2_2024.ipynb
This was named group_7_ass_2_2024_re before because of Anzony's request to redo the task.
---
.../{group_7_ass_2_2024_re.ipynb => group_7_ass_2_2024.ipynb} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename assignments/assignment_2/{group_7_ass_2_2024_re.ipynb => group_7_ass_2_2024.ipynb} (100%)
diff --git a/assignments/assignment_2/group_7_ass_2_2024_re.ipynb b/assignments/assignment_2/group_7_ass_2_2024.ipynb
similarity index 100%
rename from assignments/assignment_2/group_7_ass_2_2024_re.ipynb
rename to assignments/assignment_2/group_7_ass_2_2024.ipynb