diff --git a/Homework/Dockerfile b/Homework/Dockerfile
new file mode 100644
index 0000000..602fb81
--- /dev/null
+++ b/Homework/Dockerfile
@@ -0,0 +1,13 @@
+FROM python:3.10.2-alpine3.15
+
+# Send print() output straight to `docker logs` instead of holding it in a
+# buffer until the script exits (stdout is block-buffered without a TTY).
+ENV PYTHONUNBUFFERED=1
+
+# The scraper only makes outbound connections, so it runs as an
+# unprivileged user instead of root.
+RUN addgroup -S app && adduser -S -G app app
+
+# WORKDIR creates the directory when it is missing, so no `mkdir` is needed.
+WORKDIR /app
+
+# Install dependencies before copying the source so this layer stays cached
+# when only the script changes. html5lib and psycopg2-binary carry the same
+# pins as the repository's requirements.txt.
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir \
+        bs4 \
+        html5lib==1.1 \
+        psycopg2-binary==2.9.3 \
+        requests
+
+COPY --chown=app:app ./python_web_scrape.py ./
+
+USER app
+
+# ENTRYPOINT is the interpreter; CMD is the default script and can be
+# overridden on the `docker run` command line.
+ENTRYPOINT ["python"]
+CMD ["python_web_scrape.py"]
+
+
diff --git a/Homework/docker-compose.yml b/Homework/docker-compose.yml
new file mode 100644
index 0000000..698411c
--- /dev/null
+++ b/Homework/docker-compose.yml
@@ -0,0 +1,9 @@
+# Versioned Compose layout (top-level `services:` key). Format 3.7 is
+# accepted by docker-compose 1.25.0, the version shown in the README, and
+# by newer Compose releases that no longer read the legacy un-versioned format.
+version: '3.7'
+
+services:
+  psql-db:
+    image: 'postgres:14'
+    container_name: psql-db
+    environment:
+      # Lets `psql` inside the container connect without a password prompt.
+      - PGPASSWORD=123456
+      - POSTGRES_USER=postgres
+      - POSTGRES_PASSWORD=123456
+      # Database python_web_scrape.py connects to; the image creates it
+      # the first time the (empty) data directory is initialised.
+      - POSTGRES_DB=webdemo
+    ports:
+      # host:container - the scraper connects to port 5434 on the host.
+      - '5434:5432'
diff --git a/Homework/python_web_scrape.py b/Homework/python_web_scrape.py
new file mode 100644
index 0000000..04c89ec
--- /dev/null
+++ b/Homework/python_web_scrape.py
@@ -0,0 +1,142 @@
+import requests
+from bs4 import BeautifulSoup
+import re
+import psycopg2
+from psycopg2 import Error
+
+# Front page of the blog to scrape; main() fetches this URL first and then
+# follows the "Older Posts" links it finds.
+url = 'https://blog.python.org/'
+
+# main() parses pages with the 'html5lib' parser. If that parser causes an
+# error, run 'pip install html5lib' or install html5lib
+
+def create_connection(db_name, db_user, db_password, db_host, db_port):
+ connection = None
+ try:
+ connection = psycopg2.connect(
+ database=db_name,
+ user=db_user,
+ password=db_password,
+ host=db_host,
+ port=db_port
+ )
+ print("Connection to PostgreSQL DB successful")
+ except Error as e:
+ print(f"The error '{e}' occurred")
+
+ return connection
+
+# Function to execute insert queries
+def execute_query(connection,data):
+ cursor = connection.cursor()
+ try:
+ query = """
+ INSERT INTO python_blog_articles (date, title, body, author)
+ VALUES (%s, %s, %s, %s)
+ """
+ cursor.execute(query, data)
+ connection.commit()
+ print("Query executed successfully")
+ except Error as e:
+ print(f"The error '{e}' occurred")
+
+def create_table(connection):
+ try:
+ cursor = connection.cursor()
+ # SQL statement to create table if not exists
+ create_table_query = """
+ CREATE TABLE IF NOT EXISTS python_blog_articles (
+ id SERIAL PRIMARY KEY,
+ date VARCHAR(100),
+ title TEXT,
+ body TEXT,
+ author VARCHAR(100)
+ );
+ """
+ # Execute the SQL query
+ cursor.execute(create_table_query)
+ connection.commit()
+ print("Table created successfully or already exists")
+ except Error as e:
+ print(f"The error '{e}' occurred")
+
+# Module-level accumulators: process_page() appends to them for every page,
+# and main() reads entry i of each list to build one database row.
+date=[]       # date-header text
+titletext=[]  # post title text
+bodytext=[]   # post body text with newline runs replaced by spaces
+author=[]     # post-author text from the post footer
+
+# Find all <div> elements with class="date-outer"
+def process_page(soup):
+ for div in soup.find_all('div', class_='date-outer'):
+ hd = div.find_all('div', 'post-outer')
+ for i in hd:
+ date_header = div.find('h2', class_='date-header')
+ if date_header:
+ date_text = date_header.find('span')
+ dt = date_text.get_text(strip=True)
+ date.append(dt)
+ tdiv = div.find('div', class_='date-posts')
+ for div1 in tdiv.find_all('div', class_='post-outer'):
+ title_head = div1.find('h3', class_='post-title entry-title')
+ if title_head:
+ title_text = title_head.text.strip()
+ titletext.append(title_text)
+ content_div = div1.find('div', class_='post-body entry-content')
+ if content_div:
+ for p_tag in content_div.find_all('p'):
+ paragraph_text = content_div.text.strip()
+ cleaned_content = re.sub(r'\n+', ' ', paragraph_text)
+ bodytext.append(cleaned_content)
+ foot = div.find_all('div', class_='post-outer')
+ for i in foot:
+ footer_head = div.find('div', class_='post-footer')
+ footer_text = footer_head.find('span', class_='post-author vcard').text.strip()
+ cleanedf_content = re.sub(r'\n+', ' ', footer_text)
+ author.append(cleanedf_content)
+
+
+
+
+def main():
+ # PostgreSQL database connection settings
+ db_name = 'webdemo'
+ db_user = 'postgres'
+ db_password = '123456'
+ db_host = 'localhost' # or your host
+ db_port = '5434' # or your port
+
+ # Establish connection to PostgreSQL
+ connection = create_connection(db_name, db_user, db_password, db_host, db_port)
+
+ if connection:
+ try:
+ res = requests.get(url)
+ soup = BeautifulSoup(res.content, 'html5lib')
+ process_page(soup
+ )
+
+ # Scraping subsequent pages until we have 50 articles
+ while len(titletext) < 50:
+ older_posts_link = soup.find('a', string=re.compile(r'Older Posts', re.IGNORECASE))
+ if older_posts_link:
+ next_page_url = older_posts_link['href']
+ res = requests.get(next_page_url)
+ soup = BeautifulSoup(res.content, 'html5lib')
+ process_page(soup)
+ else:
+ break
+
+ create_table(connection)
+ for i in range(len(titletext)):
+ data = (date[i], titletext[i], bodytext[i], author[i])
+ execute_query(connection,data)
+
+
+ except Error as e:
+ print(f"Error: {e}")
+
+ finally:
+ if connection:
+ connection.close()
+ print("PostgreSQL connection is closed")
+
+# Run the scraper only when this file is executed as a script, not when it
+# is imported as a module.
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/README.md b/README.md
index 9dd0a0e..bd0b751 100644
--- a/README.md
+++ b/README.md
@@ -46,22 +46,29 @@ One Day workshop on understanding Docker, Web Scrapping, Regular Expressions, Po
docker-compose --version
```
##### **_docker-compose version 1.25.0, build 0a186604_**
-
-## What will you learn by the end of this workshop?
-- By the end of this workshop you will learn how to build docker image and it's usage.
-- You will learn how to scrape a website using urllib/requests and Beautifulsoup.
-- You will learn Regular Expressions and how to work with it.
-- You will learn key features of PostgreSQL.
-- You will learn how to dockerize your project.
-
-## Schedule
-| Time | Topics
-|---------------|-------
-| 09:00 - 11:00 | [`Introduction to Docker`](/docs/introduction_to_docker.md)
-| 11:00 - 01:00 | [`Introduction to Webscrapping.`](/docs/introduction_to_webscraping.md)
-| 01:00 - 02:00 | `Break`
-| 02:00 - 03:00 | [`Dockerizing a project`](/docs/working_with_docker_container.md)
-| 03:00 - 04:00 | [`Introduction to PostgreSQL`](/docs/introduction_to_postgresql.md)
-| 04:00 - 04:30 | [`Introduction to Github`](/docs/introduction_to_git_commands.md)
-| 04:30 - 04:45 | `Q & A`
-| 04:45 - 05:00 | [`Wrapping Up`](/docs/workshop1_home_work.md)
+
+ ### Homework
+ - Start the PostgreSQL server defined in docker-compose.yml, then open a `psql` shell inside the container
+ ```
+ docker-compose up -d
+ ```
+ ```
+ docker exec -it psql-db bash
+ ```
+ ```
+ psql -U postgres
+ ```
+ - Create a database named `webdemo` (the name python_web_scrape.py connects to) to store the scraped content, e.g. `CREATE DATABASE webdemo;`
+
+ - Build the Docker image from the Dockerfile and run the scraper with these commands
+ ```
+ docker build --no-cache --network=host ./ -t simple_python
+ ```
+ ```
+ docker run --network=host simple_python
+ ```
+
+ - The scraped articles are stored in the `python_blog_articles` table with these columns:
+ - Date | Title | Content/BodyText | Author
+
+
diff --git a/requirements.txt b/requirements.txt
index 34c449f..6e1cbed 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-psycopg2==2.9.3
+psycopg2-binary==2.9.3
bs4
urllib2
requests
-html5lib==1.1
\ No newline at end of file
+html5lib==1.1