From ab68b87a8f7ed9aef0d61c53adeac4d1a8097bde Mon Sep 17 00:00:00 2001
From: rajath
Date: Tue, 25 Apr 2023 08:51:11 +0530
Subject: [PATCH] homework1

---
 Homework1/Dockerfile         |  9 +++++++
 Homework1/docker-compose.yml |  9 +++++++
 Homework1/web_scraping.py    | 52 ++++++++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+)
 create mode 100644 Homework1/Dockerfile
 create mode 100644 Homework1/docker-compose.yml
 create mode 100644 Homework1/web_scraping.py

diff --git a/Homework1/Dockerfile b/Homework1/Dockerfile
new file mode 100644
index 0000000..04bc5f2
--- /dev/null
+++ b/Homework1/Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.10.2-alpine3.15
+# Create directories
+RUN mkdir -p /root/workspace/src
+COPY ./web_scraping.py /root/workspace/src
+# Switch to project directory
+WORKDIR /root/workspace/src
+# Install required packages
+RUN pip install --upgrade pip
+RUN pip install requests bs4 html5lib psycopg2-binary
diff --git a/Homework1/docker-compose.yml b/Homework1/docker-compose.yml
new file mode 100644
index 0000000..d976dd1
--- /dev/null
+++ b/Homework1/docker-compose.yml
@@ -0,0 +1,9 @@
+psql-db:
+  image: 'postgres:14'
+  container_name: psql-db2
+  environment:
+    - PGPASSWORD=123456
+    - POSTGRES_USER=postgres
+    - POSTGRES_PASSWORD=123456
+  ports:
+    - '5434:5432'
diff --git a/Homework1/web_scraping.py b/Homework1/web_scraping.py
new file mode 100644
index 0000000..b528cf7
--- /dev/null
+++ b/Homework1/web_scraping.py
@@ -0,0 +1,52 @@
+"""Scrape post titles and authors from blog.python.org into PostgreSQL.
+
+Expects a reachable PostgreSQL database "hw1" that already contains a
+table blog_data(no, title, author).
+"""
+
+import requests
+from bs4 import BeautifulSoup
+import re
+import psycopg2
+
+# Only the first MAX_POSTS entries of the page are stored.
+MAX_POSTS = 4
+
+conn = psycopg2.connect(
+    host="172.17.0.2",
+    port="5432",
+    database="hw1",
+    user="postgres",
+    password="123456"
+)
+
+print("Connection Successful")
+
+try:
+    # Fail fast instead of hanging or parsing an HTTP error page.
+    res = requests.get('https://blog.python.org/', timeout=10)
+    res.raise_for_status()
+    soup = BeautifulSoup(res.content, 'html5lib')
+
+    titles = [
+        heading.find('a').getText().strip()
+        for heading in soup.find_all('h3', class_='entry-title')
+    ]
+    authors = [
+        span.getText().strip()
+        for span in soup.find_all('span', class_='fn')
+    ]
+
+    # zip() stops at the shortest input, so numbering is capped at MAX_POSTS
+    # and a page with fewer entries no longer raises IndexError.
+    rows = list(zip(range(1, MAX_POSTS + 1), titles, authors))
+
+    # "with conn" commits on success and rolls back on error; the cursor
+    # context manager closes the cursor.
+    with conn, conn.cursor() as cur:
+        cur.executemany(
+            "INSERT INTO blog_data(no,title,author) VALUES(%s,%s,%s)", rows
+        )
+finally:
+    # "with conn" does not close the connection, so close it explicitly.
+    conn.close()