diff --git a/Homework1/Dockerfile b/Homework1/Dockerfile
new file mode 100644
--- /dev/null
+++ b/Homework1/Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.10.2-alpine3.15
+# Switch to the project directory; WORKDIR creates it when it does not exist
+WORKDIR /root/workspace/src
+# Install required packages before copying the script so that editing the
+# script does not invalidate the cached dependency layers
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir requests bs4 html5lib psycopg2-binary
+# Copy the scraper last; the trailing slash marks the destination as a directory
+COPY ./web_scraping.py /root/workspace/src/
diff --git a/Homework1/docker-compose.yml b/Homework1/docker-compose.yml
new file mode 100644
--- /dev/null
+++ b/Homework1/docker-compose.yml
@@ -0,0 +1,11 @@
+# PostgreSQL 14 container; presumably the database web_scraping.py writes to
+# (same user and password). Container port 5432 is published on host port 5434.
+psql-db:
+  image: 'postgres:14'
+  container_name: psql-db2
+  environment:
+    - PGPASSWORD=123456
+    - POSTGRES_USER=postgres
+    - POSTGRES_PASSWORD=123456
+  ports:
+    - '5434:5432'
diff --git a/Homework1/web_scraping.py b/Homework1/web_scraping.py
new file mode 100644
--- /dev/null
+++ b/Homework1/web_scraping.py
@@ -0,0 +1,52 @@
+"""Scrape post titles and authors from blog.python.org into the blog_data table."""
+import os
+
+import requests
+from bs4 import BeautifulSoup
+import re
+import psycopg2
+
+# Upper bound on the number of posts stored per run
+MAX_POSTS = 4
+
+conn = psycopg2.connect(
+    # DB_HOST overrides the default; 172.17.0.2 is typically an address on Docker's default bridge
+    host=os.environ.get("DB_HOST", "172.17.0.2"),
+    port="5432",
+    database="hw1",
+    user="postgres",
+    password="123456"
+)
+
+print("Connection Successful")
+
+# Close the connection even when fetching, parsing or inserting fails
+try:
+    res = requests.get('https://blog.python.org/', timeout=30)
+    # Do not parse an HTTP error page as if it were the blog
+    res.raise_for_status()
+    soup = BeautifulSoup(res.content, 'html5lib')
+
+    titles = []
+    authors = []
+
+    for heading in soup.find_all('h3', class_='entry-title'):
+        link = heading.find('a')
+        # A heading without a link falls back to its own text instead of raising
+        source = heading if link is None else link
+        titles.append(source.getText().strip())
+
+    for span in soup.find_all('span', class_='fn'):
+        authors.append(span.getText().strip())
+
+    # zip stops at the shorter list, so fewer than MAX_POSTS entries cannot raise IndexError
+    posts = list(zip(titles, authors))[:MAX_POSTS]
+
+    with conn.cursor() as cur:
+        for no, (title, author) in enumerate(posts, start=1):
+            cur.execute(
+                "INSERT INTO blog_data(no,title,author) VALUES(%s,%s,%s)", (no, title, author)
+            )
+    conn.commit()
+finally:
+    conn.close()