diff --git a/homework/Dockerfile b/homework/Dockerfile
new file mode 100644
index 0000000..84f0e65
--- /dev/null
+++ b/homework/Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.10.2-alpine3.15
+# Create directories
+RUN mkdir -p /root/workspace/src
+COPY ./webScraping.py /root/workspace/src
+# Switch to project directory
+WORKDIR /root/workspace/src
+# Install required packages
+RUN pip install --upgrade pip
+RUN pip install requests bs4 html5lib psycopg2-binary
diff --git a/homework/docker-compose.yml b/homework/docker-compose.yml
new file mode 100644
index 0000000..e3bff76
--- /dev/null
+++ b/homework/docker-compose.yml
@@ -0,0 +1,10 @@
+
+psql-db:
+  image: 'postgres:14'
+  container_name: psql-db
+  environment:
+    - PGPASSWORD=123456
+    - POSTGRES_USER=postgres
+    - POSTGRES_PASSWORD=123456
+  ports:
+    - '5434:5432'
diff --git a/homework/webScraping.py b/homework/webScraping.py
new file mode 100644
index 0000000..670e58a
--- /dev/null
+++ b/homework/webScraping.py
@@ -0,0 +1,58 @@
+"""Scrape recent blog.python.org posts into a PostgreSQL table.
+
+Fetches the blog front page, extracts post titles and author names, and
+stores the first MAX_POSTS title/author pairs in table ``py1``.
+"""
+import requests
+from bs4 import BeautifulSoup
+import re
+import psycopg2
+
+# Only the first MAX_POSTS scraped posts are stored.
+MAX_POSTS = 4
+
+# NOTE(review): hard-coded IP; presumably the psql-db container's address on
+# the default Docker bridge network -- confirm before relying on it.
+conn = psycopg2.connect(
+    host="172.17.0.2",
+    port="5432",
+    database="pybd",
+    user="postgres",
+    password="123456"
+)
+print("Connection Successful")
+try:
+    cur = conn.cursor()
+    res = requests.get('https://blog.python.org/', timeout=30)
+    # Fail fast on an HTTP error instead of parsing an error page.
+    res.raise_for_status()
+    soup = BeautifulSoup(res.content, 'html5lib')
+    # Post titles are the link text inside each <h3 class="entry-title">.
+    titles = [
+        heading.find('a').getText().strip()
+        for heading in soup.find_all('h3', class_='entry-title')
+    ]
+    # Author names are the text of each <span class="fn">.
+    authors = [
+        span.getText().strip()
+        for span in soup.find_all('span', class_='fn')
+    ]
+    # Create the table once, before the inserts: repeating CREATE TABLE on
+    # every loop iteration fails as soon as the table exists.
+    cur.execute(
+        "CREATE TABLE IF NOT EXISTS "
+        "py1(no INT, title VARCHAR(100),author VARCHAR(100));"
+    )
+    # zip() stops at the shorter list, so fewer than MAX_POSTS scraped
+    # entries no longer raise IndexError.
+    rows = zip(titles[:MAX_POSTS], authors[:MAX_POSTS])
+    for no, (title, author) in enumerate(rows, start=1):
+        cur.execute(
+            "INSERT INTO py1(no,title,author) VALUES(%s,%s,%s)",
+            (no, title, author)
+        )
+    conn.commit()
+    cur.close()
+finally:
+    # Release the connection even when scraping or an insert fails.
+    conn.close()