From 42917af43c50263f83c5c7fb42dad09b53454f48 Mon Sep 17 00:00:00 2001 From: "anvithgopabellare@gmail.com" Date: Tue, 25 Apr 2023 01:18:55 +0530 Subject: [PATCH 1/2] changes made --- Dockerfile | 21 +++++---------- docker-compose.yaml | 47 +++++++++++++++++----------------- web_scraping_sample.py | 58 ++++++++++++++++++++++-------------------- 3 files changed, 62 insertions(+), 64 deletions(-) diff --git a/Dockerfile b/Dockerfile index f07ae2b..b751463 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,10 @@ FROM python:3.10.2-alpine3.15 -COPY . . -# Install Postgres -RUN apk update -RUN apk add postgresql -RUN chown postgres:postgres /run/postgresql/ -# Install requirements -COPY ./requirements.txt /tmp -RUN pip install -r /tmp/requirements.txt -# For psycopg2 -RUN apk add --virtual postgresql-deps libpq-dev -# Create directories +# Create directories RUN mkdir -p /root/workspace/src -# Mount your local file -COPY ./web_scraping_sample.py /root/workspace/src +COPY ./web_scraping_sample.py /root/workspace/src # Switch to project directory -WORKDIR /root/workspace/src \ No newline at end of file +WORKDIR /root/workspace/src +# Install required packages +RUN pip install --upgrade pip +RUN pip install requests bs4 html5lib +RUN pip3 install psycopg2-binary --user diff --git a/docker-compose.yaml b/docker-compose.yaml index cad1491..b2e9445 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,26 +1,27 @@ version: "3" services: - pyhton_service: - build: - context: ./ - dockerfile: Dockerfile - image: workshop1 - container_name: workshop_python_container - stdin_open: true # docker attach container_id - tty: true - ports: - - "8000:8000" - volumes: - - .:/app - depends_on: - - postgres_service + + psql-db: + image: 'postgres:14' + container_name: psql-db + environment: + - PGPASSWORD=123456 + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=123456 + ports: + - '5434:5432' + app: + - postgres_service: - image: postgres - container_name: workshop_postgres_container - ports: - - "5432:5432" - environment: - POSTGRES_PASSWORD: admin - volumes: - - .:/var/lib/postgres + build: + context: ./ + dockerfile: Dockerfile + image: workshop1 + container_name: workshop_python_container + stdin_open: true # docker attach container_id + tty: true + ports: + - "8000:8000" + volumes: + - .:/app + diff --git a/web_scraping_sample.py b/web_scraping_sample.py index c9ca142..cefeaca 100644 --- a/web_scraping_sample.py +++ b/web_scraping_sample.py @@ -1,31 +1,35 @@ import requests from bs4 import BeautifulSoup -import re import psycopg2 -# Create connection to database -conn = psycopg2.connect( - host="postgres_service", - database="LipsumGenerator", - user="postgres", - password="admin") -cursor = conn.cursor() - -res = requests.get('https://www.lipsum.com/') -soup = BeautifulSoup(res.content, 'html5lib') # If this line causes an error, run 'pip install html5lib' or install html5lib -data = soup.find(re.compile(r'div'), attrs={'id': "Panes"}) -print(data.find("lorem")) - -question_list = [] -answer_list = [] -for row in data.findAll("div"): - question_list.append(row.h2.text) - temp_string = "" - counter=0 - for i in row.findAll("p"): - temp_string = temp_string + "\n" + i.text - answer_list.append(temp_string) -file = open("qn_ans_ans", "w") - -for i in range(len(question_list)): - cursor.execute("insert into qn_ans values(%s,%s)", (question_list[i], answer_list[i])) +# Send a GET request to the website +url = 'https://blog.python.org/' +response = requests.get(url) + +# Parse the HTML content with BeautifulSoup +soup = BeautifulSoup(response.content, 'html.parser') + +# Extract the blog post titles and dates +titles = soup.find_all('h3', class_='post-title') +dates = soup.find_all('h2', class_='date-header') + +# Create a connection to the PostgreSQL database +conn = psycopg2.connect(database="mydatabase", user="postgres", password="123456", host="lostgres_service")#, #port="5432") + +# Create a cursor object to execute SQL statements +cur = conn.cursor() + +# Create a table to store the blog data +cur.execute('CREATE TABLE IF NOT EXISTS blog (id SERIAL PRIMARY KEY, title TEXT, date DATE)') + +# Insert the blog post titles and dates into the database +for i in range(len(titles)): + title = titles[i].get_text() + date = dates[i].get_text() + cur.execute('INSERT INTO blog (title, date) VALUES (%s, %s)', (title, date)) + +# Commit the changes and close the connection +conn.commit() +cur.close() +conn.close() + From fd2e114f66873aa56dee6db8b43da687b83e6a45 Mon Sep 17 00:00:00 2001 From: Anvithgopa <57863118+Anvithgopa@users.noreply.github.com> Date: Tue, 25 Apr 2023 07:31:14 +0530 Subject: [PATCH 2/2] Update web_scraping_sample.py --- web_scraping_sample.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/web_scraping_sample.py b/web_scraping_sample.py index cefeaca..311099e 100644 --- a/web_scraping_sample.py +++ b/web_scraping_sample.py @@ -2,33 +2,28 @@ from bs4 import BeautifulSoup import psycopg2 -# Send a GET request to the website url = 'https://blog.python.org/' response = requests.get(url) -# Parse the HTML content with BeautifulSoup soup = BeautifulSoup(response.content, 'html.parser') -# Extract the blog post titles and dates titles = soup.find_all('h3', class_='post-title') dates = soup.find_all('h2', class_='date-header') -# Create a connection to the PostgreSQL database conn = psycopg2.connect(database="mydatabase", user="postgres", password="123456", host="lostgres_service")#, #port="5432") -# Create a cursor object to execute SQL statements + cur = conn.cursor() -# Create a table to store the blog data + cur.execute('CREATE TABLE IF NOT EXISTS blog (id SERIAL PRIMARY KEY, title TEXT, date DATE)') -# Insert the blog post titles and dates into the database for i in range(len(titles)): title = titles[i].get_text() date = dates[i].get_text() cur.execute('INSERT INTO blog (title, date) VALUES (%s, %s)', (title, date)) -# Commit the changes and close the connection + conn.commit() cur.close() conn.close()