diff --git a/Dockerfile b/Dockerfile
index f07ae2b..b751463 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,17 +1,8 @@
 FROM python:3.10.2-alpine3.15
-COPY . .
-# Install Postgres
-RUN apk update
-RUN apk add postgresql
-RUN chown postgres:postgres /run/postgresql/
-# Install requirements
-COPY ./requirements.txt /tmp
-RUN pip install -r /tmp/requirements.txt
-# For psycopg2
-RUN apk add --virtual postgresql-deps libpq-dev
-# Create directories
-RUN mkdir -p /root/workspace/src
-# Mount your local file
-COPY ./web_scraping_sample.py /root/workspace/src
-# Switch to project directory
-WORKDIR /root/workspace/src
\ No newline at end of file
+# Install required packages before copying the script so this layer stays
+# cached when only the script changes
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir requests bs4 html5lib psycopg2-binary
+# Switch to project directory (WORKDIR creates it when it does not exist)
+WORKDIR /root/workspace/src
+COPY ./web_scraping_sample.py /root/workspace/src/
diff --git a/docker-compose.yaml b/docker-compose.yaml
index cad1491..b2e9445 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,26 +1,30 @@
 version: "3"
 services:
-  pyhton_service:
-    build:
-      context: ./
-      dockerfile: Dockerfile
-    image: workshop1
-    container_name: workshop_python_container
-    stdin_open: true # docker attach container_id
-    tty: true
-    ports:
-      - "8000:8000"
-    volumes:
-      - .:/app
-    depends_on:
-      - postgres_service
+  # PostgreSQL database used by the scraper. NOTE: credentials are hard-coded;
+  # move them to an env file for anything beyond local use.
+  psql-db:
+    image: 'postgres:14'
+    container_name: psql-db
+    environment:
+      - PGPASSWORD=123456
+      - POSTGRES_USER=postgres
+      - POSTGRES_PASSWORD=123456
+      # Database the scraper connects to; created when the data dir is first initialised
+      - POSTGRES_DB=mydatabase
+    ports:
+      - '5434:5432'
 
-  postgres_service:
-    image: postgres
-    container_name: workshop_postgres_container
-    ports:
-      - "5432:5432"
-    environment:
-      POSTGRES_PASSWORD: admin
-    volumes:
-      - .:/var/lib/postgres
+  # Python container built from the Dockerfile in this directory
+  app:
+    build:
+      context: ./
+      dockerfile: Dockerfile
+    image: workshop1
+    container_name: workshop_python_container
+    stdin_open: true # docker attach container_id
+    tty: true
+    ports:
+      - "8000:8000"
+    volumes:
+      - .:/app
+    # Start the database container before the app (start order only, not readiness)
+    depends_on:
+      - psql-db
diff --git a/web_scraping_sample.py b/web_scraping_sample.py
index c9ca142..311099e 100644
--- a/web_scraping_sample.py
+++ b/web_scraping_sample.py
@@ -1,31 +1,36 @@
 import requests
 from bs4 import BeautifulSoup
-import re
 import psycopg2
 
-# Create connection to database
-conn = psycopg2.connect(
-    host="postgres_service",
-    database="LipsumGenerator",
-    user="postgres",
-    password="admin")
-cursor = conn.cursor()
-
-res = requests.get('https://www.lipsum.com/')
-soup = BeautifulSoup(res.content, 'html5lib') # If this line causes an error, run 'pip install html5lib' or install html5lib
-data = soup.find(re.compile(r'div'), attrs={'id': "Panes"})
-print(data.find("lorem"))
-
-question_list = []
-answer_list = []
-for row in data.findAll("div"):
-    question_list.append(row.h2.text)
-    temp_string = ""
-    counter=0
-    for i in row.findAll("p"):
-        temp_string = temp_string + "\n" + i.text
-    answer_list.append(temp_string)
-file = open("qn_ans_ans", "w")
-
-for i in range(len(question_list)):
-    cursor.execute("insert into qn_ans values(%s,%s)", (question_list[i], answer_list[i]))
+# Scrape post titles and their dates from the Python blog and store them in
+# the `blog` table of the PostgreSQL database.
+url = 'https://blog.python.org/'
+
+# Raise on HTTP errors instead of parsing an error page; the timeout keeps the
+# script from waiting forever on a stalled connection.
+response = requests.get(url, timeout=30)
+response.raise_for_status()
+
+soup = BeautifulSoup(response.content, 'html.parser')
+
+# The title and date-header lists are not guaranteed to have the same length
+# (presumably one date header can precede several posts), so pair each title
+# with the closest date header before it instead of indexing two lists.
+rows = []
+for heading in soup.find_all('h3', class_='post-title'):
+    date_header = heading.find_previous('h2', class_='date-header')
+    rows.append((
+        heading.get_text(strip=True),
+        date_header.get_text(strip=True) if date_header else None,
+    ))
+
+# The host is the database service name declared in docker-compose.yaml.
+conn = psycopg2.connect(database="mydatabase", user="postgres", password="123456", host="psql-db")
+try:
+    # `with conn` commits on success and rolls back on an exception; the
+    # cursor context manager closes the cursor.
+    with conn, conn.cursor() as cur:
+        cur.execute('CREATE TABLE IF NOT EXISTS blog (id SERIAL PRIMARY KEY, title TEXT, date DATE)')
+        cur.executemany('INSERT INTO blog (title, date) VALUES (%s, %s)', rows)
+finally:
+    conn.close()