Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 2 additions & 12 deletions base_images/flask_monitor/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,3 @@
flask==3.0.0
blinker==1.9.0
certifi==2025.8.3
charset-normalizer==3.4.3
click==8.2.1
idna==3.10
itsdangerous==2.2.0
Jinja2==3.1.6
MarkupSafe==3.0.2
pillow==11.3.0
requests==2.32.5
urllib3==2.5.0
Werkzeug==3.1.3
Jinja2==3.1.2
requests==2.32.4
7 changes: 7 additions & 0 deletions compose.dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,13 @@ services:
container_name: gs_compiler_035_talsperren
restart: unless-stopped

gs_compiler_062_abzucht_oker:
build: ./docker_instances/gs_compiler_062_abzucht_oker
volumes:
- ./httpdocs/crawler:/app/output
container_name: gs_compiler_062_abzucht_oker
restart: unless-stopped

# === EDUCATION CRAWLER ===
gs_compiler_050_tschuessschule_studium:
build: ./docker_instances/050_tschuessschule_studium
Expand Down
4 changes: 0 additions & 4 deletions compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,6 @@ services:
- ./httpdocs/crawler:/app/output
container_name: gs_compiler_056_serviceportal
restart: unless-stopped
gs_compiler_068_serviceportal:
image: ghcr.io/machmitgoslar/gs_crawler_068_altstadtfest:latest
container_name: gs_compiler_068_altstadtfest
restart: unless-stopped

# Gemeinsames Volume für alle Output-Dateien
volumes:
Expand Down
7 changes: 0 additions & 7 deletions docker_instances/000_health_monitor/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,6 @@
'source': 'https://service.goslar.de/home',
'schedule': 'Täglich 09:00',
'type': 'Service Portal'
},
'gs_compiler_068_altstadtfest': {
'name': 'Altstadtfest Goslar',
'expected_files': [],
'source': 'https://meingoslar.de/',
'schedule': 'API Endpoint',
'type': 'API Endpoint'
}
}

Expand Down
42 changes: 28 additions & 14 deletions docker_instances/044_wiedelah/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,54 @@
import os

# URL der Quelle
url = "https://dg-wiedelah.de/category/berichte-von-veranstaltungen/"
url = "https://dg-wiedelah.de/category/arbeitseinsaetze/"

# Anfrage und HTML parsen
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

# Alle Einträge finden
container = soup.find("div", id="main-content")
widget = container.find("div", class_="post-listing") if container else None
li_tags = widget.find_all("article") if widget else []
container = soup.find("div", id="categort-posts-widget-5")
widget = container.find("div", class_="widget-container") if container else None
ul = widget.find("ul") if widget else None
li_tags = ul.find_all("li") if ul else []

entries = []
for index, li in enumerate(li_tags):
div = li.find("h2")
div = li.find("div")
a_tag = div.find("a", href=True) if div else None
call_to_action_url = a_tag["href"] if a_tag else "https://dg-wiedelah.de/"
call_to_action_url = a_tag["href"] if a_tag else ""

img_tag = a_tag.find("img") if a_tag else None
image_url = img_tag["src"] if img_tag else ""

div_beschreibung = li.find("div", class_="entry") if div else None
h3 = li.find("h3") if div else None

p_tag = div_beschreibung.find("p") if div else None
description = p_tag.get_text(strip=True) if p_tag else ""
a_tag = h3.find("a") if div else None
description = a_tag.get_text(strip=True) if a_tag else ""

# call_to_action_url = a_tag["href"] if a_tag else ""

span_tag = li.find("span", class_="tie-date")
date_text = span_tag.get_text(strip=True) if span_tag else ""

# Datum leer
published_at = ""
# Datum umwandeln in datetime-Objekt (mit deutschem Format)
try:
date_obj = datetime.strptime(date_text, "%d. %B %Y")
except ValueError:
# Fallback für Monatsnamen (z. B. Juni) ins Englische konvertieren
month_map = {
"Januar": "January", "Februar": "February", "März": "March",
"April": "April", "Mai": "May", "Juni": "June",
"Juli": "July", "August": "August", "September": "September",
"Oktober": "October", "November": "November", "Dezember": "December"
}
for de, en in month_map.items():
date_text = date_text.replace(de, en)
date_obj = datetime.strptime(date_text, "%d. %B %Y")

# Datum ins Format yyyy-mm-ddThh:00 umwandeln
published_at = date_obj.strftime("%Y-%m-%dT%H:00")

entry = {
"id": index,
Expand All @@ -54,8 +70,7 @@

# Zufälligen Eintrag speichern
if entries:
# zufall = random.choice(entries)
zufall = entries[0]
zufall = random.choice(entries)
with open("output/044-wiedelah.json", "w", encoding="utf-8") as f:
json.dump(zufall, f, ensure_ascii=False, indent=2)
print(zufall)
Expand All @@ -64,4 +79,3 @@
json.dump(entries, f, ensure_ascii=False, indent=2)
else:
print("❌ Keine Arbeitseinsätze gefunden.")

12 changes: 12 additions & 0 deletions docker_instances/062_abzucht_oker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Migriert zu Base-Image für bessere Wartbarkeit
FROM ghcr.io/machmitgoslar/gs_crawler_python_basic_crawler:latest

# Kopiere container-spezifische Dateien
COPY script.py .
COPY crontab /etc/cron.d/mycron



# Konfiguriere Cron
RUN chmod 0600 /etc/cron.d/mycron && \
crontab /etc/cron.d/mycron
8 changes: 8 additions & 0 deletions docker_instances/062_abzucht_oker/crontab
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# ┌───────────── Minute (0 - 59)
# │ ┌───────────── Hour (0 - 23)
# │ │ ┌───────────── Day of month (1 - 31)
# │ │ │ ┌───────────── Month (1 - 12)
# │ │ │ │ ┌───────────── Day of week (0 - 6) (Sunday to Saturday)
# │ │ │ │ │
# │ │ │ │ │
0 */6 * * * cd /app ; .venv/bin/python3 /app/script.py >> /proc/1/fd/1
Loading
Loading