Add practical examples to multiple files
- LF9-03 Virtualisierung: Docker Compose + Volume examples - LF6-02 Frontend: To-Do list practical example - LF8-04 ETL: Complete ETL pipeline example - LF6-04 Sicherheit: Express.js security headers - LF2-04 Nutzwertanalyse: Cloud provider selection example - LF9-04 Monitoring: Prometheus alerts + Python logging
This commit is contained in:
@@ -207,6 +207,81 @@ ETL - Fehlerstrategien
|
||||
|
||||
---
|
||||
|
||||
## Praktisches Beispiel: Vollständige ETL-Pipeline
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
import requests
|
||||
from sqlalchemy import create_engine
|
||||
import logging
|
||||
|
||||
# Logging konfigurieren
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def _extract():
    """Read the raw customer, order and product data.

    Customers and orders come from CSV files and are mandatory; products come
    from the shop API on a best-effort basis.

    Returns:
        Tuple ``(kunden_df, bestellungen_df, produkte_df)``. ``produkte_df``
        is an empty DataFrame when the API call fails or returns unusable data.
    """
    logger.info("Starte Extraktion...")

    # From CSV
    kunden_df = pd.read_csv('daten/kunden.csv')
    bestellungen_df = pd.read_csv('daten/bestellungen.csv')

    # From API
    try:
        response = requests.get('https://api.shop.de/produkte', timeout=30)
        # Without this check a 4xx/5xx error body that happens to be valid
        # JSON would be loaded as if it were product data.
        response.raise_for_status()
        produkte_df = pd.DataFrame(response.json())
    except (requests.RequestException, ValueError) as e:
        # RequestException: connection, timeout and HTTP status errors.
        # ValueError: body is not JSON or cannot be turned into a DataFrame.
        # The API is optional, so fall back to an empty frame instead of
        # aborting the whole pipeline.
        logger.error(f"API Fehler: {e}")
        produkte_df = pd.DataFrame()

    logger.info(f"Extrahiert: {len(kunden_df)} Kunden, {len(bestellungen_df)} Bestellungen")
    logger.info(f"Extrahiert: {len(produkte_df)} Produkte")
    return kunden_df, bestellungen_df, produkte_df


def _transform(kunden_df, bestellungen_df):
    """Clean the customers, enrich the orders and aggregate revenue per city.

    The input frames are not modified.

    Args:
        kunden_df: Customers with at least the columns ``kunden_id``,
            ``name``, ``stadt``, ``email`` and ``erstellt_am``.
        bestellungen_df: Orders with at least ``kunden_id`` and
            ``umsatz_netto``.

    Returns:
        Tuple ``(merged_df, umsatz_pro_stadt)``: the orders left-joined with
        customer name/city, and the net revenue summed per city.
    """
    logger.info("Starte Transformation...")

    # Clean data. Normalise *before* de-duplicating so that rows which differ
    # only in e-mail case or surrounding whitespace collapse into one.
    kunden_df = kunden_df.assign(
        email=kunden_df['email'].str.lower().str.strip(),
        erstellt_am=pd.to_datetime(kunden_df['erstellt_am']),
    ).drop_duplicates()

    # The join below must be many-to-one: a customer id that appears twice
    # would duplicate every order of that customer and inflate the revenue.
    doppelte_ids = kunden_df['kunden_id'].duplicated()
    if doppelte_ids.any():
        logger.warning(
            f"{int(doppelte_ids.sum())} Kunden mit doppelter kunden_id verworfen"
        )
        kunden_df = kunden_df[~doppelte_ids]

    # Calculations: gross revenue including 19 % VAT
    bestellungen_df = bestellungen_df.assign(
        umsatz_mit_mwst=bestellungen_df['umsatz_netto'] * 1.19
    )

    # JOIN: attach customer name and city to every order
    merged_df = bestellungen_df.merge(
        kunden_df[['kunden_id', 'name', 'stadt']],
        on='kunden_id',
        how='left',
    )

    # groupby drops rows whose key is missing, so make that loss visible.
    ohne_stadt = int(merged_df['stadt'].isna().sum())
    if ohne_stadt:
        logger.warning(
            f"{ohne_stadt} Bestellungen ohne Stadt fehlen in der Aggregation"
        )

    # Aggregation: revenue per city
    umsatz_pro_stadt = merged_df.groupby('stadt')['umsatz_netto'].sum().reset_index()

    logger.info(f"Transformation abgeschlossen: {len(merged_df)} Datensätze")
    return merged_df, umsatz_pro_stadt


def _load(merged_df, umsatz_pro_stadt):
    """Write the fact and aggregate tables to the warehouse database.

    Both tables are replaced inside a single transaction, so a failure while
    writing the second table does not leave the first one already replaced.
    """
    logger.info("Starte Laden...")

    # Database connection
    # NOTE(review): the credentials are hard-coded in the URL; outside of a
    # teaching example they should come from configuration or the environment.
    engine = create_engine('postgresql://user:pass@localhost:5432/warehouse')
    try:
        # engine.begin() commits on success and rolls back on any exception.
        with engine.begin() as verbindung:
            merged_df.to_sql('fact_bestellungen', verbindung, if_exists='replace', index=False)
            umsatz_pro_stadt.to_sql('dim_umsatz_stadt', verbindung, if_exists='replace', index=False)
    finally:
        # Release the pooled connections even when loading fails.
        engine.dispose()


def etl_pipeline():
    """Run the complete ETL pipeline for sales data.

    Extracts customers and orders from CSV files (and products from the shop
    API), cleans and joins them, aggregates net revenue per city and replaces
    the tables ``fact_bestellungen`` and ``dim_umsatz_stadt`` in the warehouse.

    Raises:
        Whatever pandas / SQLAlchemy raise when a CSV file cannot be read, a
        required column is missing or the database is unreachable. Only
        failures of the optional product API are caught and logged.
    """
    # The product data is fetched and logged but not used by later steps yet.
    kunden_df, bestellungen_df, _ = _extract()
    merged_df, umsatz_pro_stadt = _transform(kunden_df, bestellungen_df)
    _load(merged_df, umsatz_pro_stadt)

    logger.info("ETL Pipeline erfolgreich abgeschlossen!")
|
||||
|
||||
if __name__ == '__main__':
|
||||
etl_pipeline()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Querverweise
|
||||
|
||||
- [[LF8-03-Datenformate|Zurück: Datenformate]]
|
||||
|
||||
Reference in New Issue
Block a user