Initial commit: IHK Ausbildung materials
This commit is contained in:
199
2-Ausbildungsjahr/LF8-Datenintegration/LF8-03-Datenformate.md
Normal file
199
2-Ausbildungsjahr/LF8-Datenintegration/LF8-03-Datenformate.md
Normal file
@@ -0,0 +1,199 @@
|
||||
# 8.3 Datenformate
|
||||
|
||||
## JSON (JavaScript Object Notation)
|
||||
|
||||
### Grundstruktur
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "Max Mustermann",
|
||||
"alter": 25,
|
||||
"adresse": {
|
||||
"stadt": "Berlin",
|
||||
"plz": "10115"
|
||||
},
|
||||
"hobbys": ["Lesen", "Programmieren"],
|
||||
"aktiv": true
|
||||
}
|
||||
```
|
||||
|
||||
### Datentypen
|
||||
|
||||
| Typ | Beispiel |
|-----|----------|
| String | "Hallo" |
| Number | 42, 3.14 |
| Boolean | true, false |
| Array | [1, 2, 3] |
| Object | {"key": "value"} |
| Null | null |
|
||||
|
||||
### JSON in Python
|
||||
|
||||
```python
|
||||
import json
|
||||
|
||||
# String zu Dictionary
|
||||
daten = json.loads('{"name": "Max"}')
|
||||
|
||||
# Dictionary zu String
|
||||
text = json.dumps(daten, indent=2)
|
||||
|
||||
# Mit Datei
|
||||
with open('daten.json', 'w') as f:
|
||||
json.dump(daten, f, indent=2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## XML (eXtensible Markup Language)
|
||||
|
||||
### Grundstruktur
|
||||
|
||||
```xml
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<benutzer>
|
||||
<name>Max Mustermann</name>
|
||||
<alter>25</alter>
|
||||
<adresse>
|
||||
<stadt>Berlin</stadt>
|
||||
</adresse>
|
||||
</benutzer>
|
||||
```
|
||||
|
||||
### XML-Attribute
|
||||
|
||||
```xml
|
||||
<benutzer id="123" typ="admin">
|
||||
<name>Max</name>
|
||||
</benutzer>
|
||||
```
|
||||
|
||||
### XML in Python
|
||||
|
||||
```python
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
# Parsen
|
||||
baum = ET.parse('daten.xml')
|
||||
wurzel = baum.getroot()
|
||||
|
||||
# Element finden
|
||||
for kind in wurzel.findall('.//kind'):
|
||||
print(kind.text)
|
||||
|
||||
# Erstellen
|
||||
root = ET.Element('daten')
|
||||
ET.SubElement(root, 'wert').text = 'test'
|
||||
baum = ET.ElementTree(root)
|
||||
baum.write('ausgabe.xml')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CSV (Comma Separated Values)
|
||||
|
||||
### Grundstruktur
|
||||
|
||||
```csv
|
||||
Name,Alter,Stadt
|
||||
Max,25,Berlin
|
||||
Anna,30,Hamburg
|
||||
Peter,28,München
|
||||
```
|
||||
|
||||
### Mit Python
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
|
||||
# Lesen
|
||||
df = pd.read_csv('daten.csv', sep=',')
|
||||
|
||||
# Schreiben
|
||||
df.to_csv('ausgabe.csv', index=False)
|
||||
|
||||
# Erste Zeile überspringen (die darauffolgende Zeile wird als Header gelesen)
|
||||
df = pd.read_csv('daten.csv', skiprows=1)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Vergleich
|
||||
|
||||
| Kriterium | JSON | XML | CSV |
|-----------|------|-----|-----|
| Lesbarkeit | Gut | Gut | Gut |
| Datentypen | Ja | Nur mit Schema (XSD) | Nein |
| Komplexität | Niedrig | Mittel | Niedrig |
| Dateigröße | Klein | Groß | Am kleinsten |
| Einsatz | APIs | Konfiguration | Tabellarische Daten |
|
||||
|
||||
---
|
||||
|
||||
## Datenkonvertierung
|
||||
|
||||
### CSV zu JSON
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
import json
|
||||
|
||||
# CSV lesen
|
||||
df = pd.read_csv('daten.csv')
|
||||
|
||||
# Zu JSON
|
||||
json_string = df.to_json(orient='records', indent=2)
|
||||
|
||||
# In Datei schreiben
|
||||
with open('daten.json', 'w') as f:
|
||||
f.write(json_string)
|
||||
```
|
||||
|
||||
### XML zu JSON
|
||||
|
||||
```python
|
||||
import xmltodict
|
||||
import json
|
||||
|
||||
# XML zu Dictionary
|
||||
with open('daten.xml') as f:
|
||||
daten = xmltodict.parse(f)
|
||||
|
||||
# Zu JSON
|
||||
json_string = json.dumps(daten, indent=2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Datenvalidierung
|
||||
|
||||
### JSON Schema
|
||||
|
||||
```json
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"alter": {
|
||||
"type": "integer",
|
||||
"minimum": 0
|
||||
}
|
||||
},
|
||||
"required": ["name"]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Querverweise
|
||||
|
||||
- [[LF8-02-Schnittstellen|Zurück: Schnittstellen]]
|
||||
- [[LF8-04-ETL-Prozesse|Nächstes Thema: ETL-Prozesse]]
|
||||
|
||||
---
|
||||
|
||||
*Stand: 2024*
|
||||
Reference in New Issue
Block a user