diff --git a/data-preparation.ipynb b/data-preparation.ipynb index 70c6ced..c8c09e4 100644 --- a/data-preparation.ipynb +++ b/data-preparation.ipynb @@ -38,33 +38,22 @@ " x = x[x[\"systolic\"] > x[\"relaxation\"]]\n", " \n", " # Cholesterin\n", - " x = x[abs(x[\"Cholesterol\"] - x[\"HDL\"] - x[\"LDL\"] - x[\"triglyceride\"] / 5) < 30]\n", + " x = x[abs(x[\"Cholesterol\"] - x[\"HDL\"] - x[\"LDL\"] - x[\"triglyceride\"] / 5) < 1]\n", " \n", + " # Entfernt nach Fachgespräch mit Ärztin\n", " # BMI > 16\n", - " x = x[(x[\"weight(kg)\"] / ((x[\"height(cm)\"] / 100) ** 2)) >= 15]\n", - " \n", - " # Anderes -> 10-facher Normbereich\n", - " x = x[x[\"AST\"] < 500] # 8 Werte\n", - " x = x[x[\"ALT\"] < 500] # 4 Werte\n", - " x = x[x[\"Gtp\"] < 660] # 26 Werte\n", - " x = x[x[\"hemoglobin\"] < 180] # 0 Werte\n", - " x = x[x[\"serum creatinine\"] < 12] # 0 Werte\n", - " x = x[x[\"fasting blood sugar\"] < 1000] # 0 Werte\n", + " # x = x[(x[\"weight(kg)\"] / ((x[\"height(cm)\"] / 100) ** 2)) >= 15]\n", + " # # Anderes -> 10-facher Normbereich\n", + " # x = x[x[\"AST\"] < 500] # 8 Werte\n", + " # x = x[x[\"ALT\"] < 500] # 4 Werte\n", + " # x = x[x[\"Gtp\"] < 660] # 26 Werte\n", + " # x = x[x[\"hemoglobin\"] < 180] # 0 Werte\n", + " # x = x[x[\"serum creatinine\"] < 12] # 0 Werte\n", + " # x = x[x[\"fasting blood sugar\"] < 1000] # 0 Werte\n", " \n", " return x \n", "\n", - "print(len(clean_up(original)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "19704c5b", - "metadata": {}, - "outputs": [], - "source": [ - "y = original[\"fasting blood sugar\"].sort_values()\n", - "y.tail(30)" + "print(33467-len(clean_up(original)))" ] } ],