Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
beta-vae-normalizing-flows
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Kaan Güney Keklikçi
beta-vae-normalizing-flows
Commits
9e99b4f9
Commit
9e99b4f9
authored
Oct 06, 2021
by
Kaan Güney Keklikçi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
preprocessing modules, currently using v1.1
parent
48384b93
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
1854 additions
and
0 deletions
+1854
-0
preprocessing_v1.0.ipynb
...s/thoracic-surgery/preprocessing/preprocessing_v1.0.ipynb
+1241
-0
preprocessing_v1.1.ipynb
...s/thoracic-surgery/preprocessing/preprocessing_v1.1.ipynb
+613
-0
No files found.
beta-vae-normalizing-flows/thoracic-surgery/preprocessing/preprocessing_v1.0.ipynb
0 → 100644
View file @
9e99b4f9
{
"cells": [
{
"cell_type": "code",
"execution_count": 77,
"id": "47dbe264",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import warnings \n",
"warnings.filterwarnings('ignore') \n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.preprocessing import StandardScaler, OrdinalEncoder\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt \n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 103,
"id": "99532c99",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>DGN</th>\n",
" <th>PRE4</th>\n",
" <th>PRE5</th>\n",
" <th>PRE6</th>\n",
" <th>PRE7</th>\n",
" <th>PRE8</th>\n",
" <th>PRE9</th>\n",
" <th>PRE10</th>\n",
" <th>PRE11</th>\n",
" <th>PRE14</th>\n",
" <th>PRE17</th>\n",
" <th>PRE19</th>\n",
" <th>PRE25</th>\n",
" <th>PRE30</th>\n",
" <th>PRE32</th>\n",
" <th>AGE</th>\n",
" <th>Risk1Yr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>DGN2</td>\n",
" <td>2.88</td>\n",
" <td>2.16</td>\n",
" <td>PRZ1</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>T</td>\n",
" <td>OC14</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>60</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>DGN3</td>\n",
" <td>3.40</td>\n",
" <td>1.88</td>\n",
" <td>PRZ0</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>OC12</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>51</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>DGN3</td>\n",
" <td>2.76</td>\n",
" <td>2.08</td>\n",
" <td>PRZ1</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>OC11</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>59</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>DGN3</td>\n",
" <td>3.68</td>\n",
" <td>3.04</td>\n",
" <td>PRZ0</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>OC11</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>54</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>DGN3</td>\n",
" <td>2.44</td>\n",
" <td>0.96</td>\n",
" <td>PRZ2</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>T</td>\n",
" <td>OC11</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>73</td>\n",
" <td>T</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id DGN PRE4 PRE5 PRE6 PRE7 PRE8 PRE9 PRE10 PRE11 PRE14 PRE17 PRE19 \\\n",
"0 1 DGN2 2.88 2.16 PRZ1 F F F T T OC14 F F \n",
"1 2 DGN3 3.40 1.88 PRZ0 F F F F F OC12 F F \n",
"2 3 DGN3 2.76 2.08 PRZ1 F F F T F OC11 F F \n",
"3 4 DGN3 3.68 3.04 PRZ0 F F F F F OC11 F F \n",
"4 5 DGN3 2.44 0.96 PRZ2 F T F T T OC11 F F \n",
"\n",
" PRE25 PRE30 PRE32 AGE Risk1Yr \n",
"0 F T F 60 F \n",
"1 F T F 51 F \n",
"2 F T F 59 F \n",
"3 F F F 54 F \n",
"4 F T F 73 T "
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Location of the thoracic-surgery CSV. The default below is machine-specific;\n",
"# set the THORACIC_SURGERY_CSV environment variable to load the file from\n",
"# another location without editing this cell.\n",
"filename = os.environ.get(\n",
"    'THORACIC_SURGERY_CSV',\n",
"    '/Users/kaanguney.keklikci/Desktop/Erasmus+/Heidelberg/data/ThoraricSurgery.csv',\n",
")\n",
"df = pd.read_csv(filename)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 104,
"id": "c6014eae",
"metadata": {},
"outputs": [],
"source": [
"# Drop the row-identifier column. Reassigning (instead of inplace=True) together\n",
"# with errors='ignore' keeps this cell safe to run more than once.\n",
"df = df.drop(columns='id', errors='ignore')"
]
},
{
"cell_type": "code",
"execution_count": 105,
"id": "b82b994d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DGN object\n",
"PRE4 float64\n",
"PRE5 float64\n",
"PRE6 object\n",
"PRE7 object\n",
"PRE8 object\n",
"PRE9 object\n",
"PRE10 object\n",
"PRE11 object\n",
"PRE14 object\n",
"PRE17 object\n",
"PRE19 object\n",
"PRE25 object\n",
"PRE30 object\n",
"PRE32 object\n",
"AGE int64\n",
"Risk1Yr object\n",
"dtype: object"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 106,
"id": "a97006e6",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAT9UlEQVR4nO3df7DldX3f8eeLBQQFA4Trdt1dB7Q0DabNwtwiUcchUM3KpFmSMRY7GrRk1nYgo9ZJFKcd0IaptirRxNLZBHSxKqEoQiklEiRxnFRgwRVY0LqVtezOwl5AUaJhuuu7f5zPfjmze+9yL5zvOXu5z8fMmfv9fr4/7ovL2fO63+/5nu9NVSFJEsAhkw4gSTp4WAqSpI6lIEnqWAqSpI6lIEnqHDrpAM/F2rVr6+abb550DElabDLXgkV9pPDoo49OOoIkPa8s6lKQJI2WpSBJ6lgKkqSOpSBJ6lgKkqROb6WQ5IgkdyT5VpItST7Yxj+T5MEkm9tjTRtPkk8m2ZrkniSn9pVNkjS7Pj+n8BRwZlU9meQw4OtJ/mdb9vtVde0+678ROKk9XgVc3r5KksaktyOFGniyzR7WHge6T/c64Kq23TeAY5Ks6CufJGl/vb6nkGRZks3ALuCWqrq9Lbq0nSK6LMkL2thK4KGhzbe3sX33uT7JpiSbZmZm+owvSUtOr7e5qKo9wJokxwDXJfkl4CLgYeBwYAPwPuBDC9jnhrYd09PTz7u/EPTTn9466QizOvLIsyYdQdIYjOXqo6r6IXAbsLaqdrZTRE8BnwZOa6vtAFYPbbaqjUmSxqTPq4+m2hECSY4EXg98e+/7BEkCnAPc1za5AfiddhXS6cATVbWzr3ySpP31efpoBbAxyTIG5XNNVd2Y5KtJphjcpW8z8K/a+jcBZwNbgZ8A7+gxmyRpFr2VQlXdA5wyy/iZc6xfwAV95ZEkPTM/0SxJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6vRWCkmOSHJHkm8l2ZLkg238xCS3J9ma5M+THN7GX9Dmt7blJ/SVTZI0uz6PFJ4CzqyqXwbWAGuTnA58BLisqv4+8APg/Lb++cAP2vhlbT1J0hj1Vgo18GSbPaw9CjgTuLaNbwTOadPr2jxt+VlJ0lc+SdL+en1PIcmyJJuBXcAtwP8BflhVu9sq24GVbXol8BBAW/4E8POz7HN9kk1JNs3MzPQZX5KWnF5Loar2VNUaYBVwGvAPR7DPDVU1XVXTU1NTz3V3kqQhY7n6qKp+CNwG/ApwTJJD26JVwI42vQNYDdCW/xzw2DjySZIG+rz6aCrJMW36SOD1wAMMyuFNbbXzgOvb9A1tnrb8q1VVfeWTJO3v0Gde5VlbAWxMsoxB+VxTVTcmuR+4OskfAt8ErmjrXwF8NslW4HHg3B6zSZJm0VspVNU9wCmzjH+PwfsL+47/HfDbfeWRJD0zP9EsSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkTm+lkGR1ktuS3J9kS5J3tfFLkuxIsrk9zh7a5qIkW5N8J8mv9ZVNkjS7Q3vc927gvVV1d5KjgbuS3NKWXVZVHx1eOcnJwLnAK4GXAn+Z5B9U1Z4eM0qShvR2pFBVO6vq7jb9Y+ABYOUBNlkHXF1VT1XVg8BW4LS+8kmS9jeW9xSSnACcAtzehi5Mck+SK5Mc28ZWAg8NbbadWUokyfokm5JsmpmZ6TO2JC05vZdCkqOALwLvrqofAZcDrwDWADuBjy1kf1W1oaqmq2p6ampq1HElaUnrtRSSHMagED5XVV8CqKpHqmpPVf0M+FOePkW0A1
g9tPmqNiZJGpM+rz4KcAXwQFV9fGh8xdBqvwnc16ZvAM5N8oIkJwInAXf0lU+StL8+rz56DfA24N4km9vYB4C3JFkDFLANeCdAVW1Jcg1wP4Mrly7wyiNJGq/eSqGqvg5klkU3HWCbS4FL+8okSTowP9EsSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSer0VgpJVie5Lcn9SbYkeVcbPy7JLUm+274e28aT5JNJtia5J8mpfWWTJM2uzyOF3cB7q+pk4HTggiQnA+8Hbq2qk4Bb2zzAG4GT2mM9cHmP2SRJs5hXKSS5dT5jw6pqZ1Xd3aZ/DDwArATWARvbahuBc9r0OuCqGvgGcEySFfPJJ0kajUMPtDDJEcALgePbaZ60RS9m8AI/L0lOAE4BbgeWV9XOtuhhYHmbXgk8NLTZ9ja2c2iMJOsZHEnwspe9bL4RJEnzcMBSAN4JvBt4KXAXT5fCj4A/mc83SHIU8EXg3VX1oyTdsqqqJLWQwFW1AdgAMD09vaBtJUkHdsBSqKpPAJ9I8ntV9ccL3XmSwxgUwueq6ktt+JEkK6pqZzs9tKuN7wBWD22+qo1JksbkmY4UAKiqP07yauCE4W2q6qq5tsngkOAK4IGq+vjQohuA84APt6/XD41fmORq4FXAE0OnmSRJYzCvUkjyWeAVwGZgTxsuYM5SAF4DvA24N8nmNvYBBmVwTZLzge8Db27LbgLOBrYCPwHeMd//CEnSaMyrFIBp4OSqmvc5/Kr6Ok+/B7Gvs2ZZv4AL5rt/SdLozfdzCvcBf6/PIJKkyZvvkcLxwP1J7gCe2jtYVb/RSypJ0kTMtxQu6TOEJOngMN+rj/667yCSpMmb79VHP2ZwtRHA4cBhwN9W1Yv7CiZJGr/5HikcvXe6ff5gHYOb3EmSnkcWfJfUdsO6LwO/Nvo4kqRJmu/po98amj2EwecW/q6XRJKkiZnv1Uf/bGh6N7CNwSkkSdLzyHzfU/CWE5K0BMz3j+ysSnJdkl3t8cUkq/oOJ0kar/m+0fxpBncxfWl7/Pc2Jkl6HplvKUxV1aerand7fAaY6jGXJGkC5lsKjyV5a5Jl7fFW4LE+g0mSxm++pfAvGfzdg4cZ/M3kNwFv7ymTJGlC5ntJ6oeA86rqBwBJjgM+yqAsJEnPE/M9UvjHewsBoKoeB07pJ5IkaVLmWwqHJDl270w7UpjvUYYkaZGY7wv7x4D/leS/tfnfBi7tJ5IkaVLm+4nmq5JsAs5sQ79VVff3F0uSNAnzPgXUSsAikKTnsQXfOnu+klzZbolx39DYJUl2JNncHmcPLbsoydYk30nibbklaQJ6KwXgM8DaWcYvq6o17XETQJKTgXOBV7Zt/nOSZT1mkyTNordSqKqvAY/Pc/V1wNVV9VRVPQhsBU7rK5skaXZ9HinM5cIk97TTS3svc10JPDS0zvY2tp8k65NsSrJpZmam76yStKSMuxQuB14BrGFwu4yPLXQHVbWhqqaranpqynvySdIojbUUquqRqtpTVT8D/pSnTxHtAFYPrbqqjUmSxmispZBkxdDsbwJ7r0y6ATg3yQuSnAicBNwxzmySpB5vVZHkC8AZwPFJtgMXA2ckWQMUg7/z/E6AqtqS5BoGn4PYDVxQVXv6yiZJml1vpVBVb5ll+IoDrH8p3jpDkiZqElcfSZIOUpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKnTWykkuTLJriT3DY0dl+SWJN9tX49t40nyySRbk9yT5NS+ckmS5tbnkcJngLX7jL0fuLWqTgJubfMAbwROao/1wOU95pIkzaG3UqiqrwGP7zO8DtjYpjcC5w
yNX1UD3wCOSbKir2ySpNmN+z2F5VW1s00/DCxv0yuBh4bW297G9pNkfZJNSTbNzMz0l1SSlqCJvdFcVQXUs9huQ1VNV9X01NRUD8kkaekadyk8sve0UPu6q43vAFYPrbeqjUmSxmjcpXADcF6bPg+4fmj8d9pVSKcDTwydZpIkjcmhfe04yReAM4Djk2wHLgY+DFyT5Hzg+8Cb2+o3AWcDW4GfAO/oK5ckaW69lUJVvWWORWfNsm4BF/SVRZI0P36iWZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUOXQS3zTJNuDHwB5gd1VNJzkO+HPgBGAb8Oaq+sEk8knSUjXJI4Vfrao1VTXd5t8P3FpVJwG3tnlJ0hhN5EhhDuuAM9r0RuCvgPctdCeX3/m10SUaoX/9T1436QiS9IwmdaRQwFeS3JVkfRtbXlU72/TDwPLZNkyyPsmmJJtmZmbGkVWSloxJHSm8tqp2JHkJcEuSbw8vrKpKUrNtWFUbgA0A09PTs64jSXp2JlIKVbWjfd2V5DrgNOCRJCuqameSFcCuSWTTc/O12x6YdIRZve5Xf3HSEaRFYeynj5K8KMnRe6eBNwD3ATcA57XVzgOuH3c2SVrqJnGksBy4Lsne7//5qro5yZ3ANUnOB74PvHkC2SRpSRt7KVTV94BfnmX8MeCsceeRJD3NTzRLkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjqT+hvNkkbsP335jklHmNPvn3PapCNonjxSkCR1LAVJUsdSkCR1LAVJUsdSkCR1Drqrj5KsBT4BLAP+rKo+POFIksZg5sY/mHSEWU39+n+cdISxOqiOFJIsAz4FvBE4GXhLkpMnm0qSlo6D7UjhNGBrVX0PIMnVwDrg/omm0pIwc9kfTjrCnKbe828nHUHP4MubL5p0hFmds+Y/LGj9VFVPURYuyZuAtVX1u23+bcCrqurCoXXWA+vb7C8A3+kx0vHAoz3uv2/mn6zFnH8xZwfzP5NHq2rtbAsOtiOFZ1RVG4AN4/heSTZV1fQ4vlcfzD9Zizn/Ys4O5n8uDqr3FIAdwOqh+VVtTJI0BgdbKdwJnJTkxCSHA+cCN0w4kyQtGQfV6aOq2p3kQuAvGFySemVVbZlgpLGcpuqR+SdrMedfzNnB/M/aQfVGsyRpsg6200eSpAmyFCRJnSVRCkn2JNmcZEuSbyV5b5JDhpafluSvknw3yd1J/keSf9SWXZLkJ0leMrT+k+3r6iS3Jbm/7ftdiyz/EUnuaPvckuSDiyn/0PyyJN9McuNiy5/kPW2/9yX5QpIjFln+bUnubfvfNOrsY8h/TJJrk3w7yQNJfmURZb8yya4k940yM1X1vH8ATw5NvwT4S+CDbX45sA149dA6rwXOadOXAP8X+Mi++wNWAKe26aOB/w2cvIjyBziqTR8G3A6cvljyD83/G+DzwI2L7PmzEngQOLLNXwO8fbHkb9PbgOP7+LmPKf9G4Hfb9OHAMYso++uAU4H7Rpq5z/+ZB8tjlheRlwOPMXhR/Pd7/yfNse0l7bENOG62/Q2tez3w+sWYH3ghcDeDT5AvmvwMPstyK3AmYyiFUeZnUAoPAccxuBLwRuANiyV/m97GGEthxD//n2NQylls2YfWOYERl8KSOH20rxrcW2kZg+Z+JYMXwwN5ErgSmPP0UJITgFMY/Lbdq1Hmb6deNgO7gFuqalHlB/4I+APgZyOMeECjyl9VO4CPMvhtcCfwRFV9ZeSB9zHin38BX0lyVwa3oOndCPOfCMwAn26nH/8syYtGnXdYH689o7YkS+FAktzezi1+Yp9FnwTOS3L0LNscBXwReHdV/WgcOeey0PxVtaeq1j
D4jfu0JL80pqizWkj+JL8O7Kqqu8Ya8gAWmP9YBjd8PBF4KfCiJG8dX9r9PYvn/2ur6lQGdza+IMnrxhJ0DgvMfyiD0y+XV9UpwN8C7x9T1P08m9eePizJUkjycmAPg9+OtzB4YgBQVa8C/h2DQ0uGxn/I4Lz1Bfvs6zAGhfC5qvpSr8Gf/p4jy7/P8tuAWW+SNUojzP8a4DeSbAOuBs5M8l/7zA4jzf9PgQeraqaq/h/wJeDVvYZntM+fdrRDVe0CrmNwp+NejTD/dmD70NHxtcP76kMf/3ZHbcmVQpIp4L8Af1KDk3KfAt6eZPgf4wvn2PzjwDtpnwRPEuAK4IGq+nh/qZ824vxTSY5p00cCrwe+3VN09n5PRpS/qi6qqlVVdQKDW6J8tap6/U17lPkZnDY6PckL23PpLOCBfpIPjPj586K9v7220y5vAEZ7Jcw+Rvz8eRh4KMkvtOVn0eNt+kf83OnNQXWbix4d2c6bHwbsBj7L4IdMVT2c5J8DH0mykkGDPwp8aN+dVNWjSa4D3tOGXgO8Dbi37R/gA1V10yLJvwLYmMEfNzoEuKaq+riss6/849JL/qq6Pcm1DM4r7wa+ST+3N+jr578cuG7QZxwKfL6qbl5E+QF+D/hcBvda+x7wjsWSPckXgDOA45NsBy6uqiuea2BvcyFJ6iy500eSpLlZCpKkjqUgSepYCpKkjqUgSepYCtICpOc7vkqTtlQ+pyCNyk/bbUFoL+6fB14MXJxkOYM7nf6Lqvqbts5rgVcA97btHwXeC7xvzLmlefFzCtICJHmyqo4amn85cCdwPIMPHf2sqi6eY9tL2uTbGdxy/fF99ydNmqePpOdgMdz1UloIS0HqycFy10tpISwF6TlYDHe9lBbCN5qlZ2nfu14m+RRwe5K/2PtGMwe+6+Wd+G9QBxmfkNLCLPY7vkoH5NVHkqSO7ylIkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjr/H6Hzdzf0kiVSAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Number of rows per diagnosis code. The column is passed by keyword: a bare\n",
"# positional vector is deprecated in seaborn 0.11 and is no longer read as `x`\n",
"# by later releases.\n",
"sns.countplot(x='DGN', data=df, palette='Set3')\n",
"sns.despine()"
]
},
{
"cell_type": "code",
"execution_count": 107,
"id": "b25a11b2",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAASGklEQVR4nO3df6wdZ33n8fcnjoG0QEM2l9TYRqGsV7uhu3Wi20BLVdFEbE22W9OqpWEFDWwks9ukgipq+SFVBNRIrVRIoaVZuZuAQ2lTbyEly6bpZkO6KNoliZOaEMcgXAiKLSe+IfxICk1l8+0f5/GTI+faOU7unHPj+35JozPzzDNzv0dz7c+d58zMSVUhSRLASbMuQJK0fBgKkqTOUJAkdYaCJKkzFCRJ3cmzLuCZ2LRpU910002zLkOSnm1ytBXP6jOFhx9+eNYlSNIJ5VkdCpKkpWUoSJI6Q0GS1BkKkqTOUJAkdYOFQpLnJbkjyReS7Eryvtb+sSRfS7KzTRtbe5J8OMmeJPckOWeo2iRJixvyPoXHgfOq6rEkq4Hbkvx1W/ebVfWXR/R/HbChTa8ErmqvkqQpGexMoUYea4ur23Ss53RvBq5t230eODXJmqHqkyQ92aCfKSRZlWQncAC4uapub6uuaENEVyZ5bmtbCzwwtvne1nbkPrck2ZFkx8LCwpDlS9KKM+hjLqrqELAxyanA9Ul+FHg38CDwHGAr8E7g/cexz61tO+bn5/2GoBXie9+7ZdYlnPBOOeX8WZegZWAqVx9V1beAW4FNVbW/DRE9DnwUOLd12wesH9tsXWuTJE3JkFcfzbUzBJKcArwW+NLhzwmSBHg9cG/b5AbgV9tVSK8Cvl1V+4eqT5L0ZEMOH60BtiVZxSh8tlfVZ5J8Nskco6f07QT+S+t/I3ABsAf4LvDWAWuTJC1isFCoqnuAsxdpP+8o/Qu4ZKh6JElPzTuaJUmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUDRYKSZ6X5I4kX0iyK8n7WvvLktyeZE+Sv0jynNb+3La8p60/c6jaJEmLG/JM4XHgvKr6MWAjsCnJq4DfA66sqn8JfBO4uPW/GPhma7+y9ZMkTdFgoVAjj7XF1W0q4DzgL1v7NuD1bX5zW6atPz9JhqpPkvRkg36mkGRVkp3AAeBm4O+Bb1XVwdZlL7C2za8FHgBo678N/ItF9rklyY4kOxYWFoYsX5JWnEFDoaoOVdVGYB1wLvCvl2CfW6tqvqrm5+bmnunuJEljpnL1UVV9C7gV+Ang1CQnt1XrgH1tfh+wHqCt/yHgG9OoT5I0MuTVR3NJTm3zpwCvBXYzCodfat0uAj7d5m9oy7T1n62qGqo+SdKTnfzUXZ62NcC2JKsYhc/2qvpMkvuA65L8DvB3wNWt/9XAx5PsAR4BLhywNknSIgYLhaq6Bzh7kfavMvp84cj2fwR+eah6JElPzTuaJUmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkrrBQiHJ+iS3Jrkvya4kb2/tlyfZl2Rnmy4Y2+bdSfYk+XKSnx2qNknS4k4ecN8Hgcuq6u4kLwDuSnJzW3dlVf3+eOckZwEXAq8AXgL8nyT/qqoODVijJGnMYGcKVbW/qu5u848Cu4G1x9hkM3BdVT1eVV8D9gDnDlWfJOnJpvKZQpIzgbOB21vTpUnuSXJNkhe1trXAA2Ob7WWREEmyJcmOJDsWFhaGLFuSVpzBQyHJ84FPAu+oqu8AVwEvBzYC+4EPHM/+qmprVc1X1fzc3NxSlytJK9qgoZBkNaNA+ERVfQqgqh6qqkNV9X3gT3hiiG
gfsH5s83WtTZI0JUNefRTgamB3VX1wrH3NWLdfAO5t8zcAFyZ5bpKXARuAO4aqT5L0ZENeffRq4M3AF5PsbG3vAd6YZCNQwP3A2wCqaleS7cB9jK5cusQrjyRpugYLhaq6Dcgiq248xjZXAFcMVZMk6di8o1mS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSd1goZBkfZJbk9yXZFeSt7f205LcnOQr7fVFrT1JPpxkT5J7kpwzVG2SpMUNeaZwELisqs4CXgVckuQs4F3ALVW1AbilLQO8DtjQpi3AVQPWJklaxEShkOSWSdrGVdX+qrq7zT8K7AbWApuBba3bNuD1bX4zcG2NfB44NcmaSeqTJC2Nk4+1MsnzgB8ATm/DPGmrXsjoP/iJJDkTOBu4HTijqva3VQ8CZ7T5tcADY5vtbW37x9pIsoXRmQQvfelLJy1BkjSBY4YC8DbgHcBLgLt4IhS+A/zRJD8gyfOBTwLvqKrvJOnrqqqS1PEUXFVbga0A8/Pzx7WtJOnYjhkKVfUh4ENJfr2q/vB4d55kNaNA+ERVfao1P5RkTVXtb8NDB1r7PmD92ObrWpskaUqe6kwBgKr6wyQ/CZw5vk1VXXu0bTI6Jbga2F1VHxxbdQNwEfC77fXTY+2XJrkOeCXw7bFhJknSFEwUCkk+Drwc2Akcas0FHDUUgFcDbwa+mGRna3sPozDYnuRi4OvAG9q6G4ELgD3Ad4G3TvomJElLY6JQAOaBs6pq4jH8qrqNJz6DONL5i/Qv4JJJ9y9JWnqT3qdwL/DDQxYiSZq9Sc8UTgfuS3IH8Pjhxqr6+UGqkiTNxKShcPmQRUiSlodJrz76v0MXIkmavUmvPnqU0dVGAM8BVgP/UFUvHKowSdL0TXqm8ILD8+3+g82MHnInSTqBHPdTUtsD6/4K+NmlL0eSNEuTDh/94tjiSYzuW/jHQSqSJM3MpFcf/cex+YPA/YyGkCRJJ5BJP1PwkROStAJM+iU765Jcn+RAmz6ZZN3QxUmSpmvSD5o/yugppi9p0/9sbZKkE8ikoTBXVR+tqoNt+hgwN2BdkqQZmDQUvpHkTUlWtelNwDeGLEySNH2ThsJ/ZvS9Bw8y+s7kXwLeMlBNkqQZmfSS1PcDF1XVNwGSnAb8PqOwkCSdICY9U/h3hwMBoKoeAc4epiRJ0qxMGgonJXnR4YV2pjDpWYYk6Vli0v/YPwD8/yT/oy3/MnDFMCVJkmZl0juar02yAzivNf1iVd03XFmSpFmYeAiohYBBIEknsON+dPakklzTHolx71jb5Un2JdnZpgvG1r07yZ4kX07iY7klaQYGCwXgY8CmRdqvrKqNbboRIMlZwIXAK9o2f5xk1YC1SZIWMVgoVNXngEcm7L4ZuK6qHq+qrwF7gHOHqk2StLghzxSO5tIk97ThpcOXua4FHhjrs7e1PUmSLUl2JNmxsLAwdK2StKJMOxSuAl4ObGT0uIwPHO8OqmprVc1X1fzcnM/kk6SlNNVQqKqHqupQVX0f+BOeGCLaB6wf67qutUmSpmiqoZBkzdjiLwCHr0y6AbgwyXOTvAzYANwxzdokSQM+qiLJnwOvAU5Pshd4L/CaJBuBYvQ9z28DqKpdSbYzug/iIHBJVR0aqjZJ0uIGC4WqeuMizVcfo/8V+OgMSZqpWVx9JElapgwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUDRYKSa5JciDJvWNtpyW5OclX2uuLWnuSfDjJniT3JDlnqLokSUc35JnCx4BNR7S9C7ilqjYAt7RlgNcBG9q0BbhqwLokSUcxWChU1eeAR45o3gxsa/PbgNePtV9bI5
8HTk2yZqjaJEmLm/ZnCmdU1f42/yBwRptfCzww1m9va3uSJFuS7EiyY2FhYbhKJWkFmtkHzVVVQD2N7bZW1XxVzc/NzQ1QmSStXNMOhYcODwu11wOtfR+wfqzfutYmSZqiaYfCDcBFbf4i4NNj7b/arkJ6FfDtsWEmSdKUnDzUjpP8OfAa4PQke4H3Ar8LbE9yMfB14A2t+43ABcAe4LvAW4eqS5J0dIOFQlW98Sirzl+kbwGXDFWLJGky3tEsSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTu5Fn80CT3A48Ch4CDVTWf5DTgL4AzgfuBN1TVN2dRnyStVLM8U/iZqtpYVfNt+V3ALVW1AbilLUuSpmgmZwpHsRl4TZvfBvwt8M6l2vlVd35uqXalY/ivP/7Tsy5B0jMwqzOFAv53kruSbGltZ1TV/jb/IHDGYhsm2ZJkR5IdCwsL06hVklaMWZ0p/FRV7UvyYuDmJF8aX1lVlaQW27CqtgJbAebn5xftI0l6emYSClW1r70eSHI9cC7wUJI1VbU/yRrgwCxqk7T0Pnfr7lmXcML76Z/5N0uyn6kPHyX5wSQvODwP/HvgXuAG4KLW7SLg09OuTZJWulmcKZwBXJ/k8M//s6q6KcmdwPYkFwNfB94wg9okaUWbeihU1VeBH1uk/RvA+dOuR5L0BO9oliR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSt+xCIcmmJF9OsifJu2ZdjyStJMsqFJKsAj4CvA44C3hjkrNmW5UkrRzLKhSAc4E9VfXVqvon4Dpg84xrkqQV4+RZF3CEtcADY8t7gVeOd0iyBdjSFh9L8uUp1TYLpwMPz7qI4/Frsy5geXnWHT91J/qxu6mqNi22YrmFwlOqqq3A1lnXMQ1JdlTV/Kzr0NPj8Xv2WsnHbrkNH+0D1o8tr2ttkqQpWG6hcCewIcnLkjwHuBC4YcY1SdKKsayGj6rqYJJLgb8BVgHXVNWuGZc1SytimOwE5vF79lqxxy5VNesaJEnLxHIbPpIkzZChIEnqDIUpSHIoyc4ku5J8IcllSU4aW39ukr9N8pUkdyf5X0n+bVt3eZLvJnnxWP/H2uv6JLcmua/t++3Tf3cnvgGP3/OS3NH2uSvJ+6b/7k58Qx2/seVVSf4uyWem966Gs6w+aD6Bfa+qNgK0X64/A14IvDfJGcB24D9V1f9rfX4KeDnwxbb9w8BlwDuP2O9B4LKqujvJC4C7ktxcVfcN/YZWmKGO3+PAeVX1WJLVwG1J/rqqPj/0G1phhjp+h70d2N32+aznmcKUVdUBRndkX5okwKXAtsO/kK3PbVX1V2ObXQP8SpLTjtjX/qq6u80/yugXc+3Ab2FFW+LjV1V1+K/O1W3yyo8BLeXxA0iyDvgPwH8ftPApMhRmoKq+yuiS2xcDrwDufopNHmP0i3nU4aEkZwJnA7cvTZU6mqU8fm3oYSdwALi5qjx+A1vif39/APwW8P0lLHGmDIVlJsntSXYn+dARqz4MXNSGiY7c5vnAJ4F3VNV3plGnFne8x6+qDrWhjXXAuUl+dEqlahHHc/yS/BxwoKrummqRAzMUZiDJjwCHGP11uAs45/C6qnol8NvAD41vU1XfYjQWeskR+1rNKBA+UVWfGrRwAUt7/I5Yfyuw6EPKtHSW8Pi9Gvj5JPczeqLzeUn+dMjap8FQmLIkc8B/A/6oRncOfgR4S5KfHOv2A0fZ/IPA22gXCLQx0auB3VX1weGq1mFLfPzmkpza5k8BXgt8aaDSxdIev6p6d1Wtq6ozGT2S57NV9abBip8Srz6ajlPauPFqRlcMfZ
zRLxhV9WCSXwF+L8laRn+9PAy8/8idVNXDSa4HfqM1vRp4M/DFtn+A91TVjQO+l5VoqOO3BtiW0ZdLnQRsr6oT4rLGZWao43dC8jEXkqTO4SNJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCdByGfuKmNGvepyAdn6GfuCnNlPcpSMchyWNV9fyx5R8B7gROZ3TD0/er6r1H2fbyNvsW4JyqeuTI/Umz5vCR9AwM8cRbaZYMBWkgT+eJt9KsGQrSMzDEE1OlWfKDZulpOvKJm0k+Atye5G/GvsnrWE/cvBP/DWqZ8RdSOj4+cVMnNK8+kiR1fqYgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqftn20qlbf0HGsAAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Remove the rows whose diagnosis code is in dgn_types, then re-plot the counts\n",
"# of the codes that remain.\n",
"dgn_types = ['DGN1', 'DGN8', 'DGN5', 'DGN6']\n",
"df = df[~df.DGN.isin(dgn_types)]\n",
"\n",
"# Keyword form of countplot: positional vectors are deprecated in seaborn 0.11.\n",
"sns.countplot(x='DGN', data=df, palette='Set3')\n",
"sns.despine()"
]
},
{
"cell_type": "code",
"execution_count": 108,
"id": "19387a02",
"metadata": {},
"outputs": [],
"source": [
"# Separate the Risk1Yr label column from the remaining predictor columns.\n",
"y = df.Risk1Yr\n",
"X = df.drop(columns='Risk1Yr')"
]
},
{
"cell_type": "markdown",
"id": "bdde937d",
"metadata": {},
"source": [
"### Define the encoder"
]
},
{
"cell_type": "code",
"execution_count": 109,
"id": "dad842ec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"OrdinalEncoder()"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ordinal_encoder = OrdinalEncoder()\n",
"\n",
"ordinal_encoder"
]
},
{
"cell_type": "markdown",
"id": "01b55142",
"metadata": {},
"source": [
"### Training / test split"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "dba575cd",
"metadata": {},
"outputs": [],
"source": [
"# Shuffled hold-out split: 33% of the rows go to the test set, stratified on the\n",
"# label, with a fixed seed so the split is reproducible.\n",
"split_options = dict(test_size=.33, random_state=42, shuffle=True, stratify=y)\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)"
]
},
{
"cell_type": "markdown",
"id": "f2048621",
"metadata": {},
"source": [
"### Encode data"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "004f2b3a",
"metadata": {},
"outputs": [],
"source": [
"# Give every split a fresh 0..n-1 index. The labels are re-indexed together with\n",
"# the features so that X_* and y_* stay aligned by index as well as by position.\n",
"X_train = X_train.reset_index(drop=True)\n",
"X_test = X_test.reset_index(drop=True)\n",
"y_train = y_train.reset_index(drop=True)\n",
"y_test = y_test.reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "8a30725b",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"DGN object\n",
"PRE4 float64\n",
"PRE5 float64\n",
"PRE6 object\n",
"PRE7 object\n",
"PRE8 object\n",
"PRE9 object\n",
"PRE10 object\n",
"PRE11 object\n",
"PRE14 object\n",
"PRE17 object\n",
"PRE19 object\n",
"PRE25 object\n",
"PRE30 object\n",
"PRE32 object\n",
"AGE int64\n",
"dtype: object"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 115,
"id": "594ee37c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>DGN</th>\n",
" <th>PRE4</th>\n",
" <th>PRE5</th>\n",
" <th>PRE6</th>\n",
" <th>PRE7</th>\n",
" <th>PRE8</th>\n",
" <th>PRE9</th>\n",
" <th>PRE10</th>\n",
" <th>PRE11</th>\n",
" <th>PRE14</th>\n",
" <th>PRE17</th>\n",
" <th>PRE19</th>\n",
" <th>PRE25</th>\n",
" <th>PRE30</th>\n",
" <th>PRE32</th>\n",
" <th>AGE</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>66.0</td>\n",
" <td>59.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>29.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>95.0</td>\n",
" <td>86.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>64.0</td>\n",
" <td>47.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.0</td>\n",
" <td>41.0</td>\n",
" <td>34.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>94.0</td>\n",
" <td>83.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>39.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" DGN PRE4 PRE5 PRE6 PRE7 PRE8 PRE9 PRE10 PRE11 PRE14 PRE17 PRE19 \\\n",
"0 1.0 66.0 59.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 \n",
"1 1.0 95.0 86.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 \n",
"2 1.0 64.0 47.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 \n",
"3 0.0 41.0 34.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 \n",
"4 1.0 94.0 83.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 \n",
"\n",
" PRE25 PRE30 PRE32 AGE \n",
"0 0.0 1.0 0.0 29.0 \n",
"1 0.0 1.0 0.0 24.0 \n",
"2 0.0 0.0 0.0 19.0 \n",
"3 0.0 1.0 0.0 19.0 \n",
"4 0.0 1.0 0.0 39.0 "
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# encode training \n",
"X_train = pd.DataFrame(ordinal_encoder.fit_transform(X_train), columns=X_train.columns)\n",
"X_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 116,
"id": "73afc3db",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>DGN</th>\n",
" <th>PRE4</th>\n",
" <th>PRE5</th>\n",
" <th>PRE6</th>\n",
" <th>PRE7</th>\n",
" <th>PRE8</th>\n",
" <th>PRE9</th>\n",
" <th>PRE10</th>\n",
" <th>PRE11</th>\n",
" <th>PRE14</th>\n",
" <th>PRE17</th>\n",
" <th>PRE19</th>\n",
" <th>PRE25</th>\n",
" <th>PRE30</th>\n",
" <th>PRE32</th>\n",
" <th>AGE</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>66.0</td>\n",
" <td>65.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>28.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.0</td>\n",
" <td>43.0</td>\n",
" <td>42.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>74.0</td>\n",
" <td>69.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.0</td>\n",
" <td>17.0</td>\n",
" <td>17.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2.0</td>\n",
" <td>76.0</td>\n",
" <td>76.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" DGN PRE4 PRE5 PRE6 PRE7 PRE8 PRE9 PRE10 PRE11 PRE14 PRE17 PRE19 \\\n",
"0 1.0 66.0 65.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 \n",
"1 0.0 43.0 42.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 \n",
"2 1.0 74.0 69.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 1.0 17.0 17.0 2.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 \n",
"4 2.0 76.0 76.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" PRE25 PRE30 PRE32 AGE \n",
"0 0.0 1.0 0.0 28.0 \n",
"1 0.0 1.0 0.0 8.0 \n",
"2 0.0 1.0 0.0 3.0 \n",
"3 0.0 1.0 0.0 22.0 \n",
"4 0.0 1.0 0.0 6.0 "
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# encode testing \n",
"X_test = pd.DataFrame(ordinal_encoder.fit_transform(X_test), columns=X_test.columns)\n",
"X_test.head()"
]
},
{
"cell_type": "code",
"execution_count": 117,
"id": "732812a1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DGN float64\n",
"PRE4 float64\n",
"PRE5 float64\n",
"PRE6 float64\n",
"PRE7 float64\n",
"PRE8 float64\n",
"PRE9 float64\n",
"PRE10 float64\n",
"PRE11 float64\n",
"PRE14 float64\n",
"PRE17 float64\n",
"PRE19 float64\n",
"PRE25 float64\n",
"PRE30 float64\n",
"PRE32 float64\n",
"AGE float64\n",
"dtype: object"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test.dtypes"
]
},
{
"cell_type": "markdown",
"id": "5893ec4e",
"metadata": {},
"source": [
"### Standardize data"
]
},
{
"cell_type": "code",
"execution_count": 118,
"id": "04212592",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"StandardScaler()"
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Learn the per-column mean and standard deviation from the training split only.\n",
"scaler = StandardScaler().fit(X_train)\n",
"scaler"
]
},
{
"cell_type": "code",
"execution_count": 119,
"id": "2bd71c25",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Standardize both splits with the statistics learned from the training split,\n",
"# re-wrapping each result in a DataFrame that keeps its column labels.\n",
"X_train, X_test = (\n",
"    pd.DataFrame(scaler.transform(split), columns=split.columns)\n",
"    for split in (X_train, X_test)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 120,
"id": "e4dace09",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>DGN</th>\n",
" <th>PRE4</th>\n",
" <th>PRE5</th>\n",
" <th>PRE6</th>\n",
" <th>PRE7</th>\n",
" <th>PRE8</th>\n",
" <th>PRE9</th>\n",
" <th>PRE10</th>\n",
" <th>PRE11</th>\n",
" <th>PRE14</th>\n",
" <th>PRE17</th>\n",
" <th>PRE19</th>\n",
" <th>PRE25</th>\n",
" <th>PRE30</th>\n",
" <th>PRE32</th>\n",
" <th>AGE</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.027613</td>\n",
" <td>0.601799</td>\n",
" <td>0.549365</td>\n",
" <td>0.377562</td>\n",
" <td>-0.252646</td>\n",
" <td>2.513379</td>\n",
" <td>-0.288154</td>\n",
" <td>0.644278</td>\n",
" <td>-0.463222</td>\n",
" <td>0.432204</td>\n",
" <td>-0.274352</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.130189</td>\n",
" <td>0.473804</td>\n",
" <td>-0.081923</td>\n",
" <td>0.533867</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.027613</td>\n",
" <td>1.675779</td>\n",
" <td>1.589536</td>\n",
" <td>0.377562</td>\n",
" <td>-0.252646</td>\n",
" <td>-0.397871</td>\n",
" <td>-0.288154</td>\n",
" <td>0.644278</td>\n",
" <td>-0.463222</td>\n",
" <td>0.432204</td>\n",
" <td>-0.274352</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.130189</td>\n",
" <td>0.473804</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.049382</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.027613</td>\n",
" <td>0.527732</td>\n",
" <td>0.087066</td>\n",
" <td>0.377562</td>\n",
" <td>-0.252646</td>\n",
" <td>-0.397871</td>\n",
" <td>-0.288154</td>\n",
" <td>0.644278</td>\n",
" <td>-0.463222</td>\n",
" <td>0.432204</td>\n",
" <td>-0.274352</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.130189</td>\n",
" <td>-2.110579</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.632630</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-2.043373</td>\n",
" <td>-0.324046</td>\n",
" <td>-0.413757</td>\n",
" <td>-1.542244</td>\n",
" <td>-0.252646</td>\n",
" <td>-0.397871</td>\n",
" <td>-0.288154</td>\n",
" <td>0.644278</td>\n",
" <td>-0.463222</td>\n",
" <td>0.432204</td>\n",
" <td>-0.274352</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.130189</td>\n",
" <td>0.473804</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.632630</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.027613</td>\n",
" <td>1.638746</td>\n",
" <td>1.473962</td>\n",
" <td>0.377562</td>\n",
" <td>-0.252646</td>\n",
" <td>-0.397871</td>\n",
" <td>-0.288154</td>\n",
" <td>0.644278</td>\n",
" <td>-0.463222</td>\n",
" <td>0.432204</td>\n",
" <td>-0.274352</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.130189</td>\n",
" <td>0.473804</td>\n",
" <td>-0.081923</td>\n",
" <td>1.700363</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" DGN PRE4 PRE5 PRE6 PRE7 PRE8 PRE9 \\\n",
"0 0.027613 0.601799 0.549365 0.377562 -0.252646 2.513379 -0.288154 \n",
"1 0.027613 1.675779 1.589536 0.377562 -0.252646 -0.397871 -0.288154 \n",
"2 0.027613 0.527732 0.087066 0.377562 -0.252646 -0.397871 -0.288154 \n",
"3 -2.043373 -0.324046 -0.413757 -1.542244 -0.252646 -0.397871 -0.288154 \n",
"4 0.027613 1.638746 1.473962 0.377562 -0.252646 -0.397871 -0.288154 \n",
"\n",
" PRE10 PRE11 PRE14 PRE17 PRE19 PRE25 PRE30 \\\n",
"0 0.644278 -0.463222 0.432204 -0.274352 -0.081923 -0.130189 0.473804 \n",
"1 0.644278 -0.463222 0.432204 -0.274352 -0.081923 -0.130189 0.473804 \n",
"2 0.644278 -0.463222 0.432204 -0.274352 -0.081923 -0.130189 -2.110579 \n",
"3 0.644278 -0.463222 0.432204 -0.274352 -0.081923 -0.130189 0.473804 \n",
"4 0.644278 -0.463222 0.432204 -0.274352 -0.081923 -0.130189 0.473804 \n",
"\n",
" PRE32 AGE \n",
"0 -0.081923 0.533867 \n",
"1 -0.081923 -0.049382 \n",
"2 -0.081923 -0.632630 \n",
"3 -0.081923 -0.632630 \n",
"4 -0.081923 1.700363 "
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 121,
"id": "7543c9a4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>DGN</th>\n",
" <th>PRE4</th>\n",
" <th>PRE5</th>\n",
" <th>PRE6</th>\n",
" <th>PRE7</th>\n",
" <th>PRE8</th>\n",
" <th>PRE9</th>\n",
" <th>PRE10</th>\n",
" <th>PRE11</th>\n",
" <th>PRE14</th>\n",
" <th>PRE17</th>\n",
" <th>PRE19</th>\n",
" <th>PRE25</th>\n",
" <th>PRE30</th>\n",
" <th>PRE32</th>\n",
" <th>AGE</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.027613</td>\n",
" <td>0.601799</td>\n",
" <td>0.780514</td>\n",
" <td>0.377562</td>\n",
" <td>-0.252646</td>\n",
" <td>-0.397871</td>\n",
" <td>-0.288154</td>\n",
" <td>0.644278</td>\n",
" <td>-0.463222</td>\n",
" <td>0.432204</td>\n",
" <td>-0.274352</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.130189</td>\n",
" <td>0.473804</td>\n",
" <td>-0.081923</td>\n",
" <td>0.417217</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-2.043373</td>\n",
" <td>-0.249978</td>\n",
" <td>-0.105558</td>\n",
" <td>0.377562</td>\n",
" <td>-0.252646</td>\n",
" <td>2.513379</td>\n",
" <td>-0.288154</td>\n",
" <td>0.644278</td>\n",
" <td>-0.463222</td>\n",
" <td>0.432204</td>\n",
" <td>-0.274352</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.130189</td>\n",
" <td>0.473804</td>\n",
" <td>-0.081923</td>\n",
" <td>-1.915777</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.027613</td>\n",
" <td>0.898070</td>\n",
" <td>0.934613</td>\n",
" <td>-1.542244</td>\n",
" <td>-0.252646</td>\n",
" <td>-0.397871</td>\n",
" <td>-0.288154</td>\n",
" <td>-1.552125</td>\n",
" <td>-0.463222</td>\n",
" <td>-1.041219</td>\n",
" <td>-0.274352</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.130189</td>\n",
" <td>0.473804</td>\n",
" <td>-0.081923</td>\n",
" <td>-2.499025</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.027613</td>\n",
" <td>-1.212857</td>\n",
" <td>-1.068680</td>\n",
" <td>2.297368</td>\n",
" <td>-0.252646</td>\n",
" <td>2.513379</td>\n",
" <td>-0.288154</td>\n",
" <td>0.644278</td>\n",
" <td>2.158791</td>\n",
" <td>-1.041219</td>\n",
" <td>-0.274352</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.130189</td>\n",
" <td>0.473804</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.282681</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2.098599</td>\n",
" <td>0.972137</td>\n",
" <td>1.204288</td>\n",
" <td>0.377562</td>\n",
" <td>-0.252646</td>\n",
" <td>-0.397871</td>\n",
" <td>-0.288154</td>\n",
" <td>-1.552125</td>\n",
" <td>-0.463222</td>\n",
" <td>-1.041219</td>\n",
" <td>-0.274352</td>\n",
" <td>-0.081923</td>\n",
" <td>-0.130189</td>\n",
" <td>0.473804</td>\n",
" <td>-0.081923</td>\n",
" <td>-2.149076</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" DGN PRE4 PRE5 PRE6 PRE7 PRE8 PRE9 \\\n",
"0 0.027613 0.601799 0.780514 0.377562 -0.252646 -0.397871 -0.288154 \n",
"1 -2.043373 -0.249978 -0.105558 0.377562 -0.252646 2.513379 -0.288154 \n",
"2 0.027613 0.898070 0.934613 -1.542244 -0.252646 -0.397871 -0.288154 \n",
"3 0.027613 -1.212857 -1.068680 2.297368 -0.252646 2.513379 -0.288154 \n",
"4 2.098599 0.972137 1.204288 0.377562 -0.252646 -0.397871 -0.288154 \n",
"\n",
" PRE10 PRE11 PRE14 PRE17 PRE19 PRE25 PRE30 \\\n",
"0 0.644278 -0.463222 0.432204 -0.274352 -0.081923 -0.130189 0.473804 \n",
"1 0.644278 -0.463222 0.432204 -0.274352 -0.081923 -0.130189 0.473804 \n",
"2 -1.552125 -0.463222 -1.041219 -0.274352 -0.081923 -0.130189 0.473804 \n",
"3 0.644278 2.158791 -1.041219 -0.274352 -0.081923 -0.130189 0.473804 \n",
"4 -1.552125 -0.463222 -1.041219 -0.274352 -0.081923 -0.130189 0.473804 \n",
"\n",
" PRE32 AGE \n",
"0 -0.081923 0.417217 \n",
"1 -0.081923 -1.915777 \n",
"2 -0.081923 -2.499025 \n",
"3 -0.081923 -0.282681 \n",
"4 -0.081923 -2.149076 "
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test.head()"
]
},
{
"cell_type": "code",
"execution_count": 122,
"id": "2ce29c1e",
"metadata": {},
"outputs": [],
"source": [
"# end of preprocessing"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
beta-vae-normalizing-flows/thoracic-surgery/preprocessing/preprocessing_v1.1.ipynb
0 → 100644
View file @
9e99b4f9
{
"cells": [
{
"cell_type": "code",
"execution_count": 41,
"id": "47dbe264",
"metadata": {},
"outputs": [],
"source": [
"import warnings \n",
"warnings.filterwarnings('ignore') \n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.preprocessing import StandardScaler, OrdinalEncoder\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt \n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "99532c99",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>DGN</th>\n",
" <th>PRE4</th>\n",
" <th>PRE5</th>\n",
" <th>PRE6</th>\n",
" <th>PRE7</th>\n",
" <th>PRE8</th>\n",
" <th>PRE9</th>\n",
" <th>PRE10</th>\n",
" <th>PRE11</th>\n",
" <th>PRE14</th>\n",
" <th>PRE17</th>\n",
" <th>PRE19</th>\n",
" <th>PRE25</th>\n",
" <th>PRE30</th>\n",
" <th>PRE32</th>\n",
" <th>AGE</th>\n",
" <th>Risk1Yr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>DGN2</td>\n",
" <td>2.88</td>\n",
" <td>2.16</td>\n",
" <td>PRZ1</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>T</td>\n",
" <td>OC14</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>60</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>DGN3</td>\n",
" <td>3.40</td>\n",
" <td>1.88</td>\n",
" <td>PRZ0</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>OC12</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>51</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>DGN3</td>\n",
" <td>2.76</td>\n",
" <td>2.08</td>\n",
" <td>PRZ1</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>OC11</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>59</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>DGN3</td>\n",
" <td>3.68</td>\n",
" <td>3.04</td>\n",
" <td>PRZ0</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>OC11</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>54</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>DGN3</td>\n",
" <td>2.44</td>\n",
" <td>0.96</td>\n",
" <td>PRZ2</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>T</td>\n",
" <td>OC11</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>F</td>\n",
" <td>T</td>\n",
" <td>F</td>\n",
" <td>73</td>\n",
" <td>T</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id DGN PRE4 PRE5 PRE6 PRE7 PRE8 PRE9 PRE10 PRE11 PRE14 PRE17 PRE19 \\\n",
"0 1 DGN2 2.88 2.16 PRZ1 F F F T T OC14 F F \n",
"1 2 DGN3 3.40 1.88 PRZ0 F F F F F OC12 F F \n",
"2 3 DGN3 2.76 2.08 PRZ1 F F F T F OC11 F F \n",
"3 4 DGN3 3.68 3.04 PRZ0 F F F F F OC11 F F \n",
"4 5 DGN3 2.44 0.96 PRZ2 F T F T T OC11 F F \n",
"\n",
" PRE25 PRE30 PRE32 AGE Risk1Yr \n",
"0 F T F 60 F \n",
"1 F T F 51 F \n",
"2 F T F 59 F \n",
"3 F F F 54 F \n",
"4 F T F 73 T "
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"filename = '/Users/kaanguney.keklikci/Desktop/Erasmus+/Heidelberg/data/ThoraricSurgery.csv'\n",
"df = pd.read_csv(filename)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "c6014eae",
"metadata": {},
"outputs": [],
"source": [
"# Reassign instead of mutating in place; errors='ignore' keeps the cell re-runnable\n",
"# (the in-place drop raised KeyError when the cell was executed a second time).\n",
"df = df.drop(columns='id', errors='ignore')"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "b82b994d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DGN object\n",
"PRE4 float64\n",
"PRE5 float64\n",
"PRE6 object\n",
"PRE7 object\n",
"PRE8 object\n",
"PRE9 object\n",
"PRE10 object\n",
"PRE11 object\n",
"PRE14 object\n",
"PRE17 object\n",
"PRE19 object\n",
"PRE25 object\n",
"PRE30 object\n",
"PRE32 object\n",
"AGE int64\n",
"Risk1Yr object\n",
"dtype: object"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "a97006e6",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAT9UlEQVR4nO3df7DldX3f8eeLBQQFA4Trdt1dB7Q0DabNwtwiUcchUM3KpFmSMRY7GrRk1nYgo9ZJFKcd0IaptirRxNLZBHSxKqEoQiklEiRxnFRgwRVY0LqVtezOwl5AUaJhuuu7f5zPfjmze+9yL5zvOXu5z8fMmfv9fr4/7ovL2fO63+/5nu9NVSFJEsAhkw4gSTp4WAqSpI6lIEnqWAqSpI6lIEnqHDrpAM/F2rVr6+abb550DElabDLXgkV9pPDoo49OOoIkPa8s6lKQJI2WpSBJ6lgKkqSOpSBJ6lgKkqROb6WQ5IgkdyT5VpItST7Yxj+T5MEkm9tjTRtPkk8m2ZrkniSn9pVNkjS7Pj+n8BRwZlU9meQw4OtJ/mdb9vtVde0+678ROKk9XgVc3r5KksaktyOFGniyzR7WHge6T/c64Kq23TeAY5Ks6CufJGl/vb6nkGRZks3ALuCWqrq9Lbq0nSK6LMkL2thK4KGhzbe3sX33uT7JpiSbZmZm+owvSUtOr7e5qKo9wJokxwDXJfkl4CLgYeBwYAPwPuBDC9jnhrYd09PTz7u/EPTTn9466QizOvLIsyYdQdIYjOXqo6r6IXAbsLaqdrZTRE8BnwZOa6vtAFYPbbaqjUmSxqTPq4+m2hECSY4EXg98e+/7BEkCnAPc1za5AfiddhXS6cATVbWzr3ySpP31efpoBbAxyTIG5XNNVd2Y5KtJphjcpW8z8K/a+jcBZwNbgZ8A7+gxmyRpFr2VQlXdA5wyy/iZc6xfwAV95ZEkPTM/0SxJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6lgKkqSOpSBJ6vRWCkmOSHJHkm8l2ZLkg238xCS3J9ma5M+THN7GX9Dmt7blJ/SVTZI0uz6PFJ4CzqyqXwbWAGuTnA58BLisqv4+8APg/Lb++cAP2vhlbT1J0hj1Vgo18GSbPaw9CjgTuLaNbwTOadPr2jxt+VlJ0lc+SdL+en1PIcmyJJuBXcAtwP8BflhVu9sq24GVbXol8BBAW/4E8POz7HN9kk1JNs3MzPQZX5KWnF5Loar2VNUaYBVwGvAPR7DPDVU1XVXTU1NTz3V3kqQhY7n6qKp+CNwG/ApwTJJD26JVwI42vQNYDdCW/xzw2DjySZIG+rz6aCrJMW36SOD1wAMMyuFNbbXzgOvb9A1tnrb8q1VVfeWTJO3v0Gde5VlbAWxMsoxB+VxTVTcmuR+4OskfAt8ErmjrXwF8NslW4HHg3B6zSZJm0VspVNU9wCmzjH+PwfsL+47/HfDbfeWRJD0zP9EsSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkTm+lkGR1ktuS3J9kS5J3tfFLkuxIsrk9zh7a5qIkW5N8J8mv9ZVNkjS7Q3vc927gvVV1d5KjgbuS3NKWXVZVHx1eOcnJwLnAK4GXAn+Z5B9U1Z4eM0qShvR2pFBVO6vq7jb9Y+ABYOUBNlkHXF1VT1XVg8BW4LS+8kmS9jeW9xSSnACcAtzehi5Mck+SK5Mc28ZWAg8NbbadWUokyfokm5JsmpmZ6TO2JC05vZdCkqOALwLvrqofAZcDrwDWADuBjy1kf1W1oaqmq2p6ampq1HElaUnrtRSSHMagED5XVV8CqKpHqmpPVf0M+FOePkW0A1
g9tPmqNiZJGpM+rz4KcAXwQFV9fGh8xdBqvwnc16ZvAM5N8oIkJwInAXf0lU+StL8+rz56DfA24N4km9vYB4C3JFkDFLANeCdAVW1Jcg1wP4Mrly7wyiNJGq/eSqGqvg5klkU3HWCbS4FL+8okSTowP9EsSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSepYCpKkjqUgSer0VgpJVie5Lcn9SbYkeVcbPy7JLUm+274e28aT5JNJtia5J8mpfWWTJM2uzyOF3cB7q+pk4HTggiQnA+8Hbq2qk4Bb2zzAG4GT2mM9cHmP2SRJs5hXKSS5dT5jw6pqZ1Xd3aZ/DDwArATWARvbahuBc9r0OuCqGvgGcEySFfPJJ0kajUMPtDDJEcALgePbaZ60RS9m8AI/L0lOAE4BbgeWV9XOtuhhYHmbXgk8NLTZ9ja2c2iMJOsZHEnwspe9bL4RJEnzcMBSAN4JvBt4KXAXT5fCj4A/mc83SHIU8EXg3VX1oyTdsqqqJLWQwFW1AdgAMD09vaBtJUkHdsBSqKpPAJ9I8ntV9ccL3XmSwxgUwueq6ktt+JEkK6pqZzs9tKuN7wBWD22+qo1JksbkmY4UAKiqP07yauCE4W2q6qq5tsngkOAK4IGq+vjQohuA84APt6/XD41fmORq4FXAE0OnmSRJYzCvUkjyWeAVwGZgTxsuYM5SAF4DvA24N8nmNvYBBmVwTZLzge8Db27LbgLOBrYCPwHeMd//CEnSaMyrFIBp4OSqmvc5/Kr6Ok+/B7Gvs2ZZv4AL5rt/SdLozfdzCvcBf6/PIJKkyZvvkcLxwP1J7gCe2jtYVb/RSypJ0kTMtxQu6TOEJOngMN+rj/667yCSpMmb79VHP2ZwtRHA4cBhwN9W1Yv7CiZJGr/5HikcvXe6ff5gHYOb3EmSnkcWfJfUdsO6LwO/Nvo4kqRJmu/po98amj2EwecW/q6XRJKkiZnv1Uf/bGh6N7CNwSkkSdLzyHzfU/CWE5K0BMz3j+ysSnJdkl3t8cUkq/oOJ0kar/m+0fxpBncxfWl7/Pc2Jkl6HplvKUxV1aerand7fAaY6jGXJGkC5lsKjyV5a5Jl7fFW4LE+g0mSxm++pfAvGfzdg4cZ/M3kNwFv7ymTJGlC5ntJ6oeA86rqBwBJjgM+yqAsJEnPE/M9UvjHewsBoKoeB07pJ5IkaVLmWwqHJDl270w7UpjvUYYkaZGY7wv7x4D/leS/tfnfBi7tJ5IkaVLm+4nmq5JsAs5sQ79VVff3F0uSNAnzPgXUSsAikKTnsQXfOnu+klzZbolx39DYJUl2JNncHmcPLbsoydYk30nibbklaQJ6KwXgM8DaWcYvq6o17XETQJKTgXOBV7Zt/nOSZT1mkyTNordSqKqvAY/Pc/V1wNVV9VRVPQhsBU7rK5skaXZ9HinM5cIk97TTS3svc10JPDS0zvY2tp8k65NsSrJpZmam76yStKSMuxQuB14BrGFwu4yPLXQHVbWhqqaranpqynvySdIojbUUquqRqtpTVT8D/pSnTxHtAFYPrbqqjUmSxmispZBkxdDsbwJ7r0y6ATg3yQuSnAicBNwxzmySpB5vVZHkC8AZwPFJtgMXA2ckWQMUg7/z/E6AqtqS5BoGn4PYDVxQVXv6yiZJml1vpVBVb5ll+IoDrH8p3jpDkiZqElcfSZIOUpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKnTWykkuTLJriT3DY0dl+SWJN9tX49t40nyySRbk9yT5NS+ckmS5tbnkcJngLX7jL0fuLWqTgJubfMAbwROao/1wOU95pIkzaG3UqiqrwGP7zO8DtjYpjcC5w
yNX1UD3wCOSbKir2ySpNmN+z2F5VW1s00/DCxv0yuBh4bW297G9pNkfZJNSTbNzMz0l1SSlqCJvdFcVQXUs9huQ1VNV9X01NRUD8kkaekadyk8sve0UPu6q43vAFYPrbeqjUmSxmjcpXADcF6bPg+4fmj8d9pVSKcDTwydZpIkjcmhfe04yReAM4Djk2wHLgY+DFyT5Hzg+8Cb2+o3AWcDW4GfAO/oK5ckaW69lUJVvWWORWfNsm4BF/SVRZI0P36iWZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUOXQS3zTJNuDHwB5gd1VNJzkO+HPgBGAb8Oaq+sEk8knSUjXJI4Vfrao1VTXd5t8P3FpVJwG3tnlJ0hhN5EhhDuuAM9r0RuCvgPctdCeX3/m10SUaoX/9T1436QiS9IwmdaRQwFeS3JVkfRtbXlU72/TDwPLZNkyyPsmmJJtmZmbGkVWSloxJHSm8tqp2JHkJcEuSbw8vrKpKUrNtWFUbgA0A09PTs64jSXp2JlIKVbWjfd2V5DrgNOCRJCuqameSFcCuSWTTc/O12x6YdIRZve5Xf3HSEaRFYeynj5K8KMnRe6eBNwD3ATcA57XVzgOuH3c2SVrqJnGksBy4Lsne7//5qro5yZ3ANUnOB74PvHkC2SRpSRt7KVTV94BfnmX8MeCsceeRJD3NTzRLkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjqT+hvNkkbsP335jklHmNPvn3PapCNonjxSkCR1LAVJUsdSkCR1LAVJUsdSkCR1Drqrj5KsBT4BLAP+rKo+POFIksZg5sY/mHSEWU39+n+cdISxOqiOFJIsAz4FvBE4GXhLkpMnm0qSlo6D7UjhNGBrVX0PIMnVwDrg/omm0pIwc9kfTjrCnKbe828nHUHP4MubL5p0hFmds+Y/LGj9VFVPURYuyZuAtVX1u23+bcCrqurCoXXWA+vb7C8A3+kx0vHAoz3uv2/mn6zFnH8xZwfzP5NHq2rtbAsOtiOFZ1RVG4AN4/heSTZV1fQ4vlcfzD9Zizn/Ys4O5n8uDqr3FIAdwOqh+VVtTJI0BgdbKdwJnJTkxCSHA+cCN0w4kyQtGQfV6aOq2p3kQuAvGFySemVVbZlgpLGcpuqR+SdrMedfzNnB/M/aQfVGsyRpsg6200eSpAmyFCRJnSVRCkn2JNmcZEuSbyV5b5JDhpafluSvknw3yd1J/keSf9SWXZLkJ0leMrT+k+3r6iS3Jbm/7ftdiyz/EUnuaPvckuSDiyn/0PyyJN9McuNiy5/kPW2/9yX5QpIjFln+bUnubfvfNOrsY8h/TJJrk3w7yQNJfmURZb8yya4k940yM1X1vH8ATw5NvwT4S+CDbX45sA149dA6rwXOadOXAP8X+Mi++wNWAKe26aOB/w2cvIjyBziqTR8G3A6cvljyD83/G+DzwI2L7PmzEngQOLLNXwO8fbHkb9PbgOP7+LmPKf9G4Hfb9OHAMYso++uAU4H7Rpq5z/+ZB8tjlheRlwOPMXhR/Pd7/yfNse0l7bENOG62/Q2tez3w+sWYH3ghcDeDT5AvmvwMPstyK3AmYyiFUeZnUAoPAccxuBLwRuANiyV/m97GGEthxD//n2NQylls2YfWOYERl8KSOH20rxrcW2kZg+Z+JYMXwwN5ErgSmPP0UJITgFMY/Lbdq1Hmb6deNgO7gFuqalHlB/4I+APgZyOMeECjyl9VO4CPMvhtcCfwRFV9ZeSB9zHin38BX0lyVwa3oOndCPOfCMwAn26nH/8syYtGnXdYH689o7YkS+FAktzezi1+Yp9FnwTOS3L0LNscBXwReHdV/WgcOeey0PxVtaeq1j
D4jfu0JL80pqizWkj+JL8O7Kqqu8Ya8gAWmP9YBjd8PBF4KfCiJG8dX9r9PYvn/2ur6lQGdza+IMnrxhJ0DgvMfyiD0y+XV9UpwN8C7x9T1P08m9eePizJUkjycmAPg9+OtzB4YgBQVa8C/h2DQ0uGxn/I4Lz1Bfvs6zAGhfC5qvpSr8Gf/p4jy7/P8tuAWW+SNUojzP8a4DeSbAOuBs5M8l/7zA4jzf9PgQeraqaq/h/wJeDVvYZntM+fdrRDVe0CrmNwp+NejTD/dmD70NHxtcP76kMf/3ZHbcmVQpIp4L8Af1KDk3KfAt6eZPgf4wvn2PzjwDtpnwRPEuAK4IGq+nh/qZ824vxTSY5p00cCrwe+3VN09n5PRpS/qi6qqlVVdQKDW6J8tap6/U17lPkZnDY6PckL23PpLOCBfpIPjPj586K9v7220y5vAEZ7Jcw+Rvz8eRh4KMkvtOVn0eNt+kf83OnNQXWbix4d2c6bHwbsBj7L4IdMVT2c5J8DH0mykkGDPwp8aN+dVNWjSa4D3tOGXgO8Dbi37R/gA1V10yLJvwLYmMEfNzoEuKaq+riss6/849JL/qq6Pcm1DM4r7wa+ST+3N+jr578cuG7QZxwKfL6qbl5E+QF+D/hcBvda+x7wjsWSPckXgDOA45NsBy6uqiuea2BvcyFJ6iy500eSpLlZCpKkjqUgSepYCpKkjqUgSepYCtICpOc7vkqTtlQ+pyCNyk/bbUFoL+6fB14MXJxkOYM7nf6Lqvqbts5rgVcA97btHwXeC7xvzLmlefFzCtICJHmyqo4amn85cCdwPIMPHf2sqi6eY9tL2uTbGdxy/fF99ydNmqePpOdgMdz1UloIS0HqycFy10tpISwF6TlYDHe9lBbCN5qlZ2nfu14m+RRwe5K/2PtGMwe+6+Wd+G9QBxmfkNLCLPY7vkoH5NVHkqSO7ylIkjqWgiSpYylIkjqWgiSpYylIkjqWgiSpYylIkjr/H6Hzdzf0kiVSAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Pass the column as x= explicitly: seaborn 0.11 deprecates positional x, and from 0.12\n",
"# the only valid positional argument is `data`.\n",
"sns.countplot(x=df.DGN,palette='Set3')\n",
"sns.despine()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "b25a11b2",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAASGklEQVR4nO3df6wdZ33n8fcnjoG0QEM2l9TYRqGsV7uhu3Wi20BLVdFEbE22W9OqpWEFDWwks9ukgipq+SFVBNRIrVRIoaVZuZuAQ2lTbyEly6bpZkO6KNoliZOaEMcgXAiKLSe+IfxICk1l8+0f5/GTI+faOU7unHPj+35JozPzzDNzv0dz7c+d58zMSVUhSRLASbMuQJK0fBgKkqTOUJAkdYaCJKkzFCRJ3cmzLuCZ2LRpU910002zLkOSnm1ytBXP6jOFhx9+eNYlSNIJ5VkdCpKkpWUoSJI6Q0GS1BkKkqTOUJAkdYOFQpLnJbkjyReS7Eryvtb+sSRfS7KzTRtbe5J8OMmeJPckOWeo2iRJixvyPoXHgfOq6rEkq4Hbkvx1W/ebVfWXR/R/HbChTa8ErmqvkqQpGexMoUYea4ur23Ss53RvBq5t230eODXJmqHqkyQ92aCfKSRZlWQncAC4uapub6uuaENEVyZ5bmtbCzwwtvne1nbkPrck2ZFkx8LCwpDlS9KKM+hjLqrqELAxyanA9Ul+FHg38CDwHGAr8E7g/cexz61tO+bn5/2GoBXie9+7ZdYlnPBOOeX8WZegZWAqVx9V1beAW4FNVbW/DRE9DnwUOLd12wesH9tsXWuTJE3JkFcfzbUzBJKcArwW+NLhzwmSBHg9cG/b5AbgV9tVSK8Cvl1V+4eqT5L0ZEMOH60BtiVZxSh8tlfVZ5J8Nskco6f07QT+S+t/I3ABsAf4LvDWAWuTJC1isFCoqnuAsxdpP+8o/Qu4ZKh6JElPzTuaJUmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUDRYKSZ6X5I4kX0iyK8n7WvvLktyeZE+Sv0jynNb+3La8p60/c6jaJEmLG/JM4XHgvKr6MWAjsCnJq4DfA66sqn8JfBO4uPW/GPhma7+y9ZMkTdFgoVAjj7XF1W0q4DzgL1v7NuD1bX5zW6atPz9JhqpPkvRkg36mkGRVkp3AAeBm4O+Bb1XVwdZlL7C2za8FHgBo678N/ItF9rklyY4kOxYWFoYsX5JWnEFDoaoOVdVGYB1wLvCvl2CfW6tqvqrm5+bmnunuJEljpnL1UVV9C7gV+Ang1CQnt1XrgH1tfh+wHqCt/yHgG9OoT5I0MuTVR3NJTm3zpwCvBXYzCodfat0uAj7d5m9oy7T1n62qGqo+SdKTnfzUXZ62NcC2JKsYhc/2qvpMkvuA65L8DvB3wNWt/9XAx5PsAR4BLhywNknSIgYLhaq6Bzh7kfavMvp84cj2fwR+eah6JElPzTuaJUmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkrrBQiHJ+iS3Jrkvya4kb2/tlyfZl2Rnmy4Y2+bdSfYk+XKSnx2qNknS4k4ecN8Hgcuq6u4kLwDuSnJzW3dlVf3+eOckZwEXAq8AXgL8nyT/qqoODVijJGnMYGcKVbW/qu5u848Cu4G1x9hkM3BdVT1eVV8D9gDnDlWfJOnJpvKZQpIzgbOB21vTpUnuSXJNkhe1trXAA2Ob7WWREEmyJcmOJDsWFhaGLFuSVpzBQyHJ84FPAu+oqu8AVwEvBzYC+4EPHM/+qmprVc1X1fzc3NxSlytJK9qgoZBkNaNA+ERVfQqgqh6qqkNV9X3gT3hiiG
gfsH5s83WtTZI0JUNefRTgamB3VX1wrH3NWLdfAO5t8zcAFyZ5bpKXARuAO4aqT5L0ZENeffRq4M3AF5PsbG3vAd6YZCNQwP3A2wCqaleS7cB9jK5cusQrjyRpugYLhaq6Dcgiq248xjZXAFcMVZMk6di8o1mS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSd1goZBkfZJbk9yXZFeSt7f205LcnOQr7fVFrT1JPpxkT5J7kpwzVG2SpMUNeaZwELisqs4CXgVckuQs4F3ALVW1AbilLQO8DtjQpi3AVQPWJklaxEShkOSWSdrGVdX+qrq7zT8K7AbWApuBba3bNuD1bX4zcG2NfB44NcmaSeqTJC2Nk4+1MsnzgB8ATm/DPGmrXsjoP/iJJDkTOBu4HTijqva3VQ8CZ7T5tcADY5vtbW37x9pIsoXRmQQvfelLJy1BkjSBY4YC8DbgHcBLgLt4IhS+A/zRJD8gyfOBTwLvqKrvJOnrqqqS1PEUXFVbga0A8/Pzx7WtJOnYjhkKVfUh4ENJfr2q/vB4d55kNaNA+ERVfao1P5RkTVXtb8NDB1r7PmD92ObrWpskaUqe6kwBgKr6wyQ/CZw5vk1VXXu0bTI6Jbga2F1VHxxbdQNwEfC77fXTY+2XJrkOeCXw7bFhJknSFEwUCkk+Drwc2Akcas0FHDUUgFcDbwa+mGRna3sPozDYnuRi4OvAG9q6G4ELgD3Ad4G3TvomJElLY6JQAOaBs6pq4jH8qrqNJz6DONL5i/Qv4JJJ9y9JWnqT3qdwL/DDQxYiSZq9Sc8UTgfuS3IH8Pjhxqr6+UGqkiTNxKShcPmQRUiSlodJrz76v0MXIkmavUmvPnqU0dVGAM8BVgP/UFUvHKowSdL0TXqm8ILD8+3+g82MHnInSTqBHPdTUtsD6/4K+NmlL0eSNEuTDh/94tjiSYzuW/jHQSqSJM3MpFcf/cex+YPA/YyGkCRJJ5BJP1PwkROStAJM+iU765Jcn+RAmz6ZZN3QxUmSpmvSD5o/yugppi9p0/9sbZKkE8ikoTBXVR+tqoNt+hgwN2BdkqQZmDQUvpHkTUlWtelNwDeGLEySNH2ThsJ/ZvS9Bw8y+s7kXwLeMlBNkqQZmfSS1PcDF1XVNwGSnAb8PqOwkCSdICY9U/h3hwMBoKoeAc4epiRJ0qxMGgonJXnR4YV2pjDpWYYk6Vli0v/YPwD8/yT/oy3/MnDFMCVJkmZl0juar02yAzivNf1iVd03XFmSpFmYeAiohYBBIEknsON+dPakklzTHolx71jb5Un2JdnZpgvG1r07yZ4kX07iY7klaQYGCwXgY8CmRdqvrKqNbboRIMlZwIXAK9o2f5xk1YC1SZIWMVgoVNXngEcm7L4ZuK6qHq+qrwF7gHOHqk2StLghzxSO5tIk97ThpcOXua4FHhjrs7e1PUmSLUl2JNmxsLAwdK2StKJMOxSuAl4ObGT0uIwPHO8OqmprVc1X1fzcnM/kk6SlNNVQqKqHqupQVX0f+BOeGCLaB6wf67qutUmSpmiqoZBkzdjiLwCHr0y6AbgwyXOTvAzYANwxzdokSQM+qiLJnwOvAU5Pshd4L/CaJBuBYvQ9z28DqKpdSbYzug/iIHBJVR0aqjZJ0uIGC4WqeuMizVcfo/8V+OgMSZqpWVx9JElapgwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUDRYKSa5JciDJvWNtpyW5OclX2uuLWnuSfDjJniT3JDlnqLokSUc35JnCx4BNR7S9C7ilqjYAt7RlgNcBG9q0BbhqwLokSUcxWChU1eeAR45o3gxsa/PbgNePtV9bI5
8HTk2yZqjaJEmLm/ZnCmdU1f42/yBwRptfCzww1m9va3uSJFuS7EiyY2FhYbhKJWkFmtkHzVVVQD2N7bZW1XxVzc/NzQ1QmSStXNMOhYcODwu11wOtfR+wfqzfutYmSZqiaYfCDcBFbf4i4NNj7b/arkJ6FfDtsWEmSdKUnDzUjpP8OfAa4PQke4H3Ar8LbE9yMfB14A2t+43ABcAe4LvAW4eqS5J0dIOFQlW98Sirzl+kbwGXDFWLJGky3tEsSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTu5Fn80CT3A48Ch4CDVTWf5DTgL4AzgfuBN1TVN2dRnyStVLM8U/iZqtpYVfNt+V3ALVW1AbilLUuSpmgmZwpHsRl4TZvfBvwt8M6l2vlVd35uqXalY/ivP/7Tsy5B0jMwqzOFAv53kruSbGltZ1TV/jb/IHDGYhsm2ZJkR5IdCwsL06hVklaMWZ0p/FRV7UvyYuDmJF8aX1lVlaQW27CqtgJbAebn5xftI0l6emYSClW1r70eSHI9cC7wUJI1VbU/yRrgwCxqk7T0Pnfr7lmXcML76Z/5N0uyn6kPHyX5wSQvODwP/HvgXuAG4KLW7SLg09OuTZJWulmcKZwBXJ/k8M//s6q6KcmdwPYkFwNfB94wg9okaUWbeihU1VeBH1uk/RvA+dOuR5L0BO9oliR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSt+xCIcmmJF9OsifJu2ZdjyStJMsqFJKsAj4CvA44C3hjkrNmW5UkrRzLKhSAc4E9VfXVqvon4Dpg84xrkqQV4+RZF3CEtcADY8t7gVeOd0iyBdjSFh9L8uUp1TYLpwMPz7qI4/Frsy5geXnWHT91J/qxu6mqNi22YrmFwlOqqq3A1lnXMQ1JdlTV/Kzr0NPj8Xv2WsnHbrkNH+0D1o8tr2ttkqQpWG6hcCewIcnLkjwHuBC4YcY1SdKKsayGj6rqYJJLgb8BVgHXVNWuGZc1SytimOwE5vF79lqxxy5VNesaJEnLxHIbPpIkzZChIEnqDIUpSHIoyc4ku5J8IcllSU4aW39ukr9N8pUkdyf5X0n+bVt3eZLvJnnxWP/H2uv6JLcmua/t++3Tf3cnvgGP3/OS3NH2uSvJ+6b/7k58Qx2/seVVSf4uyWem966Gs6w+aD6Bfa+qNgK0X64/A14IvDfJGcB24D9V1f9rfX4KeDnwxbb9w8BlwDuP2O9B4LKqujvJC4C7ktxcVfcN/YZWmKGO3+PAeVX1WJLVwG1J/rqqPj/0G1phhjp+h70d2N32+aznmcKUVdUBRndkX5okwKXAtsO/kK3PbVX1V2ObXQP8SpLTjtjX/qq6u80/yugXc+3Ab2FFW+LjV1V1+K/O1W3yyo8BLeXxA0iyDvgPwH8ftPApMhRmoKq+yuiS2xcDrwDufopNHmP0i3nU4aEkZwJnA7cvTZU6mqU8fm3oYSdwALi5qjx+A1vif39/APwW8P0lLHGmDIVlJsntSXYn+dARqz4MXNSGiY7c5vnAJ4F3VNV3plGnFne8x6+qDrWhjXXAuUl+dEqlahHHc/yS/BxwoKrummqRAzMUZiDJjwCHGP11uAs45/C6qnol8NvAD41vU1XfYjQWeskR+1rNKBA+UVWfGrRwAUt7/I5Yfyuw6EPKtHSW8Pi9Gvj5JPczeqLzeUn+dMjap8FQmLIkc8B/A/6oRncOfgR4S5KfHOv2A0fZ/IPA22gXCLQx0auB3VX1weGq1mFLfPzmkpza5k8BXgt8aaDSxdIev6p6d1Wtq6ozGT2S57NV9abBip8Srz6ajlPauPFqRlcMfZ
zRLxhV9WCSXwF+L8laRn+9PAy8/8idVNXDSa4HfqM1vRp4M/DFtn+A91TVjQO+l5VoqOO3BtiW0ZdLnQRsr6oT4rLGZWao43dC8jEXkqTO4SNJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCdByGfuKmNGvepyAdn6GfuCnNlPcpSMchyWNV9fyx5R8B7gROZ3TD0/er6r1H2fbyNvsW4JyqeuTI/Umz5vCR9AwM8cRbaZYMBWkgT+eJt9KsGQrSMzDEE1OlWfKDZulpOvKJm0k+Atye5G/GvsnrWE/cvBP/DWqZ8RdSOj4+cVMnNK8+kiR1fqYgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqftn20qlbf0HGsAAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Drop every row whose DGN is one of the listed diagnosis codes.\n",
"dgn_types = ['DGN1','DGN8', 'DGN5', 'DGN6']\n",
"df = df[~df.DGN.isin(dgn_types)]\n",
"\n",
"# x= keyword: seaborn 0.11 deprecates positional x (from 0.12 only `data` may be positional).\n",
"sns.countplot(x=df.DGN,palette='Set3')\n",
"sns.despine()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "19387a02",
"metadata": {},
"outputs": [],
"source": [
"X, y = df.drop('Risk1Yr',axis=1), df.Risk1Yr"
]
},
{
"cell_type": "markdown",
"id": "01b55142",
"metadata": {},
"source": [
"### Training, test split"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "dba575cd",
"metadata": {},
"outputs": [],
"source": [
"X_train,X_test,y_train,y_test = train_test_split(X, \n",
" y,\n",
" test_size=.33,\n",
" random_state=42,\n",
" shuffle=True,\n",
" stratify=y)"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "4b3a5041",
"metadata": {},
"outputs": [],
"source": [
"keepdims = ['DGN','PRE4','PRE5']\n",
"X_train, X_test = X_train[keepdims], X_test[keepdims]\n",
"\n",
"X_train = X_train.reset_index(drop=True)\n",
"X_test = X_test.reset_index(drop=True)"
]
},
{
"cell_type": "markdown",
"id": "5893ec4e",
"metadata": {},
"source": [
"### Standardize data"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "04212592",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"StandardScaler()"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scaler = StandardScaler()\n",
"scaler.fit(X_train[keepdims[1:]])\n",
"scaler"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "de7a4ffd",
"metadata": {},
"outputs": [],
"source": [
"X_train = np.append(X_train.DGN.to_numpy().reshape(-1,1), scaler.transform(X_train[keepdims[1:]]), axis=1)\n",
"X_test = np.append(X_test.DGN.to_numpy().reshape(-1,1), scaler.transform(X_test[keepdims[1:]]), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "2bd71c25",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"X_train = pd.DataFrame(X_train, columns=keepdims)\n",
"X_test = pd.DataFrame(X_test, columns=keepdims)"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "e4dace09",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>DGN</th>\n",
" <th>PRE4</th>\n",
" <th>PRE5</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>DGN3</td>\n",
" <td>0.469764</td>\n",
" <td>-0.149318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>DGN3</td>\n",
" <td>1.609505</td>\n",
" <td>-0.052993</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>DGN3</td>\n",
" <td>0.378584</td>\n",
" <td>-0.175005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>DGN2</td>\n",
" <td>-0.35085</td>\n",
" <td>-0.207114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>DGN3</td>\n",
" <td>1.575313</td>\n",
" <td>-0.083496</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" DGN PRE4 PRE5\n",
"0 DGN3 0.469764 -0.149318\n",
"1 DGN3 1.609505 -0.052993\n",
"2 DGN3 0.378584 -0.175005\n",
"3 DGN2 -0.35085 -0.207114\n",
"4 DGN3 1.575313 -0.083496"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "7543c9a4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>DGN</th>\n",
" <th>PRE4</th>\n",
" <th>PRE5</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>DGN3</td>\n",
" <td>1.427147</td>\n",
" <td>-0.097945</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>DGN2</td>\n",
" <td>0.150636</td>\n",
" <td>-0.165373</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>DGN3</td>\n",
" <td>1.974223</td>\n",
" <td>-0.075469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>DGN3</td>\n",
" <td>-0.852337</td>\n",
" <td>-0.232801</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>DGN4</td>\n",
" <td>2.156581</td>\n",
" <td>0.011225</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" DGN PRE4 PRE5\n",
"0 DGN3 1.427147 -0.097945\n",
"1 DGN2 0.150636 -0.165373\n",
"2 DGN3 1.974223 -0.075469\n",
"3 DGN3 -0.852337 -0.232801\n",
"4 DGN4 2.156581 0.011225"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test.head()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "2ce29c1e",
"metadata": {},
"outputs": [],
"source": [
"# end of preprocessing"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment