Unverified Commit 0da97b56 authored by kaanguney's avatar kaanguney Committed by GitHub

Delete notebooks directory

parent 059106cb
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "94169c2a",
"metadata": {},
"outputs": [],
"source": [
"import os \n",
"os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' \n",
"import numpy as np\n",
"from sklearn.preprocessing import StandardScaler\n",
"import tensorflow as tf\n",
"tf.compat.v1.disable_eager_execution() \n",
"import tensorflow_probability as tfp\n",
"import matplotlib.pyplot as plt\n",
"from data_loader import load_data\n",
"from data_preprocesser import preprocess_data\n",
"from planar import Planar"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "087e74b6",
"metadata": {},
"outputs": [],
"source": [
"def train(session, loss, optimizer, steps=int(1e5)):\n",
"    \"\"\"Run the training op `steps` times and sample the loss.\n",
"\n",
"    Args:\n",
"        session: session whose `run` evaluates `[optimizer, loss]`.\n",
"        loss: loss fetched on every iteration.\n",
"        optimizer: training op executed on every iteration.\n",
"        steps: number of iterations to run.\n",
"\n",
"    Returns:\n",
"        List holding the loss of every 100th iteration, starting at 0.\n",
"    \"\"\"\n",
"    recorded_losses = []\n",
"    for i in range(steps):\n",
"        _, loss_per_iteration = session.run([optimizer, loss])\n",
"        if i % 100 == 0:\n",
"            recorded_losses.append(loss_per_iteration)\n",
"        if i % int(1e4) == 0:\n",
"            print('Iteration {iteration}: {loss}'.format(iteration=i,loss=loss_per_iteration))\n",
"    return recorded_losses\n",
"\n",
"\n",
"def plot_results(recorded_losses):\n",
"    \"\"\"Scatter the recorded losses and overlay a least-squares line.\n",
"\n",
"    Args:\n",
"        recorded_losses: sequence of loss values; the x axis is the\n",
"            position of each value in this sequence.\n",
"    \"\"\"\n",
"    print('Displaying results...')\n",
"    plt.figure(figsize=(10,5))\n",
"    x = np.arange(len(recorded_losses))\n",
"    y = recorded_losses\n",
"    # Degree-1 fit: slope and intercept of the linear trend.\n",
"    m, b = np.polyfit(x, y, 1)\n",
"    plt.scatter(x, y, s=10, alpha=0.3)\n",
"    plt.plot(x, m*x+b, c='r')\n",
"    plt.title('Loss per 100 iteration')\n",
"    plt.xlabel('Iteration')\n",
"    plt.ylabel('Loss')\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
"\n",
"\n",
"def create_tensor(data, batch_size):\n",
"    \"\"\"Build an endlessly repeating, shuffled, batched stream of samples.\n",
"\n",
"    Args:\n",
"        data: array exposing `astype` and `shape`; it is cast to float32.\n",
"        batch_size: number of rows per batch.\n",
"\n",
"    Returns:\n",
"        The `get_next()` tensor of a one-shot iterator over the dataset.\n",
"    \"\"\"\n",
"    dataset = tf.data.Dataset.from_tensor_slices(data.astype(np.float32))\n",
"    # Shuffle before repeating so rows of different epochs are not mixed.\n",
"    dataset = dataset.shuffle(buffer_size=data.shape[0])\n",
"    dataset = dataset.repeat()\n",
"    dataset = dataset.batch(batch_size)\n",
"    # Prefetch last so whole batches (two of them) are buffered.\n",
"    dataset = dataset.prefetch(2)\n",
"    data_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)\n",
"    samples = data_iterator.get_next()\n",
"    return samples\n",
"\n",
"\n",
"# If TensorFlow raises an error about tf.float32, try one of the\n",
"# following (one of them is probably enough):\n",
"#   * downgrade keras to 2.3.1\n",
"#   * replace tf.float32 with np.float32\n",
"def check_version():\n",
"    \"\"\"Print the TensorFlow, TensorFlow Probability and Keras versions.\"\"\"\n",
"    print(f'Tensorflow version: {tf.__version__}')\n",
"    print(f'Tensorflow-probability version: {tfp.__version__}')\n",
"    print(f'Keras version: {tf.keras.__version__}\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23f55548",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Data successfully loaded...\n",
"\n",
"Data successfully preprocessed...\n",
"\n",
"Optimizer and loss successfully defined...\n",
"\n",
"Iteration 0: 95.97050476074219\n",
"Iteration 10000: 57.572265625\n",
"Iteration 20000: 49.272705078125\n",
"Iteration 30000: 46.765769958496094\n",
"Iteration 40000: 46.634979248046875\n",
"Iteration 50000: 44.979713439941406\n"
]
}
],
"source": [
"def main():\n",
"    \"\"\"Load and preprocess the prostate data, then fit a chain of Planar bijectors.\n",
"\n",
"    The loss is the negative mean log-probability of the sampled batches\n",
"    under the transformed distribution; the recorded losses are plotted\n",
"    once training ends.\n",
"    \"\"\"\n",
"\n",
"    # --- load data ---\n",
"    filename = 'prostate.xls'\n",
"    directory = '/Users/kaanguney.keklikci/Data/'\n",
"\n",
"    loader = load_data(filename, directory)\n",
"    loader.create_directory(directory)\n",
"    data = loader.read_data(directory, filename)\n",
"    print('Data successfully loaded...\\n')\n",
"\n",
"    # --- preprocess data ---\n",
"    fillna_vals = ['sz', 'sg', 'wt']\n",
"    dropna_vals = ['ekg', 'age']\n",
"    drop_vals = ['patno', 'sdate']\n",
"\n",
"    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)\n",
"    data = preprocesser.dropna_features(data)\n",
"    data = preprocesser.impute(data)\n",
"    data = preprocesser.drop_features(data)\n",
"    data = preprocesser.encode_categorical(data)\n",
"    data = preprocesser.scale(data)\n",
"    print('Data successfully preprocessed...\\n')\n",
"\n",
"    # Seed the graph before any op is created: ops only pick up the\n",
"    # graph-level seed that is in place when they are constructed.\n",
"    tf.compat.v1.set_random_seed(42)\n",
"\n",
"    # --- set Planar parameters ---\n",
"    tfd = tfp.distributions\n",
"    tfb = tfp.bijectors\n",
"\n",
"    batch_size = 32\n",
"    dtype = np.float32\n",
"    layers = 8\n",
"    dims = data.shape[1]\n",
"    # multivariate normal for base distribution\n",
"    base_dist = tfd.MultivariateNormalDiag(loc=tf.zeros(shape=dims, dtype=dtype))\n",
"    learning_rate = 1e-4\n",
"\n",
"    # --- initialize samples ---\n",
"    samples = create_tensor(data, batch_size)\n",
"\n",
"    # --- make Planar ---\n",
"    bijectors = [\n",
"        Planar(input_dimensions=dims, case='density_estimation')\n",
"        for _ in range(layers)\n",
"    ]\n",
"    bijector = tfb.Chain(bijectors=list(reversed(bijectors)), name='chain_of_planar')\n",
"    planar_flow = tfd.TransformedDistribution(\n",
"        distribution=base_dist,\n",
"        bijector=bijector\n",
"    )\n",
"\n",
"    loss = -tf.reduce_mean(planar_flow.log_prob(samples))\n",
"    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)\n",
"\n",
"    # The context manager closes the session even if training fails.\n",
"    with tf.compat.v1.Session() as session:\n",
"        session.run(tf.compat.v1.global_variables_initializer())\n",
"        print('Optimizer and loss successfully defined...\\n')\n",
"\n",
"        # --- start training ---\n",
"        recorded_losses = train(session, loss, optimizer)\n",
"    print('Training finished...\\n')\n",
"\n",
"    # --- display results ---\n",
"    plot_results(recorded_losses)\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
"    main()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d33af87",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment