Unverified Commit eea1f5fe authored by kaanguney, committed by GitHub

Delete scripts directory

parent 0da97b56
import numpy as np
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import os
class Experiment:
def __init__(self, optimizer, learning_rate, loss, steps=int(1e5)):
self.optimizer = optimizer
self.learning_rate = learning_rate
self.loss = loss
self.steps = int(steps)
def change_optimizer(self, learning_rate, loss, keyword='adam'):
if keyword == 'adam':
self.optimizer = self.optimizer  # keep the Adam train op supplied at construction
elif keyword == 'sgd':
self.optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate).minimize(loss)
elif keyword == 'rmsprop':
self.optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate).minimize(loss)
else:
raise NotImplementedError('Undefined optimizer!')
def get_optimizer(self):
return self.optimizer
def set_iteration_count(self, iteration_count):
self.steps = iteration_count
def get_iteration_count(self):
return self.steps
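# Hedged usage sketch (added comment, not in the original file): this mirrors how the
# *_experiment training scripts below drive the class; `loss` and `learning_rate` are
# assumed to be defined by the calling script.
#
#   optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
#   experiment = Experiment(optimizer, learning_rate, loss, steps=1e4)
#   for keyword in ['adam', 'rmsprop', 'sgd']:
#       experiment.change_optimizer(learning_rate, loss, keyword=keyword)
#       train_op = experiment.get_optimizer()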
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
import os
import matplotlib.pyplot as plt
plt.style.use('seaborn')
tfd = tfp.distributions
tfb = tfp.bijectors
class Cubic(tfb.Bijector):
def __init__(self, a, b, validate_args=False, name='Cubic'):
self.a = tf.cast(a, tf.float32)
self.b = tf.cast(b, tf.float32)
if validate_args:
assert tf.reduce_mean(tf.cast(tf.math.greater_equal(tf.abs(self.a), 1e-5), tf.float32)) == 1.0
assert tf.reduce_mean(tf.cast(tf.math.greater_equal(tf.abs(self.b), 1e-5), tf.float32)) == 1.0
super(Cubic, self).__init__(
validate_args=validate_args, forward_min_event_ndims=0, name=name)
def _forward(self, x):
x = tf.cast(x, tf.float32)
return tf.squeeze(tf.pow(self.a*x + self.b, 3))
def _inverse(self, y):
y = tf.cast(y, tf.float32)
return (tf.math.sign(y) * tf.pow(tf.abs(y), 1/3) - self.b) / self.a
def _forward_log_det_jacobian(self, x):
x = tf.cast(x, tf.float32)
return tf.math.log(3. * tf.abs(self.a)) + 2. * tf.math.log(tf.abs(self.a*x + self.b))
def _inverse_log_det_jacobian(self, x):
return -self._forward_log_det_jacobian(self._inverse(x))
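# Added note: the bijector above implements y = (a*x + b)**3; the inverse recovers
# x = (sign(y)*|y|**(1/3) - b) / a, and the forward log-det-Jacobian is
# log(3*|a|) + 2*log|a*x + b|, i.e. log|dy/dx| with dy/dx = 3*a*(a*x + b)**2.
# A quick round-trip check (sketch, assuming eager execution) could be:
#
#   bijector = Cubic(a=-0.1, b=5.)
#   x = tf.constant([0.5, 1.0])
#   print(bijector.inverse(bijector.forward(x)))  # should recover x up to float error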
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
plt.style.use('seaborn-paper')
from data_loader import load_data
from data_preprocesser import preprocess_data
from cubic import Cubic
def main():
tfd = tfp.distributions
tfb = tfp.bijectors
""" load data """
filename = 'prostate.xls'
directory = '/Users/kaanguney.keklikci/Data/'
loader = load_data(filename, directory)
loader.create_directory(directory)
data = loader.read_data(directory, filename)
print('Data successfully loaded...\n')
""" preprocess data """
fillna_vals = ['sz', 'sg', 'wt']
dropna_vals = ['ekg', 'age']
drop_vals = ['patno', 'sdate']
preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
data = preprocesser.dropna_features(data)
data = preprocesser.impute(data)
data = preprocesser.drop_features(data)
data = preprocesser.encode_categorical(data)
data = preprocesser.scale(data)
print('Data successfully preprocessed...\n')
""" define the base distributon as bivariate gaussian """
n = 10000
base_dist = tfd.MultivariateNormalDiag(loc=[0.,0.], scale_diag=[1.,1.])
""" instantiate the bijector """
a = -0.1
b = 5.
bijector = Cubic(a, b, validate_args=True)
x = np.linspace(start=-4, stop=4, num=n).astype(np.float32).reshape(-1, 2)
plt.title('Forward transformation')
plt.plot(x, bijector.forward(x))
plt.show()
plt.plot(x, bijector.inverse(x))
plt.title('Inverse transformation')
plt.show()
plt.plot(x, bijector.forward_log_det_jacobian(x))
plt.title('Jacobian determinant')
plt.show()
plt.plot(x, bijector.inverse_log_det_jacobian(x))
plt.title('Inverse Jacobian determinant')
plt.show()
""" create transformed distribution """
tfd_dist = tfd.TransformedDistribution(distribution=base_dist,
bijector=bijector
)
# prior training
plt.figure(figsize=(12,5))
plt.plot(tfd_dist.prob(x), label='Trainable')
plt.plot(base_dist.prob(x), label='Base')
plt.title('Target and Trainable distribution')
plt.legend()
plt.show()
# sample, batch -- train, validation
x_train = base_dist.sample(10000)
x_train = tf.data.Dataset.from_tensor_slices(x_train)
x_train = x_train.batch(int(n/2))
x_valid = base_dist.sample(1000)
x_valid = tf.data.Dataset.from_tensor_slices(x_valid)
x_valid = x_valid.batch(int(n/2))
print(x_train.element_spec)
print(x_valid.element_spec)
print()
# instantiate trainable bijector
trainable_bijector = tfb.Invert(Cubic(tf.Variable(a,
name='alpha'),
tf.Variable(b,
name='beta')
))
# instantiate trainable distribution
trainable_dist = tfd.TransformedDistribution(tfd_dist,
trainable_bijector
)
# Train the bijector
num_epochs = 10
opt = tf.keras.optimizers.Adam()
train_losses = []
valid_losses = []
norm = 1e3
for epoch in range(num_epochs):
print("Epoch {}...".format(epoch))
train_loss = tf.keras.metrics.Mean()
val_loss = tf.keras.metrics.Mean()
for train_batch in x_train:
with tf.GradientTape() as tape:
tape.watch(trainable_bijector.trainable_variables)
loss = -trainable_dist.log_prob(train_batch)
train_loss(loss)
grads = tape.gradient(loss, trainable_bijector.trainable_variables)
grads, _ = tf.clip_by_global_norm(grads, norm)
# grads = tf.reshape(tf.nn.softmax(grads[-1], axis=1), [2])
# note that both alternatives work almost identically
opt.apply_gradients(zip(grads, trainable_bijector.trainable_variables))
train_losses.append(train_loss.result().numpy())
# validation
for valid_batch in x_valid:
loss = -trainable_dist.log_prob(valid_batch)
val_loss(loss)
valid_losses.append(val_loss.result().numpy())
# Plot the learning curves
plt.plot(train_losses, label='train')
plt.plot(valid_losses, label='valid')
plt.legend()
plt.xlabel("Epochs")
plt.ylabel("Negative log likelihood")
plt.title("Training and validation loss curves")
plt.show()
# Plot the data and learned distributions
plt.figure(figsize=(12,5))
plt.plot(trainable_dist.prob(x), label='Learned')
plt.plot(base_dist.prob(x), label='Data')
plt.legend()
plt.show()
### best result obtained with tuning displayed as above
### radial flows converge fast, more epochs overfit
### DO NOT CHANGE validate_args=True
if __name__ == '__main__':
main()
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
plt.style.use('seaborn')
from data_loader import load_data
from data_preprocesser import preprocess_data
from maf import IAF
def train(session, loss, optimizer, steps=int(1e5)):
""" optimize for all dimensions """
recorded_steps = []
recorded_losses = []
for i in range(steps):
_, loss_per_iteration = session.run([optimizer, loss])
if i % 100 == 0:
recorded_steps.append(i)
recorded_losses.append(loss_per_iteration)
if i % int(1e4) == 0:
print('Iteration {iteration}: {loss}'.format(iteration=i,loss=loss_per_iteration))
return recorded_losses
def plot_results(recorded_losses):
""" plot loss """
print('Displaying results...')
fig = plt.figure(figsize=(10,5))
x = np.arange(len(recorded_losses))
y = recorded_losses
m, b = np.polyfit(x, y, 1)
plt.scatter(x, y, s=10, alpha=0.3)
plt.plot(x, m*x+b, c="r")
plt.title('Loss per 100 iterations')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.tight_layout()
plt.show()
def main():
""" load data """
filename = 'prostate.xls'
directory = '/Users/kaanguney.keklikci/Data/'
loader = load_data(filename, directory)
loader.create_directory(directory)
data = loader.read_data(directory, filename)
print('Data successfully loaded...\n')
""" preprocess data """
fillna_vals = ['sz', 'sg', 'wt']
dropna_vals = ['ekg', 'age']
drop_vals = ['patno', 'sdate']
preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
data = preprocesser.dropna_features(data)
data = preprocesser.impute(data)
data = preprocesser.drop_features(data)
data = preprocesser.encode_categorical(data)
data = preprocesser.scale(data)
print('Data successfully preprocessed...\n')
""" set MAF parameters """
batch_size = 32
dtype = np.float32
tf_version = tf.__version__
params = 2
hidden_units = [5,5]
base_dist = tfp.distributions.Normal(loc=0., scale=1.)
dims = data.shape[1]
learning_rate = 1e-4
activation = 'relu'
hidden_degrees = 'random'
conditional=True
conditional_event_shape = (dims,)
event_shape = conditional_event_shape
conditional_input_layers = 'first_layer'
""" initialize samples """
iaf = IAF(dtype, tf_version, batch_size,
params, hidden_units, base_dist, dims,
activation,
conditional, hidden_degrees,
conditional_event_shape,
conditional_input_layers,
event_shape
)
dims = iaf.get_dims(data)
samples = iaf.create_tensor(data)
print(f'TensorFlow version: {iaf.tf_version}')
print(f'Number of dimensions: {iaf.dims}')
print(f'Learning rate: {learning_rate}\n')
""" initialize iaf """
iaf = iaf.make_maf(data)
print('Successfully created model...\n')
""" initialize loss and optimizer """
loss = -tf.reduce_mean(iaf.log_prob(samples, bijector_kwargs={'conditional_input': samples}))
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
session = tf.compat.v1.Session()
tf.compat.v1.set_random_seed(42)
session.run(tf.compat.v1.global_variables_initializer())
print('Optimizer and loss successfully defined...\n')
""" start training """
recorded_losses = train(session, loss, optimizer)
print('Training finished...\n')
""" display results """
plot_results(recorded_losses)
if __name__ == "__main__":
main()
""" use smaller learning rate for gradient descent or increase batch size """
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import tensorflow_probability as tfp
import tensorflow.python.util.deprecation as deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
import matplotlib.pyplot as plt
from data_loader import load_data
from data_preprocesser import preprocess_data
from maf import IAF
from experiment import Experiment
def train(session, loss, optimizer, steps=int(1e5)):
""" optimize for all dimensions """
start_time = time.time()
recorded_steps = []
recorded_losses = []
for i in range(steps):
_, loss_per_iteration = session.run([optimizer, loss])
if i % 100 == 0:
recorded_steps.append(i)
recorded_losses.append(loss_per_iteration)
if i % int(1e4) == 0:
print('Iteration {iteration}: {loss}'.format(iteration=i,loss=loss_per_iteration))
print('\nTraining completed...')
print(f'Training time: {time.time() - start_time} seconds')
return recorded_losses
def plot_results(recorded_losses):
""" plot loss """
print('Displaying results...')
fig = plt.figure(figsize=(10,5))
x = np.arange(len(recorded_losses))
y = recorded_losses
m, b = np.polyfit(x, y, 1)
plt.scatter(x, y, s=10, alpha=0.3)
plt.plot(x, m*x+b, c="r")
plt.title('Loss per 100 iterations')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.tight_layout()
plt.show()
def main():
""" load data """
filename = 'prostate.xls'
directory = '/Users/kaanguney.keklikci/Data/'
loader = load_data(filename, directory)
loader.create_directory(directory)
data = loader.read_data(directory, filename)
print('Data successfully loaded...\n')
""" preprocess data """
fillna_vals = ['sz', 'sg', 'wt']
dropna_vals = ['ekg', 'age']
drop_vals = ['patno', 'sdate']
preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
data = preprocesser.dropna_features(data)
data = preprocesser.impute(data)
data = preprocesser.drop_features(data)
data = preprocesser.encode_categorical(data)
data = preprocesser.scale(data)
print('Data successfully preprocessed...\n')
""" set IAF parameters """
batch_size = 32
dtype = np.float32
tf_version = tf.__version__
params = 2
hidden_units = [5,5] # set this to a small number if you are using CPU
base_dist = tfp.distributions.Normal(loc=0., scale=1., name="gaussian")
dims = data.shape[1]
learning_rate = 1e-4
steps = 1e4
activation = 'relu'
hidden_degrees = 'random'
conditional=True
conditional_event_shape = (dims,)
event_shape = conditional_event_shape
conditional_input_layers = 'first_layer'
""" initialize samples """
iaf = IAF(dtype, tf_version, batch_size,
params, hidden_units, base_dist, dims,
activation,
conditional, hidden_degrees,
conditional_event_shape,
conditional_input_layers,
event_shape
)
dims = iaf.get_dims(data)
samples = iaf.create_tensor(data)
print(f'TensorFlow version: {iaf.tf_version}')
print(f'Number of dimensions: {iaf.dims}')
print(f'Learning rate: {learning_rate}\n')
""" initialize IAF """
iaf = iaf.make_maf(data)
print('Successfully created model...\n')
""" initialize loss and optimizer """
loss = -tf.reduce_mean(iaf.log_prob(samples, bijector_kwargs={'conditional_input': samples}))
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
experiment = Experiment(optimizer, learning_rate, loss, steps)
keywords = ['adam', 'rmsprop', 'sgd']
for keyword in keywords:
session = tf.compat.v1.Session()
tf.compat.v1.set_random_seed(42)
experiment.change_optimizer(learning_rate, loss, keyword=keyword)
optimizer = experiment.get_optimizer()
session.run(tf.compat.v1.global_variables_initializer())
print(f'Optimizer: {optimizer.name}')
print('Optimizer and loss successfully defined...\n')
""" start training """
recorded_losses = train(session, loss, optimizer, steps=experiment.get_iteration_count())
print('Training finished...\n')
""" display results """
plot_results(recorded_losses)
if __name__ == "__main__":
main()
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
tf.compat.v1.disable_eager_execution()
import os
import matplotlib.pyplot as plt
plt.style.use('seaborn')
tfd = tfp.distributions
tfb = tfp.bijectors
class MAF(object):
def __init__(self, dtype, tf_version,
batch_size, params, hidden_units,
base_dist, dims,
activation,
conditional, hidden_degrees,
conditional_event_shape,
conditional_input_layers,
event_shape):
self.tf_version = tf_version
self.dtype = dtype
self.base_dist = base_dist
self.dims = dims
self.params = params
self.hidden_units = hidden_units
self.batch_size = batch_size
self.activation = activation
self.conditional = conditional
self.conditional_event_shape = conditional_event_shape
self.hidden_degrees = hidden_degrees
self.conditional_input_layers = conditional_input_layers
self.event_shape = event_shape
def get_tf_version(self):
return self.tf_version
def get_session(self):
return tf.compat.v1.Session()
def get_dims(self, data):
return data.shape[1]
def create_tensor(self, data):
dataset = tf.data.Dataset.from_tensor_slices(data.astype(self.dtype))
dataset = dataset.repeat()
dataset = dataset.shuffle(buffer_size=data.shape[0])
dataset = dataset.prefetch(2*self.batch_size)
dataset = dataset.batch(self.batch_size)
data_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
samples = data_iterator.get_next()
return samples
def get_shift_scale_func(self):
func = tfb.AutoregressiveNetwork(params=self.params,
hidden_units=self.hidden_units,
activation=self.activation,
conditional=self.conditional,
conditional_event_shape=self.conditional_event_shape,
event_shape=self.event_shape,
conditional_input_layers=self.conditional_input_layers,
hidden_degrees=self.hidden_degrees
)
return func
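# Added note: with params=2 the AutoregressiveNetwork outputs two values per feature,
# which MaskedAutoregressiveFlow interprets as shift and log-scale;
# conditional_input_layers='first_layer' feeds the conditional input into the first
# hidden layer only.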
def make_maf(self, data):
distribution = self.base_dist
sample_shape = self.get_dims(data)
shift_scale_function = self.get_shift_scale_func()
bijector = tfb.MaskedAutoregressiveFlow(shift_scale_function)
maf = tfd.TransformedDistribution(tfd.Sample(distribution, sample_shape), bijector)
return maf
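# Added note: tfd.Sample(distribution, sample_shape) turns the scalar base Normal into
# a factorised distribution over all `dims` features; composing it with
# MaskedAutoregressiveFlow gives a MAF, for which log_prob is a single network pass
# while sampling is sequential across dimensions.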
class IAF(MAF):
def make_maf(self, data):
distribution = self.base_dist
sample_shape = self.get_dims(data)
shift_scale_function = self.get_shift_scale_func()
bijector = tfb.Invert(tfb.MaskedAutoregressiveFlow(shift_scale_function))
maf = tfd.TransformedDistribution(tfd.Sample(distribution, sample_shape), bijector)
return maf
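# Added note: IAF differs from MAF only by tfb.Invert, which swaps the forward and
# inverse passes -- sampling becomes a single pass while log_prob becomes the
# sequential direction.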
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
plt.style.use('seaborn')
from data_loader import load_data
from data_preprocesser import preprocess_data
from maf import MAF
def train(session, loss, optimizer, steps=int(1e5)):
""" optimize for all dimensions """
recorded_steps = []
recorded_losses = []
for i in range(steps):
_, loss_per_iteration = session.run([optimizer, loss])
if i % 100 == 0:
recorded_steps.append(i)
recorded_losses.append(loss_per_iteration)
if i % int(1e4) == 0:
print('Iteration {iteration}: {loss}'.format(iteration=i,loss=loss_per_iteration))
return recorded_losses
def plot_results(recorded_losses):
""" plot loss """
print('Displaying results...')
fig = plt.figure(figsize=(10,5))
x = np.arange(len(recorded_losses))
y = recorded_losses
m, b = np.polyfit(x, y, 1)
plt.scatter(x, y, s=10, alpha=0.3)
plt.plot(x, m*x+b, c="r")
plt.title('Loss per 100 iterations')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.tight_layout()
plt.show()
def main():
""" load data """
filename = 'prostate.xls'
directory = '/Users/kaanguney.keklikci/Data/'
loader = load_data(filename, directory)
loader.create_directory(directory)
data = loader.read_data(directory, filename)
print('Data successfully loaded...\n')
""" preprocess data """
fillna_vals = ['sz', 'sg', 'wt']
dropna_vals = ['ekg', 'age']
drop_vals = ['patno', 'sdate']
preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
data = preprocesser.dropna_features(data)
data = preprocesser.impute(data)
data = preprocesser.drop_features(data)
data = preprocesser.encode_categorical(data)
data = preprocesser.scale(data)
print('Data successfully preprocessed...\n')
""" set MAF parameters """
batch_size = 32
dtype = np.float32
tf_version = tf.__version__
params = 2
hidden_units = [512,512]
base_dist = tfp.distributions.Normal(loc=0., scale=1.)
dims = data.shape[1]
learning_rate = 1e-4
activation = 'relu'
hidden_degrees = 'random'
conditional=True
conditional_event_shape = (dims,)
event_shape = conditional_event_shape
conditional_input_layers = 'first_layer'
""" initialize samples """
maf = MAF(dtype, tf_version, batch_size,
params, hidden_units, base_dist, dims,
activation,
conditional, hidden_degrees,
conditional_event_shape,
conditional_input_layers,
event_shape
)
dims = maf.get_dims(data)
samples = maf.create_tensor(data)
print(f'TensorFlow version: {maf.tf_version}')
print(f'Number of dimensions: {maf.dims}')
print(f'Learning rate: {learning_rate}\n')
""" initialize MAF """
maf = maf.make_maf(data)
print('Successfully created model...\n')
""" initialize loss and optimizer """
loss = -tf.reduce_mean(maf.log_prob(samples, bijector_kwargs={'conditional_input': samples}))
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
session = tf.compat.v1.Session()
tf.compat.v1.set_random_seed(42)
session.run(tf.compat.v1.global_variables_initializer())
print('Optimizer and loss successfully defined...\n')
""" start training """
recorded_losses = train(session, loss, optimizer)
print('Training finished...\n')
""" display results """
plot_results(recorded_losses)
if __name__ == "__main__":
main()
""" use smaller learning rate for gradient descent or increase batch size """
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import tensorflow_probability as tfp
import tensorflow.python.util.deprecation as deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
import matplotlib.pyplot as plt
from data_loader import load_data
from data_preprocesser import preprocess_data
from maf import MAF
from experiment import Experiment
def train(session, loss, optimizer, steps=int(1e5)):
""" optimize for all dimensions """
start_time = time.time()
recorded_steps = []
recorded_losses = []
for i in range(steps):
_, loss_per_iteration = session.run([optimizer, loss])
if i % 100 == 0:
recorded_steps.append(i)
recorded_losses.append(loss_per_iteration)
if i % int(1e4) == 0:
print('Iteration {iteration}: {loss}'.format(iteration=i,loss=loss_per_iteration))
print('\nTraining completed...')
print(f'Training time: {time.time() - start_time} seconds')
return recorded_losses
def plot_results(recorded_losses):
""" plot loss """
print('Displaying results...')
fig = plt.figure(figsize=(10,5))
x = np.arange(len(recorded_losses))
y = recorded_losses
m, b = np.polyfit(x, y, 1)
plt.scatter(x, y, s=10, alpha=0.3)
plt.plot(x, m*x+b, c="r")
plt.title('Loss per 100 iterations')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.tight_layout()
plt.show()
def main():
""" load data """
filename = 'prostate.xls'
directory = '/Users/kaanguney.keklikci/Data/'
loader = load_data(filename, directory)
loader.create_directory(directory)
data = loader.read_data(directory, filename)
print('Data successfully loaded...\n')
""" preprocess data """
fillna_vals = ['sz', 'sg', 'wt']
dropna_vals = ['ekg', 'age']
drop_vals = ['patno', 'sdate']
preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
data = preprocesser.dropna_features(data)
data = preprocesser.impute(data)
data = preprocesser.drop_features(data)
data = preprocesser.encode_categorical(data)
data = preprocesser.scale(data)
print('Data successfully preprocessed...\n')
""" set MAF parameters """
batch_size = 32
dtype = np.float32
tf_version = tf.__version__
params = 2
hidden_units = [512,512]
base_dist = tfp.distributions.Normal(loc=0., scale=1., name="gaussian")
dims = data.shape[1]
learning_rate = 1e-4
steps = 1e4
activation = 'relu'
hidden_degrees = 'random'
conditional=True
conditional_event_shape = (dims,)
event_shape = conditional_event_shape
conditional_input_layers = 'first_layer'
""" initialize samples """
maf = MAF(dtype, tf_version, batch_size,
params, hidden_units, base_dist, dims,
activation,
conditional, hidden_degrees,
conditional_event_shape,
conditional_input_layers,
event_shape
)
dims = maf.get_dims(data)
samples = maf.create_tensor(data)
print(f'TensorFlow version: {maf.tf_version}')
print(f'Number of dimensions: {maf.dims}')
print(f'Learning rate: {learning_rate}\n')
""" initialize MAF """
maf = maf.make_maf(data)
print('Successfully created model...\n')
""" initialize loss and optimizer """
loss = -tf.reduce_mean(maf.log_prob(samples, bijector_kwargs={'conditional_input': samples}))
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
experiment = Experiment(optimizer, learning_rate, loss, steps)
keywords = ['adam', 'rmsprop', 'sgd']
for keyword in keywords:
session = tf.compat.v1.Session()
tf.compat.v1.set_random_seed(42)
experiment.change_optimizer(learning_rate, loss, keyword=keyword)
optimizer = experiment.get_optimizer()
session.run(tf.compat.v1.global_variables_initializer())
print(f'Optimizer: {optimizer.name}')
print('Optimizer and loss successfully defined...\n')
""" start training """
recorded_losses = train(session, loss, optimizer, steps=experiment.get_iteration_count())
print('Training finished...\n')
""" display results """
plot_results(recorded_losses)
if __name__ == "__main__":
main()
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
tf.compat.v1.disable_eager_execution()
import os
import matplotlib.pyplot as plt
plt.style.use('seaborn')
tfd = tfp.distributions
tfb = tfp.bijectors
class Planar(tfb.Bijector, tf.Module):
def __init__(self, input_dimensions, case='density_estimation', validate_args=False, name='planar_flow'):
""" usage of bijector inheritance """
super(Planar, self).__init__(
forward_min_event_ndims=1,
inverse_min_event_ndims=1,
validate_args=validate_args,
name=name)
self.event_ndims = 1
self.case = case
try:
assert self.case in ('density_estimation', 'sampling')
except AssertionError:
print('Case is not defined. Available options for case: density_estimation, sampling')
self.u = tf.Variable(np.random.uniform(-1., 1., size=(int(input_dimensions))), name='u', dtype=tf.float32, trainable=True)
self.w = tf.Variable(np.random.uniform(-1., 1., size=(int(input_dimensions))), name='w', dtype=tf.float32, trainable=True)
self.b = tf.Variable(np.random.uniform(-1., 1., size=(1)), name='b', dtype=tf.float32, trainable=True)
def h(self, y):
return tf.math.tanh(y)
def h_prime(self, y):
return 1.0 - tf.math.tanh(y) ** 2.0
def alpha(self):
wu = tf.tensordot(self.w, self.u, 1)
m = -1.0 + tf.nn.softplus(wu)
return m - wu
def _u(self):
if tf.tensordot(self.w, self.u, 1) <= -1:
alpha = self.alpha()
z_para = tf.transpose(alpha * self.w / tf.math.sqrt(tf.reduce_sum(self.w ** 2.0)))
self.u.assign_add(z_para) # self.u = self.u + z_para
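# Added note: alpha() and _u() implement the reparametrisation of u intended to keep the
# planar flow invertible (Rezende & Mohamed 2015, Appendix A): when w.u <= -1, u is
# shifted by (m(w.u) - w.u) along a normalised w, with m(x) = -1 + softplus(x), so that
# w.u >= -1 afterwards. Note that _u() is defined but never called by the forward or
# inverse passes below.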
def _forward_func(self, zk):
inter_1 = self.h(tf.tensordot(zk, self.w, 1) + self.b)
return tf.add(zk, tf.tensordot(inter_1, self.u, 0))
def _forward(self, zk):
if self.case == 'sampling':
return self._forward_func(zk)
else:
raise NotImplementedError('_forward is not implemented for density_estimation')
def _inverse(self, zk):
if self.case == 'density_estimation':
return self._forward_func(zk)
else:
raise NotImplementedError('_inverse is not implemented for sampling')
def _log_det_jacobian(self, zk):
psi = tf.tensordot(self.h_prime(tf.tensordot(zk, self.w, 1) + self.b), self.w, 0)
det = tf.math.abs(1.0 + tf.tensordot(psi, self.u, 1))
return tf.math.log(det)
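# Added note: this is the planar-flow determinant |det df/dz| = |1 + u.psi(z)| with
# psi(z) = h'(w.z + b) * w, so the method returns log|det| directly.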
def _forward_log_det_jacobian(self, zk):
if self.case == 'sampling':
return -self._log_det_jacobian(zk)
else:
raise NotImplementedError('_forward_log_det_jacobian is not implemented for density_estimation')
def _inverse_log_det_jacobian(self, zk):
return self._log_det_jacobian(zk)
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
from data_loader import load_data
from data_preprocesser import preprocess_data
from planar import Planar
def train(session, loss, optimizer, steps=int(1e5)):
""" optimize for all dimensions """
recorded_steps = []
recorded_losses = []
for i in range(steps):
_, loss_per_iteration = session.run([optimizer, loss])
if i % 100 == 0:
recorded_steps.append(i)
recorded_losses.append(loss_per_iteration)
if i % int(1e4) == 0:
print('Iteration {iteration}: {loss}'.format(iteration=i,loss=loss_per_iteration))
return recorded_losses
def plot_results(recorded_losses):
""" plot loss """
print('Displaying results...')
fig = plt.figure(figsize=(10,5))
x = np.arange(len(recorded_losses))
y = recorded_losses
m, b = np.polyfit(x, y, 1)
plt.scatter(x, y, s=10, alpha=0.3)
plt.plot(x, m*x+b, c="r")
plt.title('Loss per 100 iterations')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.tight_layout()
plt.show()
def create_tensor(data, batch_size):
dataset = tf.data.Dataset.from_tensor_slices(data.astype(np.float32))
dataset = dataset.repeat()
dataset = dataset.shuffle(buffer_size=data.shape[0])
dataset = dataset.prefetch(2*batch_size)
dataset = dataset.batch(batch_size)
data_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
samples = data_iterator.get_next()
return samples
"""
if any error on tensorflow is displayed claiming tf.float32 is not displayed,
do the following (one of them is probably enough)
** downgrade keras to 2.3.1
** replace tf.float32 with np.float32
"""
def check_version():
print(f'Tensorflow version: {tf.__version__}')
print(f'Tensorflow-probability version: {tfp.__version__}')
print(f'Keras version: {tf.keras.__version__}\n')
def main():
""" load data """
filename = 'prostate.xls'
directory = '/Users/kaanguney.keklikci/Data/'
loader = load_data(filename, directory)
loader.create_directory(directory)
data = loader.read_data(directory, filename)
print('Data successfully loaded...\n')
""" preprocess data """
fillna_vals = ['sz', 'sg', 'wt']
dropna_vals = ['ekg', 'age']
drop_vals = ['patno', 'sdate']
preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
data = preprocesser.dropna_features(data)
data = preprocesser.impute(data)
data = preprocesser.drop_features(data)
data = preprocesser.encode_categorical(data)
data = preprocesser.scale(data)
print('Data successfully preprocessed...\n')
""" set Planar parameters """
tfd = tfp.distributions
tfb = tfp.bijectors
batch_size = 32
dtype = np.float32
layers = 8
dims = data.shape[1]
# multivariate normal for base distribution
base_dist = tfd.MultivariateNormalDiag(loc=tf.zeros(shape=dims, dtype=dtype))
learning_rate = 1e-4
""" initialize samples """
samples = create_tensor(data, batch_size)
""" make Planar """
bijectors = []
for i in range(0, layers):
bijectors.append(Planar(input_dimensions=dims, case='density_estimation'))
bijector = tfb.Chain(bijectors=list(reversed(bijectors)), name='chain_of_planar')
planar_flow = tfd.TransformedDistribution(
distribution=base_dist,
bijector=bijector
)
loss = -tf.reduce_mean(planar_flow.log_prob(samples))
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
session = tf.compat.v1.Session()
tf.compat.v1.set_random_seed(42)
session.run(tf.compat.v1.global_variables_initializer())
print('Optimizer and loss successfully defined...\n')
""" start training """
recorded_losses = train(session, loss, optimizer)
print('Training finished...\n')
""" display results """
plot_results(recorded_losses)
if __name__ == "__main__":
main()
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
from data_loader import load_data
from data_preprocesser import preprocess_data
from planar import Planar
from experiment import Experiment
import tensorflow.python.util.deprecation as deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
def train(session, loss, optimizer, steps=int(1e5)):
""" optimize for all dimensions """
recorded_steps = []
recorded_losses = []
for i in range(steps):
_, loss_per_iteration = session.run([optimizer, loss])
if i % 100 == 0:
recorded_steps.append(i)
recorded_losses.append(loss_per_iteration)
if i % int(1e4) == 0:
print('Iteration {iteration}: {loss}'.format(iteration=i,loss=loss_per_iteration))
return recorded_losses
def plot_results(recorded_losses):
""" plot loss """
print('Displaying results...')
fig = plt.figure(figsize=(10,5))
x = np.arange(len(recorded_losses))
y = recorded_losses
m, b = np.polyfit(x, y, 1)
plt.scatter(x, y, s=10, alpha=0.3)
plt.plot(x, m*x+b, c="r")
plt.title('Loss per 100 iterations')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.tight_layout()
plt.show()
def create_tensor(data, batch_size):
dataset = tf.data.Dataset.from_tensor_slices(data.astype(np.float32))
dataset = dataset.repeat()
dataset = dataset.shuffle(buffer_size=data.shape[0])
dataset = dataset.prefetch(2*batch_size)
dataset = dataset.batch(batch_size)
data_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
samples = data_iterator.get_next()
return samples
"""
if any error on tensorflow is displayed claiming tf.float32 is not displayed,
do the following (one of them is probably enough)
** downgrade keras to 2.3.1
** replace tf.float32 with np.float32
"""
def check_version():
print(f'Tensorflow version: {tf.__version__}')
print(f'Tensorflow-probability version: {tfp.__version__}')
print(f'Keras version: {tf.keras.__version__}\n')
def main():
""" load data """
filename = 'prostate.xls'
directory = '/Users/kaanguney.keklikci/Data/'
loader = load_data(filename, directory)
loader.create_directory(directory)
data = loader.read_data(directory, filename)
print('Data successfully loaded...\n')
""" preprocess data """
fillna_vals = ['sz', 'sg', 'wt']
dropna_vals = ['ekg', 'age']
drop_vals = ['patno', 'sdate']
preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
data = preprocesser.dropna_features(data)
data = preprocesser.impute(data)
data = preprocesser.drop_features(data)
data = preprocesser.encode_categorical(data)
data = preprocesser.scale(data)
print('Data successfully preprocessed...\n')
""" set Planar parameters """
tfd = tfp.distributions
tfb = tfp.bijectors
batch_size = 32
dtype = np.float32
# layers should be higher but for comparison fix layers to 2 for every initial test
layers = 2
dims = data.shape[1]
# multivariate normal for base distribution
base_dist = tfd.MultivariateNormalDiag(loc=tf.zeros(shape=dims, dtype=dtype))
learning_rate = 1e-4
steps = int(1e4)
""" initialize samples """
samples = create_tensor(data, batch_size)
""" make Planar """
bijectors = []
for i in range(0, layers):
bijectors.append(Planar(input_dimensions=dims, case='density_estimation'))
bijector = tfb.Chain(bijectors=list(reversed(bijectors)), name='chain_of_planar')
planar_flow = tfd.TransformedDistribution(
distribution=base_dist,
bijector=bijector
)
loss = -tf.reduce_mean(planar_flow.log_prob(samples))
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
experiment = Experiment(optimizer, learning_rate, loss, steps)
keywords = ['adam', 'rmsprop', 'sgd']
for keyword in keywords:
session = tf.compat.v1.Session()
tf.compat.v1.set_random_seed(42)
experiment.change_optimizer(learning_rate, loss, keyword=keyword)
optimizer = experiment.get_optimizer()
session.run(tf.compat.v1.global_variables_initializer())
print(f'Optimizer: {optimizer.name}')
print('Optimizer and loss successfully defined...\n')
""" start training """
recorded_losses = train(session, loss, optimizer, steps=experiment.get_iteration_count())
print('Training finished...\n')
""" display results """
plot_results(recorded_losses)
if __name__ == "__main__":
main()
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
import os
import matplotlib.pyplot as plt
plt.style.use('seaborn')
tfd = tfp.distributions
tfb = tfp.bijectors
# experimental -- inspired by
# "Variational Inference with Normalizing Flows", Rezende & Mohamed (2015)
# do not use for this setting: it distorts the base distribution heavily and does not converge in the end
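# For reference (added): the radial flow in that paper is
#   f(z) = z + beta * h(alpha, r) * (z - z0),  r = |z - z0|,  h(alpha, r) = 1 / (alpha + r)
# and it is invertible when beta >= -alpha, which is what the assert in
# RadialFlow.__init__ below checks.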
def parametrize(b):
b = tf.cast(b, tf.float32)
return tf.math.log1p(tf.exp(b)).numpy()
def h(a, r):
return 1 / (a + r)
def h_prime(a, r):
return -1 / (a + r)**2
class RadialFlow(tfb.Bijector):
def __init__(self, a, b, x0, validate_args=True, name='radial-flow'):
self.a = tf.cast(a, tf.float32)
self.b = tf.cast(b, tf.float32)
self.x0 = tf.cast(x0, tf.float32)
super(RadialFlow, self).__init__(validate_args=validate_args,
forward_min_event_ndims=0,
name=name)
if validate_args:
assert tf.math.greater_equal(self.b, -self.a).numpy() == True
def _forward(self, x):
r = tf.abs(x - self.x0)
zhat = (x - self.x0) / r
y = self.x0 + r*zhat + r * zhat * self.b * h(self.a, r)
return y
def _inverse(self, y):
r = tf.abs(y - self.x0)
zhat = (y - self.x0) / r
return self.b * r * zhat * h(self.a, r)
def _forward_log_det_jacobian(self, y):
try:
n_dims = y.shape[1]
except IndexError as e:
raise RuntimeError('Input is one dimensional!')
r = tf.abs(y - self.x0)
dh = h_prime(self.a, r)
hh = h(self.a, r)
# log|det J| per Rezende & Mohamed (2015): (d-1)*log|1 + b*h| + log|1 + b*h + b*h'*r|
return (n_dims - 1) * tf.math.log(tf.abs(1. + self.b * hh)) + tf.math.log(tf.abs(1. + self.b * hh + self.b * dh * r))
def _inverse_log_det_jacobian(self, y):
return -self._forward_log_det_jacobian(self._inverse(y))
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
plt.style.use('seaborn-paper')
from data_loader import load_data
from data_preprocesser import preprocess_data
from radial import RadialFlow
def main():
tfd = tfp.distributions
tfb = tfp.bijectors
""" load data """
filename = 'prostate.xls'
directory = '/Users/kaanguney.keklikci/Data/'
loader = load_data(filename, directory)
loader.create_directory(directory)
data = loader.read_data(directory, filename)
print('Data successfully loaded...\n')
""" preprocess data """
fillna_vals = ['sz', 'sg', 'wt']
dropna_vals = ['ekg', 'age']
drop_vals = ['patno', 'sdate']
preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
data = preprocesser.dropna_features(data)
data = preprocesser.impute(data)
data = preprocesser.drop_features(data)
data = preprocesser.encode_categorical(data)
data = preprocesser.scale(data)
print('Data successfully preprocessed...\n')
""" define the base distributon as bivariate gaussian """
base_dist = tfd.Independent(tfd.Normal(loc=[2., -0.5], scale=[1.,1.]),
reinterpreted_batch_ndims=1)
""" instantiate the bijector (a,b,x0) """
n = 1000
a = 10.
b = -10.
x0 = np.array([-0.5,1.]).astype(np.float32).reshape(-1, 2)
bijector = RadialFlow(a, b, x0)
print(f'x0 shape: {x0.shape}')
x = np.linspace(start=-4, stop=4, num=n).astype(np.float32).reshape(-1, 2)
plt.title('Forward transformation')
plt.plot(x, bijector.forward(x))
plt.show()
plt.plot(x, bijector.inverse(x))
plt.title('Inverse transformation')
plt.show()
plt.plot(x, bijector.forward_log_det_jacobian(x))
plt.title('Jacobian determinant')
plt.show()
plt.plot(x, bijector.inverse_log_det_jacobian(x))
plt.title('Inverse Jacobian determinant')
plt.show()
""" create transformed distribution """
tfd_dist = tfd.TransformedDistribution(distribution=base_dist,
bijector=bijector
)
# prior training
plt.figure(figsize=(12,5))
plt.plot(tfd_dist.prob(x), label='Trainable')
plt.plot(base_dist.prob(x), label='Base')
plt.title('Target and Trainable distribution')
plt.legend()
plt.show()
# sample, batch -- train, validation
x_train = base_dist.sample(10000)
x_train = tf.data.Dataset.from_tensor_slices(x_train)
x_train = x_train.batch(int(n/2))
x_valid = base_dist.sample(1000)
x_valid = tf.data.Dataset.from_tensor_slices(x_valid)
x_valid = x_valid.batch(int(n/2))
print(x_train.element_spec)
print(x_valid.element_spec)
print()
# instantiate trainable bijector
trainable_bijector = RadialFlow(tf.Variable(a,
name='alpha'),
tf.Variable(b,
name='beta'),
tf.Variable(x0,
name='ref'))
# instantiate trainable distribution
trainable_dist = tfd.TransformedDistribution(tfd_dist,
trainable_bijector
)
# Train the bijector
num_epochs = 10
opt = tf.keras.optimizers.Adam()
train_losses = []
valid_losses = []
norm = 1e3
for epoch in range(num_epochs):
print("Epoch {}...".format(epoch))
train_loss = tf.keras.metrics.Mean()
val_loss = tf.keras.metrics.Mean()
for train_batch in x_train:
with tf.GradientTape() as tape:
tape.watch(trainable_bijector.trainable_variables)
loss = -trainable_dist.log_prob(train_batch)
train_loss(loss)
grads = tape.gradient(loss, trainable_bijector.trainable_variables)
grads, _ = tf.clip_by_global_norm(grads, norm)
# grads = tf.reshape(tf.nn.softmax(grads[-1], axis=1), [2])
# note that both alternatives work almost identically
opt.apply_gradients(zip(grads, trainable_bijector.trainable_variables))
train_losses.append(train_loss.result().numpy())
# validation
for valid_batch in x_valid:
loss = -trainable_dist.log_prob(valid_batch)
val_loss(loss)
valid_losses.append(val_loss.result().numpy())
# Plot the learning curves
plt.plot(train_losses, label='train')
plt.plot(valid_losses, label='valid')
plt.legend()
plt.xlabel("Epochs")
plt.ylabel("Negative log likelihood")
plt.title("Training and validation loss curves")
plt.show()
# Plot the data and learned distributions
plt.figure(figsize=(12,5))
plt.plot(trainable_dist.prob(x), label='Learned')
plt.plot(base_dist.prob(x), label='Data')
plt.legend()
plt.show()
### best result obtained with tuning displayed as above
### radial flows converge fast, more epochs overfit
### DO NOT CHANGE validate_args=True
### DOES NOT USE DATASET YET
### FOR VISUALIZATION PURPOSES IN 2D
### WILL INTEGRATE DATASET AFTER LEARNING THE DISTRIBUTION
if __name__ == '__main__':
main()
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
tf.compat.v1.disable_eager_execution()
import os
import matplotlib.pyplot as plt
plt.style.use('seaborn')
from maf import MAF
tfd = tfp.distributions
tfb = tfp.bijectors
class RealNVP:
def __init__(self, dtype, tf_version, batch_size, params, hidden_units, base_dist, dims, shift_only, is_constant_jacobian, masked_dimension_count):
self.tf_version = tf_version
self.dtype = dtype
self.base_dist = base_dist
self.dims = dims
self.params = params
self.hidden_units = hidden_units
self.batch_size = batch_size
self.shift_only = shift_only
self.is_constant_jacobian = is_constant_jacobian
self.masked_dimension_count = masked_dimension_count
def get_tf_version(self):
return self.tf_version
def get_session(self):
return tf.compat.v1.Session()
def get_dims(self, data):
return data.shape[1]
def create_tensor(self, data):
dataset = tf.data.Dataset.from_tensor_slices(data.astype(self.dtype))
dataset = dataset.repeat()
dataset = dataset.shuffle(buffer_size=data.shape[0])
dataset = dataset.prefetch(2*self.batch_size)
dataset = dataset.batch(self.batch_size)
data_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
samples = data_iterator.get_next()
return samples
def override_masked_dimension_count(self, new_dim):
self.masked_dimension_count = new_dim
def get_shift_scale_func(self, data):
func = tfb.real_nvp_default_template(self.hidden_units, self.shift_only)
return func
def make_realnvp(self,data):
distribution = self.base_dist
sample_shape = self.get_dims(data)
shift_scale_function = self.get_shift_scale_func(data)
bijector = tfb.RealNVP(num_masked=self.masked_dimension_count, shift_and_log_scale_fn=shift_scale_function, is_constant_jacobian=self.is_constant_jacobian)
realnvp = tfd.TransformedDistribution(tfd.Sample(distribution, sample_shape), bijector)
return realnvp
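# Added note: with num_masked = dims - 1, tfb.RealNVP passes the first dims-1 features
# through unchanged and transforms the last feature conditioned on them;
# real_nvp_default_template builds that conditioner network, and shift_only=True makes
# it an additive (NICE-style) coupling, hence is_constant_jacobian=True.
# Hedged usage sketch, mirroring the RealNVP training script below:
#
#   realnvp = RealNVP(np.float32, tf.__version__, 32, 2, [512, 512],
#                     tfd.Normal(loc=0., scale=1.), dims, shift_only=True,
#                     is_constant_jacobian=True, masked_dimension_count=dims - 1)
#   dist = realnvp.make_realnvp(data)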
import warnings
warnings.filterwarnings('ignore')
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
plt.style.use('seaborn')
from data_loader import load_data
from data_preprocesser import preprocess_data
from realnvp import RealNVP
def train(session, loss, optimizer, steps=int(1e5)):
""" optimize for all dimensions """
recorded_steps = []
recorded_losses = []
for i in range(steps):
_, loss_per_iteration = session.run([optimizer, loss])
if i % 100 == 0:
recorded_steps.append(i)
recorded_losses.append(loss_per_iteration)
if i % int(1e4) == 0:
print('Iteration {iteration}: {loss}'.format(iteration=i,loss=loss_per_iteration))
return recorded_losses
def plot_results(recorded_losses):
""" plot loss """
print('Displaying results...')
fig = plt.figure(figsize=(10,5))
x = np.arange(len(recorded_losses))
y = recorded_losses
m, b = np.polyfit(x, y, 1)
plt.scatter(x, y, s=10, alpha=0.3)
plt.plot(x, m*x+b, c="r")
plt.title('Loss per 100 iterations')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.tight_layout()
plt.show()
def main():
""" load data """
filename = 'prostate.xls'
directory = '/Users/kaanguney.keklikci/Data/'
loader = load_data(filename, directory)
loader.create_directory(directory)
data = loader.read_data(directory, filename)
print('Data successfully loaded...\n')
""" preprocess data """
fillna_vals = ['sz', 'sg', 'wt']
dropna_vals = ['ekg', 'age']
drop_vals = ['patno', 'sdate']
preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
data = preprocesser.dropna_features(data)
data = preprocesser.impute(data)
data = preprocesser.drop_features(data)
data = preprocesser.encode_categorical(data)
data = preprocesser.scale(data)
print('Data successfully preprocessed...\n')
""" set MAF parameters """
batch_size = 32
dtype = np.float32
tf_version = tf.__version__
params = 2
hidden_units = [512,512]
base_dist = tfp.distributions.Normal(loc=0., scale=1.)
dims = data.shape[1]
learning_rate = 1e-4
""" initialize samples """
realnvp = RealNVP(dtype, tf_version, batch_size, params, hidden_units,
base_dist, dims, shift_only=True,
is_constant_jacobian=True, masked_dimension_count=dims-1)
dims = realnvp.get_dims(data)
samples = realnvp.create_tensor(data)
print(f'TensorFlow version: {realnvp.tf_version}')
print(f'Number of dimensions: {realnvp.dims}')
print(f'Learning rate: {learning_rate}')
print(f'Number of masked dimensions: {realnvp.masked_dimension_count}\n')
""" initialize RealNVP """
realnvp = realnvp.make_realnvp(data)
print('Successfully created model...\n')
""" initialize loss and optimizer """
loss = -tf.reduce_mean(realnvp.log_prob(samples))
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
session = tf.compat.v1.Session()
tf.compat.v1.set_random_seed(42)
session.run(tf.compat.v1.global_variables_initializer())
print('Optimizer and loss successfully defined...\n')
""" start training """
recorded_losses = train(session, loss, optimizer)
print('Training finished...\n')
""" display results """
plot_results(recorded_losses)
if __name__ == "__main__":
main()
import warnings
warnings.filterwarnings('ignore')
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import tensorflow_probability as tfp
import tensorflow.python.util.deprecation as deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
import matplotlib.pyplot as plt
plt.style.use('seaborn')
from data_loader import load_data
from data_preprocesser import preprocess_data
from realnvp import RealNVP
from experiment import Experiment
def train(session, loss, optimizer, steps=int(1e5)):
""" optimize for all dimensions """
recorded_steps = []
recorded_losses = []
for i in range(steps):
_, loss_per_iteration = session.run([optimizer, loss])
if i % 100 == 0:
recorded_steps.append(i)
recorded_losses.append(loss_per_iteration)
if i % int(1e4) == 0:
print('Iteration {iteration}: {loss}'.format(iteration=i,loss=loss_per_iteration))
return recorded_losses
def plot_results(recorded_losses):
""" plot loss """
print('Displaying results...')
fig = plt.figure(figsize=(10,5))
x = np.arange(len(recorded_losses))
y = recorded_losses
m, b = np.polyfit(x, y, 1)
plt.scatter(x, y, s=10, alpha=0.3)
plt.plot(x, m*x+b, c="r")
plt.title('Loss per 100 iterations')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.tight_layout()
plt.show()
def main():
""" load data """
filename = 'prostate.xls'
directory = '/Users/kaanguney.keklikci/Data/'
loader = load_data(filename, directory)
loader.create_directory(directory)
data = loader.read_data(directory, filename)
print('Data successfully loaded...\n')
""" preprocess data """
fillna_vals = ['sz', 'sg', 'wt']
dropna_vals = ['ekg', 'age']
drop_vals = ['patno', 'sdate']
preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
data = preprocesser.dropna_features(data)
data = preprocesser.impute(data)
data = preprocesser.drop_features(data)
data = preprocesser.encode_categorical(data)
data = preprocesser.scale(data)
print('Data successfully preprocessed...\n')
""" set MAF parameters """
batch_size = 32
dtype = np.float32
tf_version = tf.__version__
params = 2
hidden_units = [512,512]
base_dist = tfp.distributions.Normal(loc=0., scale=1.)
dims = data.shape[1]
learning_rate = 1e-4
steps = 1e4
""" initialize samples """
realnvp = RealNVP(dtype, tf_version, batch_size, params, hidden_units,
base_dist, dims, shift_only=True,
is_constant_jacobian=True, masked_dimension_count=dims-1)
dims = realnvp.get_dims(data)
samples = realnvp.create_tensor(data)
print(f'TensorFlow version: {realnvp.tf_version}')
print(f'Number of dimensions: {realnvp.dims}')
print(f'Learning rate: {learning_rate}')
print(f'Number of masked dimensions: {realnvp.masked_dimension_count}\n')
""" initialize RealNVP """
realnvp = realnvp.make_realnvp(data)
print('Successfully created model...\n')
""" initialize loss and optimizer """
loss = -tf.reduce_mean(realnvp.log_prob(samples))
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
experiment = Experiment(optimizer, learning_rate, loss, steps)
keywords = ['adam', 'rmsprop', 'sgd']
for keyword in keywords:
session = tf.compat.v1.Session()
tf.compat.v1.set_random_seed(42)
experiment.change_optimizer(learning_rate, loss, keyword=keyword)
optimizer = experiment.get_optimizer()
session.run(tf.compat.v1.global_variables_initializer())
print('Optimizer and loss successfully defined...\n')
""" start training """
recorded_losses = train(session, loss, optimizer, steps=experiment.get_iteration_count())
print('Training finished...\n')
""" display results """
plot_results(recorded_losses)
if __name__ == "__main__":
main()
import numpy as np
import pandas as pd
from pathlib import Path
import xlrd
class load_data:
def __init__(self, filename, directory):
self.filename = filename
self.directory = directory
def create_directory(self, directory):
Path(directory).mkdir(parents=True, exist_ok=True)
def read_data(self, directory, filename):
data_dir = self.directory + self.filename
wb = xlrd.open_workbook(data_dir, encoding_override='iso-8859-1')
return pd.read_excel(wb)
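# Hedged usage sketch (added), matching the training scripts; the path below is just a
# placeholder for a machine-specific data directory:
#
#   loader = load_data('prostate.xls', '/path/to/Data/')
#   loader.create_directory('/path/to/Data/')
#   data = loader.read_data('/path/to/Data/', 'prostate.xls')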
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
class preprocess_data:
def __init__(self, scaler, fillna_vals, dropna_vals, drop_vals):
self.scaler = scaler
self.fillna_vals = fillna_vals
self.dropna_vals = dropna_vals
self.drop_vals = drop_vals
def dropna_features(self, data):
data = data.dropna(subset = self.dropna_vals)
return data
def impute(self, data):
for feature in self.fillna_vals:
data[feature] = data[feature].fillna(value = np.mean(data[feature]))
return data
def drop_features(self, data):
data.drop(self.drop_vals, axis=1, inplace=True)
data.reset_index(drop=True, inplace=True)
return data
def encode_categorical(self, data):
data = pd.get_dummies(data)
return data
def scale(self, data):
columns = data.columns
data = self.scaler.fit_transform(data)
data = pd.DataFrame(data,columns=columns)
return data
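# Hedged usage sketch (added), matching the order used in the training scripts:
#
#   preprocesser = preprocess_data(StandardScaler(), ['sz', 'sg', 'wt'],
#                                  ['ekg', 'age'], ['patno', 'sdate'])
#   data = preprocesser.dropna_features(data)
#   data = preprocesser.impute(data)
#   data = preprocesser.drop_features(data)
#   data = preprocesser.encode_categorical(data)
#   data = preprocesser.scale(data)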