Commit 18b607dd authored by Kaan Güney Keklikçi

realnvp with constant jacobian matrix

parent fa04fa1e
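Context for the commit message: with shift_only=True the affine coupling layer applies only a learned translation to the unmasked dimensions, so the Jacobian is triangular with ones on the diagonal and log|det J| = 0 for every input, which is exactly what is_constant_jacobian=True asserts to TFP. A minimal standalone sketch (not part of this commit; the zero-shift lambda below is a stand-in for the learned network) that checks this:

import tensorflow as tf
import tensorflow_probability as tfp
tfb = tfp.bijectors

# Shift-only coupling: y = [x_masked, x_rest + shift(x_masked)].
# With no scale term the Jacobian is triangular with unit diagonal,
# so forward_log_det_jacobian is identically zero.
shift_fn = lambda x, output_units, **kwargs: (
    tf.zeros(tf.concat([tf.shape(x)[:-1], [output_units]], axis=0)), None)
bijector = tfb.RealNVP(num_masked=2,
                       shift_and_log_scale_fn=shift_fn,
                       is_constant_jacobian=True)
x = tf.random.normal([4, 3])
print(bijector.forward_log_det_jacobian(x, event_ndims=1))  # all zeros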
import os
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
tf.compat.v1.disable_eager_execution()
import matplotlib.pyplot as plt
plt.style.use('seaborn')

tfd = tfp.distributions
tfb = tfp.bijectors


class RealNVP:
    def __init__(self, dtype, tf_version, batch_size, params, hidden_units,
                 base_dist, dims, shift_only, is_constant_jacobian,
                 masked_dimension_count):
        self.tf_version = tf_version
        self.dtype = dtype
        self.base_dist = base_dist
        self.dims = dims
        self.params = params
        self.hidden_units = hidden_units
        self.batch_size = batch_size
        self.shift_only = shift_only
        self.is_constant_jacobian = is_constant_jacobian
        self.masked_dimension_count = masked_dimension_count

    def get_tf_version(self):
        return self.tf_version

    def get_session(self):
        return tf.compat.v1.Session()

    def get_dims(self, data):
        return data.shape[1]

    def create_tensor(self, data):
        # Build an infinite, shuffled input pipeline; batch before
        # prefetching so whole batches are staged ahead of the session.
        dataset = tf.data.Dataset.from_tensor_slices(data.astype(self.dtype))
        dataset = dataset.repeat()
        dataset = dataset.shuffle(buffer_size=data.shape[0])
        dataset = dataset.batch(self.batch_size)
        dataset = dataset.prefetch(2 * self.batch_size)
        data_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
        samples = data_iterator.get_next()
        return samples

    def override_masked_dimension_count(self, new_dim):
        self.masked_dimension_count = new_dim

    def get_shift_scale_func(self):
        # shift_only=True drops the scale term, which is what makes the
        # coupling Jacobian constant (log|det J| = 0).
        return tfb.real_nvp_default_template(self.hidden_units,
                                             shift_only=self.shift_only)

    def make_realnvp(self, data):
        distribution = self.base_dist
        sample_shape = self.get_dims(data)
        shift_scale_function = self.get_shift_scale_func()
        bijector = tfb.RealNVP(
            num_masked=self.masked_dimension_count,
            shift_and_log_scale_fn=shift_scale_function,
            is_constant_jacobian=self.is_constant_jacobian)
        realnvp = tfd.TransformedDistribution(
            tfd.Sample(distribution, sample_shape), bijector)
        return realnvp
\ No newline at end of file
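Note on the base distribution above: make_realnvp wraps a scalar Normal in tfd.Sample, which promotes dims i.i.d. draws into a single event so that log_prob of one data row sums the per-dimension log-densities. A quick standalone check (not part of the commit):

import tensorflow_probability as tfp
tfd = tfp.distributions

base = tfd.Sample(tfd.Normal(loc=0., scale=1.), sample_shape=8)
print(base.event_shape)  # (8,)  one 8-dimensional event
print(base.batch_shape)  # ()    no batch dimensions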
import warnings
warnings.filterwarnings('ignore')
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
plt.style.use('seaborn')
from data_loader import load_data
from data_preprocesser import preprocess_data
from realnvp import RealNVP


def train(session, loss, optimizer, steps=int(1e5)):
    """ optimize for all dimensions """
    recorded_steps = []
    recorded_losses = []
    for i in range(steps):
        _, loss_per_iteration = session.run([optimizer, loss])
        if i % 100 == 0:
            recorded_steps.append(i)
            recorded_losses.append(loss_per_iteration)
        if i % int(1e4) == 0:
            print('Iteration {iteration}: {loss}'.format(iteration=i, loss=loss_per_iteration))
    return recorded_losses


def plot_results(recorded_losses):
    """ plot loss """
    print('Displaying results...')
    plt.figure(figsize=(10, 5))
    x = np.arange(len(recorded_losses))
    y = recorded_losses
    m, b = np.polyfit(x, y, 1)
    plt.scatter(x, y, s=10, alpha=0.3)
    plt.plot(x, m * x + b, c='r')
    plt.title('Loss per 100 iterations')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.tight_layout()
    plt.show()


def main():
    """ load data """
    filename = 'prostate.xls'
    directory = '/Users/kaanguney.keklikci/Data/'
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')

    """ preprocess data """
    fillna_vals = ['sz', 'sg', 'wt']
    dropna_vals = ['ekg', 'age']
    drop_vals = ['patno', 'sdate']
    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')

    """ set RealNVP parameters """
    batch_size = 32
    dtype = np.float32
    tf_version = tf.__version__
    params = 2
    hidden_units = [512, 512]
    base_dist = tfp.distributions.Normal(loc=0., scale=1.)
    dims = data.shape[1]
    learning_rate = 1e-4

    """ initialize samples """
    realnvp = RealNVP(dtype, tf_version, batch_size, params, hidden_units,
                      base_dist, dims, shift_only=True,
                      is_constant_jacobian=True, masked_dimension_count=dims - 1)
    dims = realnvp.get_dims(data)
    samples = realnvp.create_tensor(data)
    print(f'TensorFlow version: {realnvp.tf_version}')
    print(f'Number of dimensions: {realnvp.dims}')
    print(f'Learning rate: {learning_rate}')
    print(f'Number of masked dimensions: {realnvp.masked_dimension_count}\n')

    """ initialize RealNVP """
    realnvp = realnvp.make_realnvp(data)
    print('Successfully created model...\n')

    """ initialize loss and optimizer """
    # Negative log-likelihood of the minibatch under the flow.
    loss = -tf.reduce_mean(realnvp.log_prob(samples))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
    session = tf.compat.v1.Session()
    tf.compat.v1.set_random_seed(42)
    session.run(tf.compat.v1.global_variables_initializer())
    print('Optimizer and loss successfully defined...\n')

    """ start training """
    recorded_losses = train(session, loss, optimizer)
    print('Training finished...\n')

    """ display results """
    plot_results(recorded_losses)


if __name__ == "__main__":
    main()
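Once trained, the same TransformedDistribution supports generation as well as density evaluation, although the script above only uses log_prob. A hedged sketch of drawing synthetic rows, assuming the realnvp distribution and session from main() are still in scope (names taken from that scope, not new API):

# Hypothetical continuation of main() above: generate five synthetic
# rows by sampling the base distribution and pushing it through the flow.
generated = realnvp.sample(5)   # graph-mode tensor, shape (5, dims)
print(session.run(generated))   # variables were already initialized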
import warnings
warnings.filterwarnings('ignore')
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import tensorflow_probability as tfp
import tensorflow.python.util.deprecation as deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
import matplotlib.pyplot as plt
plt.style.use('seaborn')
from data_loader import load_data
from data_preprocesser import preprocess_data
from realnvp import RealNVP
from experiment import Experiment


def train(session, loss, optimizer, steps=int(1e5)):
    """ optimize for all dimensions """
    recorded_steps = []
    recorded_losses = []
    for i in range(steps):
        _, loss_per_iteration = session.run([optimizer, loss])
        if i % 100 == 0:
            recorded_steps.append(i)
            recorded_losses.append(loss_per_iteration)
        if i % int(1e4) == 0:
            print('Iteration {iteration}: {loss}'.format(iteration=i, loss=loss_per_iteration))
    return recorded_losses


def plot_results(recorded_losses):
    """ plot loss """
    print('Displaying results...')
    plt.figure(figsize=(10, 5))
    x = np.arange(len(recorded_losses))
    y = recorded_losses
    m, b = np.polyfit(x, y, 1)
    plt.scatter(x, y, s=10, alpha=0.3)
    plt.plot(x, m * x + b, c='r')
    plt.title('Loss per 100 iterations')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.tight_layout()
    plt.show()


def main():
    """ load data """
    filename = 'prostate.xls'
    directory = '/Users/kaanguney.keklikci/Data/'
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')

    """ preprocess data """
    fillna_vals = ['sz', 'sg', 'wt']
    dropna_vals = ['ekg', 'age']
    drop_vals = ['patno', 'sdate']
    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')

    """ set RealNVP parameters """
    batch_size = 32
    dtype = np.float32
    tf_version = tf.__version__
    params = 2
    hidden_units = [512, 512]
    base_dist = tfp.distributions.Normal(loc=0., scale=1.)
    dims = data.shape[1]
    learning_rate = 1e-4
    steps = int(1e4)

    """ initialize samples """
    realnvp = RealNVP(dtype, tf_version, batch_size, params, hidden_units,
                      base_dist, dims, shift_only=True,
                      is_constant_jacobian=True, masked_dimension_count=dims - 1)
    dims = realnvp.get_dims(data)
    samples = realnvp.create_tensor(data)
    print(f'TensorFlow version: {realnvp.tf_version}')
    print(f'Number of dimensions: {realnvp.dims}')
    print(f'Learning rate: {learning_rate}')
    print(f'Number of masked dimensions: {realnvp.masked_dimension_count}\n')

    """ initialize RealNVP """
    realnvp = realnvp.make_realnvp(data)
    print('Successfully created model...\n')

    """ initialize loss and optimizer """
    loss = -tf.reduce_mean(realnvp.log_prob(samples))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
    experiment = Experiment(optimizer, learning_rate, loss, steps)
    keywords = ['adam', 'rmsprop', 'sgd']

    # Rerun training from freshly initialized variables with each optimizer.
    for keyword in keywords:
        session = tf.compat.v1.Session()
        tf.compat.v1.set_random_seed(42)
        experiment.change_optimizer(learning_rate, loss, keyword=keyword)
        optimizer = experiment.get_optimizer()
        session.run(tf.compat.v1.global_variables_initializer())
        print('Optimizer and loss successfully defined...\n')

        """ start training """
        recorded_losses = train(session, loss, optimizer)
        print('Training finished...\n')

        """ display results """
        plot_results(recorded_losses)


if __name__ == "__main__":
    main()
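The Experiment class is imported from experiment, which is not part of this commit; the script only exercises change_optimizer and get_optimizer. A hypothetical sketch of the interface those calls imply (names, keyword mapping, and behavior are assumptions, not the actual module):

import tensorflow as tf

class Experiment:
    """ hypothetical reconstruction of the interface used above """
    def __init__(self, optimizer, learning_rate, loss, steps):
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.loss = loss
        self.steps = steps

    def change_optimizer(self, learning_rate, loss, keyword='adam'):
        # Map a keyword to a tf.compat.v1 optimizer and rebuild the train op.
        optimizers = {
            'adam': tf.compat.v1.train.AdamOptimizer,
            'rmsprop': tf.compat.v1.train.RMSPropOptimizer,
            'sgd': tf.compat.v1.train.GradientDescentOptimizer,
        }
        self.optimizer = optimizers[keyword](learning_rate).minimize(loss)

    def get_optimizer(self):
        return self.optimizer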