Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
beta-vae-normalizing-flows
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Kaan Güney Keklikçi
beta-vae-normalizing-flows
Commits
eea1f5fe
Unverified
Commit
eea1f5fe
authored
Oct 18, 2021
by
kaanguney
Committed by
GitHub
Oct 18, 2021
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Delete scripts directory
parent
0da97b56
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
0 additions
and
1875 deletions
+0
-1875
experiment.py
scripts/experiment.py
+0
-29
cubic.py
scripts/flows/cubic/cubic.py
+0
-31
cubic_execute.py
scripts/flows/cubic/cubic_execute.py
+0
-164
iaf_execute.py
scripts/flows/iaf/iaf_execute.py
+0
-133
iaf_optimizer_experiment.py
scripts/flows/iaf/iaf_optimizer_experiment.py
+0
-156
maf.py
scripts/flows/maf/maf.py
+0
-85
maf_execute.py
scripts/flows/maf/maf_execute.py
+0
-136
maf_optimizer_experiment.py
scripts/flows/maf/maf_optimizer_experiment.py
+0
-156
planar.py
scripts/flows/planar/planar.py
+0
-81
planar_execute.py
scripts/flows/planar/planar_execute.py
+0
-144
planar_optimizer_experiment.py
scripts/flows/planar/planar_optimizer_experiment.py
+0
-154
radial.py
scripts/flows/radial/radial.py
+0
-58
radial_execute.py
scripts/flows/radial/radial_execute.py
+0
-175
realnvp.py
scripts/flows/realnvp/realnvp.py
+0
-59
realnvp_execute.py
scripts/flows/realnvp/realnvp_execute.py
+0
-125
realnvp_optimizer_experiment.py
scripts/flows/realnvp/realnvp_optimizer_experiment.py
+0
-137
data_loader.py
scripts/preprocessing/data_loader.py
+0
-17
data_preprocesser.py
scripts/preprocessing/data_preprocesser.py
+0
-35
No files found.
scripts/experiment.py
deleted
100644 → 0
View file @
0da97b56
import
numpy
as
np
import
tensorflow
as
tf
tf
.
compat
.
v1
.
disable_eager_execution
()
import
os
class Experiment:
    """Track the active training op and iteration budget of an optimizer study.

    The class never builds an Adam op itself: the op handed to the
    constructor is remembered and is what the ``'adam'`` keyword selects.
    Callers are therefore expected to construct the experiment with an Adam
    training op.
    """

    def __init__(self, optimizer, learning_rate, loss, steps=int(1e5)):
        """Store the initial training op and the number of iterations.

        Args:
            optimizer: Training op to start with; restored by
                ``change_optimizer(..., keyword='adam')``.
            learning_rate: Accepted for signature compatibility; not stored.
            loss: Accepted for signature compatibility; not stored.
            steps: Number of training iterations; coerced to ``int``.
        """
        # Remember the original op so 'adam' can be re-selected after a
        # switch to another optimizer.
        self._initial_optimizer = optimizer
        self.optimizer = optimizer
        self.steps = int(steps)

    def change_optimizer(self, learning_rate, loss, keyword='adam'):
        """Select the training op identified by ``keyword``.

        Args:
            learning_rate: Learning rate for a newly built sgd/rmsprop op.
            loss: Loss tensor the newly built op minimizes.
            keyword: One of ``'adam'``, ``'sgd'`` or ``'rmsprop'``.

        Raises:
            NotImplementedError: If ``keyword`` is not a known optimizer.
        """
        if keyword == 'adam':
            # Previously a self-assignment, which kept whatever op was
            # active (possibly sgd/rmsprop) instead of the Adam op.
            self.optimizer = self._initial_optimizer
        elif keyword == 'sgd':
            self.optimizer = tf.compat.v1.train.GradientDescentOptimizer(
                learning_rate).minimize(loss)
        elif keyword == 'rmsprop':
            self.optimizer = tf.compat.v1.train.RMSPropOptimizer(
                learning_rate).minimize(loss)
        else:
            raise NotImplementedError('Undefined optimizer!')

    def get_optimizer(self):
        """Return the currently selected training op."""
        return self.optimizer

    def set_iteration_count(self, iteration_count):
        """Set the number of training iterations (coerced to ``int``)."""
        # Same coercion as __init__, so values such as 1e4 stay usable
        # with range().
        self.steps = int(iteration_count)

    def get_iteration_count(self):
        """Return the number of training iterations as an ``int``."""
        return self.steps
\ No newline at end of file
scripts/flows/cubic/cubic.py
deleted
100644 → 0
View file @
0da97b56
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow_probability
as
tfp
import
os
import
matplotlib.pyplot
as
plt
plt
.
style
.
use
(
'seaborn'
)
tfd
=
tfp
.
distributions
tfb
=
tfp
.
bijectors
class Cubic(tfb.Bijector):
    """Element-wise cubic bijector computing ``y = (a * x + b) ** 3``."""

    def __init__(self, a, b, validate_args=False, name='Cubic'):
        """Store the coefficients as float32 and register the bijector.

        Args:
            a: Slope of the affine map applied before cubing.
            b: Offset of the affine map applied before cubing.
            validate_args: When true, assert that every entry of ``a`` and
                ``b`` has magnitude of at least ``1e-5``.
            name: Name passed to the base bijector.
        """
        # NOTE(review): tf.cast returns a tensor, so tf.Variable arguments
        # may not be tracked as trainable variables -- confirm.
        self.a = tf.cast(a, tf.float32)
        self.b = tf.cast(b, tf.float32)
        if validate_args:
            for coefficient in (self.a, self.b):
                far_from_zero = tf.math.greater_equal(tf.abs(coefficient), 1e-5)
                # The mean of the 0/1 mask equals 1 only if every entry passes.
                assert tf.reduce_mean(tf.cast(far_from_zero, tf.float32)) == 1.0
        super().__init__(
            validate_args=validate_args,
            forward_min_event_ndims=0,
            name=name,
        )

    def _forward(self, x):
        """Return ``(a * x + b) ** 3`` with size-1 dimensions squeezed out."""
        affine = self.a * tf.cast(x, tf.float32) + self.b
        return tf.squeeze(tf.pow(affine, 3))

    def _inverse(self, y):
        """Return ``(cbrt(y) - b) / a`` using a sign-preserving cube root."""
        y = tf.cast(y, tf.float32)
        # sign(y) * |y| ** (1/3) keeps the real cube root of negative values.
        cube_root = tf.math.sign(y) * tf.pow(tf.abs(y), 1 / 3)
        return (cube_root - self.b) / self.a

    def _forward_log_det_jacobian(self, x):
        """Return ``log(3 * |a|) + 2 * log(|a * x + b|)``."""
        x = tf.cast(x, tf.float32)
        log_scale = tf.math.log(3. * tf.abs(self.a))
        log_affine = tf.math.log(tf.abs(self.a * x + self.b))
        return log_scale + 2. * log_affine

    def _inverse_log_det_jacobian(self, x):
        """Return the negated forward log-det evaluated at ``_inverse(x)``."""
        preimage = self._inverse(x)
        return -self._forward_log_det_jacobian(preimage)
\ No newline at end of file
scripts/flows/cubic/cubic_execute.py
deleted
100644 → 0
View file @
0da97b56
import
os
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'3'
import
numpy
as
np
from
sklearn.preprocessing
import
StandardScaler
import
tensorflow
as
tf
import
tensorflow_probability
as
tfp
import
matplotlib.pyplot
as
plt
plt
.
style
.
use
(
'seaborn-paper'
)
from
data_loader
import
load_data
from
data_preprocesser
import
preprocess_data
from
cubic
import
Cubic
def main():
    """Plot a Cubic bijector and fit an inverted, trainable copy of it.

    The prostate spreadsheet is loaded and preprocessed through the project
    helpers, but the flow itself is trained on samples drawn from a bivariate
    standard normal distribution.
    """
    tfd = tfp.distributions
    tfb = tfp.bijectors

    # load data
    filename = 'prostate.xls'
    directory = '/Users/kaanguney.keklikci/Data/'
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')

    # preprocess data
    preprocesser = preprocess_data(
        StandardScaler(),
        ['sz', 'sg', 'wt'],
        ['ekg', 'age'],
        ['patno', 'sdate'],
    )
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')

    # define the base distribution as a bivariate gaussian
    n = 10000
    half_n = int(n / 2)
    base_dist = tfd.MultivariateNormalDiag(loc=[0., 0.], scale_diag=[1., 1.])

    # instantiate the bijector
    a = -0.1
    b = 5.
    bijector = Cubic(a, b, validate_args=True)
    grid = np.linspace(start=-4, stop=4, num=n)
    x = grid.astype(np.float32).reshape(-1, 2)

    plt.title('Forward transformation')
    plt.plot(x, bijector.forward(x))
    plt.show()

    plt.plot(x, bijector.inverse(x))
    plt.title('Inverse transformation')
    plt.show()

    plt.plot(x, bijector.forward_log_det_jacobian(x))
    plt.title('Jacobian determinant')
    plt.show()

    plt.plot(x, bijector.inverse_log_det_jacobian(x))
    plt.title('Inverse Jacobian determinant')
    plt.show()

    # create transformed distribution
    tfd_dist = tfd.TransformedDistribution(
        distribution=base_dist, bijector=bijector)

    # densities before training
    plt.figure(figsize=(12, 5))
    plt.plot(tfd_dist.prob(x), label='Trainable')
    plt.plot(base_dist.prob(x), label='Base')
    plt.title('Target and Trainable distribution')
    plt.legend()
    plt.show()

    # sample, batch -- train, validation (train is sampled first)
    x_train = tf.data.Dataset.from_tensor_slices(
        base_dist.sample(10000)).batch(half_n)
    x_valid = tf.data.Dataset.from_tensor_slices(
        base_dist.sample(1000)).batch(half_n)
    print(x_train.element_spec)
    print(x_valid.element_spec)
    print()

    # instantiate trainable bijector
    trainable_cubic = Cubic(
        tf.Variable(a, name='alpha'), tf.Variable(b, name='beta'))
    trainable_bijector = tfb.Invert(trainable_cubic)

    # instantiate trainable distribution
    trainable_dist = tfd.TransformedDistribution(tfd_dist, trainable_bijector)

    # train the bijector
    num_epochs = 10
    opt = tf.keras.optimizers.Adam()
    train_losses = []
    valid_losses = []
    norm = 1e3  # global-norm ceiling used when clipping gradients
    for epoch in range(num_epochs):
        print("Epoch {}...".format(epoch))
        epoch_train_loss = tf.keras.metrics.Mean()
        epoch_valid_loss = tf.keras.metrics.Mean()

        for train_batch in x_train:
            with tf.GradientTape() as tape:
                tape.watch(trainable_bijector.trainable_variables)
                loss = -trainable_dist.log_prob(train_batch)
            epoch_train_loss(loss)
            grads = tape.gradient(loss, trainable_bijector.trainable_variables)
            grads, _ = tf.clip_by_global_norm(grads, norm)
            # Alternative the author tried instead of clipping:
            #   tf.reshape(tf.nn.softmax(grads[-1], axis=1), [2])
            # note that both alternatives work almost identically
            opt.apply_gradients(
                zip(grads, trainable_bijector.trainable_variables))
        train_losses.append(epoch_train_loss.result().numpy())

        # validation
        for valid_batch in x_valid:
            loss = -trainable_dist.log_prob(valid_batch)
            epoch_valid_loss(loss)
        valid_losses.append(epoch_valid_loss.result().numpy())

    # plot the learning curves
    plt.plot(train_losses, label='train')
    plt.plot(valid_losses, label='valid')
    plt.legend()
    plt.xlabel("Epochs")
    plt.ylabel("Negative log likelihood")
    plt.title("Training and validation loss curves")
    plt.show()

    # plot the data and learned distributions
    plt.figure(figsize=(12, 5))
    plt.plot(trainable_dist.prob(x), label='Learned')
    plt.plot(base_dist.prob(x), label='Data')
    plt.legend()
    plt.show()
### best result obtained with tuning displayed as above
### radial flows converge fast, more epochs overfit
### DO NOT CHANGE validate_args=True
if
__name__
==
'__main__'
:
main
()
scripts/flows/iaf/iaf_execute.py
deleted
100644 → 0
View file @
0da97b56
import
os
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'3'
import
numpy
as
np
from
sklearn.preprocessing
import
StandardScaler
import
tensorflow
as
tf
tf
.
compat
.
v1
.
disable_eager_execution
()
import
tensorflow_probability
as
tfp
import
matplotlib.pyplot
as
plt
plt
.
style
.
use
(
'seaborn'
)
from
data_loader
import
load_data
from
data_preprocesser
import
preprocess_data
from
maf
import
IAF
def train(session, loss, optimizer, steps=int(1e5)):
    """Run the training op repeatedly and sample the loss.

    Args:
        session: Object whose ``run([optimizer, loss])`` returns a pair whose
            second item is the loss value for that step.
        loss: Loss tensor fetched on every step.
        optimizer: Training op executed on every step.
        steps: Number of iterations; coerced to ``int`` so values such as
            ``1e4`` are accepted.

    Returns:
        list: The loss recorded at every 100th iteration, starting with
        iteration 0.
    """
    recorded_losses = []
    for i in range(int(steps)):
        _, loss_per_iteration = session.run([optimizer, loss])
        if i % 100 == 0:
            recorded_losses.append(loss_per_iteration)
        if i % 10000 == 0:
            print('Iteration {iteration}: {loss}'.format(iteration=i, loss=loss_per_iteration))
    return recorded_losses
def plot_results(recorded_losses):
    """Scatter the recorded losses and overlay a straight-line fit.

    Args:
        recorded_losses: Sequence of loss values, plotted against their
            position in the sequence.
    """
    print('Displaying results...')
    plt.figure(figsize=(10, 5))
    positions = np.arange(len(recorded_losses))
    # Degree-1 polynomial fit: slope and intercept of the trend line.
    slope, intercept = np.polyfit(positions, recorded_losses, 1)
    plt.scatter(positions, recorded_losses, s=10, alpha=0.3)
    plt.plot(positions, slope * positions + intercept, c="r")
    plt.title('Loss per 100 iteration')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.tight_layout()
    plt.show()
def main(filename='prostate.xls', directory='/Users/kaanguney.keklikci/Data/'):
    """Train an IAF model on the preprocessed dataset and plot its loss.

    Args:
        filename: Name of the spreadsheet handed to the project loader.
        directory: Directory handed to the project loader. The default is
            the original author's machine-specific path.
    """
    # load data
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')

    # preprocess data
    fillna_vals = ['sz', 'sg', 'wt']
    dropna_vals = ['ekg', 'age']
    drop_vals = ['patno', 'sdate']
    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')

    # set flow parameters
    batch_size = 32
    dtype = np.float32
    tf_version = tf.__version__
    params = 2
    hidden_units = [5, 5]
    base_dist = tfp.distributions.Normal(loc=0., scale=1.)
    dims = data.shape[1]
    learning_rate = 1e-4
    activation = 'relu'
    hidden_degrees = 'random'
    conditional = True
    conditional_event_shape = (dims,)
    event_shape = conditional_event_shape
    conditional_input_layers = 'first_layer'

    # initialize samples
    iaf = IAF(dtype, tf_version, batch_size, params, hidden_units, base_dist, dims, activation,
              conditional, hidden_degrees, conditional_event_shape, conditional_input_layers,
              event_shape)
    samples = iaf.create_tensor(data)
    print(f'TensorFlow version: {iaf.tf_version}')
    print(f'Number of dimensions: {iaf.dims}')
    print(f'Learning rate: {learning_rate}\n')

    # initialize iaf (the name is rebound from the builder to the distribution)
    iaf = iaf.make_maf(data)
    print('Successfully created model...\n')

    # initialize loss and optimizer
    loss = -tf.reduce_mean(iaf.log_prob(samples, bijector_kwargs={'conditional_input': samples}))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)

    # The context manager closes the session once training is done; the
    # original never released it.
    with tf.compat.v1.Session() as session:
        tf.compat.v1.set_random_seed(42)
        session.run(tf.compat.v1.global_variables_initializer())
        print('Optimizer and loss successfully defined...\n')

        # start training
        recorded_losses = train(session, loss, optimizer)
        print('Training finished...\n')

    # display results
    plot_results(recorded_losses)
if
__name__
==
"__main__"
:
main
()
scripts/flows/iaf/iaf_optimizer_experiment.py
deleted
100644 → 0
View file @
0da97b56
""" use smaller learning rate for gradient descent or increase batch size """
import
os
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'3'
import
time
import
numpy
as
np
from
sklearn.preprocessing
import
StandardScaler
import
tensorflow
as
tf
tf
.
compat
.
v1
.
disable_eager_execution
()
import
tensorflow_probability
as
tfp
import
tensorflow.python.util.deprecation
as
deprecation
deprecation
.
_PRINT_DEPRECATION_WARNINGS
=
False
import
matplotlib.pyplot
as
plt
from
data_loader
import
load_data
from
data_preprocesser
import
preprocess_data
from
maf
import
IAF
from
experiment
import
Experiment
def train(session, loss, optimizer, steps=int(1e5)):
    """Run the training op repeatedly, sample the loss and report the time.

    Args:
        session: Object whose ``run([optimizer, loss])`` returns a pair whose
            second item is the loss value for that step.
        loss: Loss tensor fetched on every step.
        optimizer: Training op executed on every step.
        steps: Number of iterations; coerced to ``int`` so values such as
            ``1e4`` are accepted.

    Returns:
        list: The loss recorded at every 100th iteration, starting with
        iteration 0.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments.
    start_time = time.perf_counter()
    recorded_losses = []
    for i in range(int(steps)):
        _, loss_per_iteration = session.run([optimizer, loss])
        if i % 100 == 0:
            recorded_losses.append(loss_per_iteration)
        if i % 10000 == 0:
            print('Iteration {iteration}: {loss}'.format(iteration=i, loss=loss_per_iteration))
    print('\nTraining completed...')
    print(f'Training time: {time.perf_counter() - start_time} seconds')
    return recorded_losses
def plot_results(recorded_losses):
    """Draw the recorded losses as points together with a linear trend.

    Args:
        recorded_losses: Sequence of loss values, plotted against their
            position in the sequence.
    """
    print('Displaying results...')
    plt.figure(figsize=(10, 5))
    index = np.arange(len(recorded_losses))
    # First-degree least-squares fit through the recorded points.
    gradient, offset = np.polyfit(index, recorded_losses, 1)
    plt.scatter(index, recorded_losses, s=10, alpha=0.3)
    plt.plot(index, gradient * index + offset, c="r")
    plt.title('Loss per 100 iteration')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.tight_layout()
    plt.show()
def main(filename='prostate.xls', directory='/Users/kaanguney.keklikci/Data/'):
    """Train the IAF model once per optimizer and plot each loss curve.

    Args:
        filename: Name of the spreadsheet handed to the project loader.
        directory: Directory handed to the project loader. The default is
            the original author's machine-specific path.
    """
    # load data
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')

    # preprocess data
    fillna_vals = ['sz', 'sg', 'wt']
    dropna_vals = ['ekg', 'age']
    drop_vals = ['patno', 'sdate']
    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')

    # set IAF parameters
    batch_size = 32
    dtype = np.float32
    tf_version = tf.__version__
    params = 2
    hidden_units = [5, 5]  # set this to a small number if you are using CPU
    base_dist = tfp.distributions.Normal(loc=0., scale=1., name="gaussian")
    dims = data.shape[1]
    learning_rate = 1e-4
    steps = 1e4
    activation = 'relu'
    hidden_degrees = 'random'
    conditional = True
    conditional_event_shape = (dims,)
    event_shape = conditional_event_shape
    conditional_input_layers = 'first_layer'

    # initialize samples
    iaf = IAF(dtype, tf_version, batch_size, params, hidden_units, base_dist, dims, activation,
              conditional, hidden_degrees, conditional_event_shape, conditional_input_layers,
              event_shape)
    samples = iaf.create_tensor(data)
    print(f'TensorFlow version: {iaf.tf_version}')
    print(f'Number of dimensions: {iaf.dims}')
    print(f'Learning rate: {learning_rate}\n')

    # initialize IAF (the name is rebound from the builder to the distribution)
    iaf = iaf.make_maf(data)
    print('Successfully created model...\n')

    # initialize loss and optimizer
    loss = -tf.reduce_mean(iaf.log_prob(samples, bijector_kwargs={'conditional_input': samples}))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
    experiment = Experiment(optimizer, learning_rate, loss, steps)

    keywords = ['adam', 'rmsprop', 'sgd']
    for keyword in keywords:
        # One session per optimizer, closed on exit; the original leaked a
        # session on every pass of the loop.
        with tf.compat.v1.Session() as session:
            tf.compat.v1.set_random_seed(42)
            experiment.change_optimizer(learning_rate, loss, keyword=keyword)
            optimizer = experiment.get_optimizer()
            session.run(tf.compat.v1.global_variables_initializer())
            print(f'Optimizer: {optimizer.name}')
            print('Optimizer and loss successfully defined...\n')

            # start training -- honour the configured iteration count, which
            # was previously stored in the experiment but never passed on
            recorded_losses = train(session, loss, optimizer, experiment.get_iteration_count())
            print('Training finished...\n')

        # display results
        plot_results(recorded_losses)
if
__name__
==
"__main__"
:
main
()
scripts/flows/maf/maf.py
deleted
100644 → 0
View file @
0da97b56
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow_probability
as
tfp
tf
.
compat
.
v1
.
disable_eager_execution
()
import
os
import
matplotlib.pyplot
as
plt
plt
.
style
.
use
(
'seaborn'
)
tfd
=
tfp
.
distributions
tfb
=
tfp
.
bijectors
class MAF(object):
    """Configuration holder and builder for a masked autoregressive flow."""

    def __init__(self, dtype, tf_version, batch_size, params, hidden_units, base_dist, dims, activation,
                 conditional, hidden_degrees, conditional_event_shape, conditional_input_layers, event_shape):
        """Store every constructor argument on the instance under its own name."""
        # data pipeline settings
        self.dtype = dtype
        self.batch_size = batch_size
        self.dims = dims
        self.tf_version = tf_version
        # base distribution of the flow
        self.base_dist = base_dist
        # settings forwarded to tfb.AutoregressiveNetwork
        self.params = params
        self.hidden_units = hidden_units
        self.activation = activation
        self.hidden_degrees = hidden_degrees
        self.conditional = conditional
        self.conditional_event_shape = conditional_event_shape
        self.conditional_input_layers = conditional_input_layers
        self.event_shape = event_shape

    def get_tf_version(self):
        """Return the stored TensorFlow version value."""
        return self.tf_version

    def get_session(self):
        """Return a newly created ``tf.compat.v1.Session``."""
        return tf.compat.v1.Session()

    def get_dims(self, data):
        """Return the size of the second axis of ``data``."""
        return data.shape[1]

    def create_tensor(self, data):
        """Build the input pipeline and return its ``get_next`` tensor.

        The rows of ``data`` are cast to ``self.dtype``, repeated, shuffled
        with a buffer of ``data.shape[0]`` elements, prefetched and batched
        into groups of ``self.batch_size``.
        """
        pipeline = (
            tf.data.Dataset.from_tensor_slices(data.astype(self.dtype))
            .repeat()
            .shuffle(buffer_size=data.shape[0])
            .prefetch(2 * self.batch_size)
            .batch(self.batch_size)
        )
        iterator = tf.compat.v1.data.make_one_shot_iterator(pipeline)
        return iterator.get_next()

    def get_shift_scale_func(self):
        """Return a ``tfb.AutoregressiveNetwork`` built from the stored settings."""
        return tfb.AutoregressiveNetwork(
            params=self.params,
            hidden_units=self.hidden_units,
            activation=self.activation,
            conditional=self.conditional,
            conditional_event_shape=self.conditional_event_shape,
            event_shape=self.event_shape,
            conditional_input_layers=self.conditional_input_layers,
            hidden_degrees=self.hidden_degrees,
        )

    def make_maf(self, data):
        """Return the base distribution transformed by a masked autoregressive flow.

        Args:
            data: Array whose second-axis size is used as the sample shape
                of the base distribution.
        """
        n_features = self.get_dims(data)
        # The network and bijector are created before the Sample wrapper,
        # matching the original construction order.
        flow = tfb.MaskedAutoregressiveFlow(self.get_shift_scale_func())
        return tfd.TransformedDistribution(tfd.Sample(self.base_dist, n_features), flow)
class IAF(MAF):
    """MAF variant whose bijector is wrapped in ``tfb.Invert``."""

    def make_maf(self, data):
        """Return the base distribution transformed by the inverted flow.

        Args:
            data: Array whose second-axis size is used as the sample shape
                of the base distribution.
        """
        n_features = self.get_dims(data)
        # Same construction order as the parent class; only the Invert
        # wrapper differs.
        inverted_flow = tfb.Invert(tfb.MaskedAutoregressiveFlow(self.get_shift_scale_func()))
        return tfd.TransformedDistribution(tfd.Sample(self.base_dist, n_features), inverted_flow)
\ No newline at end of file
scripts/flows/maf/maf_execute.py
deleted
100644 → 0
View file @
0da97b56
import
os
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'3'
import
numpy
as
np
from
sklearn.preprocessing
import
StandardScaler
import
tensorflow
as
tf
tf
.
compat
.
v1
.
disable_eager_execution
()
import
tensorflow_probability
as
tfp
import
matplotlib.pyplot
as
plt
plt
.
style
.
use
(
'seaborn'
)
from
data_loader
import
load_data
from
data_preprocesser
import
preprocess_data
from
maf
import
MAF
def train(session, loss, optimizer, steps=int(1e5)):
    """Run the training op repeatedly and sample the loss.

    Args:
        session: Object whose ``run([optimizer, loss])`` returns a pair whose
            second item is the loss value for that step.
        loss: Loss tensor fetched on every step.
        optimizer: Training op executed on every step.
        steps: Number of iterations; coerced to ``int`` so values such as
            ``1e4`` are accepted.

    Returns:
        list: The loss recorded at every 100th iteration, starting with
        iteration 0.
    """
    recorded_losses = []
    for i in range(int(steps)):
        _, loss_per_iteration = session.run([optimizer, loss])
        if i % 100 == 0:
            recorded_losses.append(loss_per_iteration)
        if i % 10000 == 0:
            print('Iteration {iteration}: {loss}'.format(iteration=i, loss=loss_per_iteration))
    return recorded_losses
def plot_results(recorded_losses):
    """Plot the recorded losses with a fitted straight line on top.

    Args:
        recorded_losses: Sequence of loss values, plotted against their
            position in the sequence.
    """
    print('Displaying results...')
    plt.figure(figsize=(10, 5))
    sample_idx = np.arange(len(recorded_losses))
    # Linear least-squares fit used to visualise the overall trend.
    trend_slope, trend_intercept = np.polyfit(sample_idx, recorded_losses, 1)
    plt.scatter(sample_idx, recorded_losses, s=10, alpha=0.3)
    plt.plot(sample_idx, trend_slope * sample_idx + trend_intercept, c="r")
    plt.title('Loss per 100 iteration')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.tight_layout()
    plt.show()
def main(filename='prostate.xls', directory='/Users/kaanguney.keklikci/Data/'):
    """Train a MAF model on the preprocessed dataset and plot its loss.

    Args:
        filename: Name of the spreadsheet handed to the project loader.
        directory: Directory handed to the project loader. The default is
            the original author's machine-specific path.
    """
    # load data
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')

    # preprocess data
    fillna_vals = ['sz', 'sg', 'wt']
    dropna_vals = ['ekg', 'age']
    drop_vals = ['patno', 'sdate']
    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')

    # set MAF parameters
    batch_size = 32
    dtype = np.float32
    tf_version = tf.__version__
    params = 2
    hidden_units = [512, 512]
    base_dist = tfp.distributions.Normal(loc=0., scale=1.)
    dims = data.shape[1]
    learning_rate = 1e-4
    activation = 'relu'
    hidden_degrees = 'random'
    conditional = True
    conditional_event_shape = (dims,)
    event_shape = conditional_event_shape
    conditional_input_layers = 'first_layer'

    # initialize samples
    maf = MAF(dtype, tf_version, batch_size, params, hidden_units, base_dist, dims, activation,
              conditional, hidden_degrees, conditional_event_shape, conditional_input_layers,
              event_shape)
    samples = maf.create_tensor(data)
    print(f'TensorFlow version: {maf.tf_version}')
    print(f'Number of dimensions: {maf.dims}')
    print(f'Learning rate: {learning_rate}\n')

    # initialize MAF (the name is rebound from the builder to the distribution)
    maf = maf.make_maf(data)
    print('Successfully created model...\n')

    # initialize loss and optimizer
    loss = -tf.reduce_mean(maf.log_prob(samples, bijector_kwargs={'conditional_input': samples}))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)

    # The context manager closes the session once training is done; the
    # original never released it.
    with tf.compat.v1.Session() as session:
        tf.compat.v1.set_random_seed(42)
        session.run(tf.compat.v1.global_variables_initializer())
        print('Optimizer and loss successfully defined...\n')

        # start training
        recorded_losses = train(session, loss, optimizer)
        print('Training finished...\n')

    # display results
    plot_results(recorded_losses)
if
__name__
==
"__main__"
:
main
()
scripts/flows/maf/maf_optimizer_experiment.py
deleted
100644 → 0
View file @
0da97b56
""" use smaller learning rate for gradient descent or increase batch size """
import
os
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'3'
import
time
import
numpy
as
np
from
sklearn.preprocessing
import
StandardScaler
import
tensorflow
as
tf
tf
.
compat
.
v1
.
disable_eager_execution
()
import
tensorflow_probability
as
tfp
import
tensorflow.python.util.deprecation
as
deprecation
deprecation
.
_PRINT_DEPRECATION_WARNINGS
=
False
import
matplotlib.pyplot
as
plt
from
data_loader
import
load_data
from
data_preprocesser
import
preprocess_data
from
maf
import
MAF
from
experiment
import
Experiment
def train(session, loss, optimizer, steps=int(1e5)):
    """Run the training op repeatedly, sample the loss and report the time.

    Args:
        session: Object whose ``run([optimizer, loss])`` returns a pair whose
            second item is the loss value for that step.
        loss: Loss tensor fetched on every step.
        optimizer: Training op executed on every step.
        steps: Number of iterations; coerced to ``int`` so values such as
            ``1e4`` are accepted.

    Returns:
        list: The loss recorded at every 100th iteration, starting with
        iteration 0.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments.
    start_time = time.perf_counter()
    recorded_losses = []
    for i in range(int(steps)):
        _, loss_per_iteration = session.run([optimizer, loss])
        if i % 100 == 0:
            recorded_losses.append(loss_per_iteration)
        if i % 10000 == 0:
            print('Iteration {iteration}: {loss}'.format(iteration=i, loss=loss_per_iteration))
    print('\nTraining completed...')
    print(f'Training time: {time.perf_counter() - start_time} seconds')
    return recorded_losses
def plot_results(recorded_losses):
    """Show the recorded losses as a scatter plot with a linear trend line.

    Args:
        recorded_losses: Sequence of loss values, plotted against their
            position in the sequence.
    """
    print('Displaying results...')
    plt.figure(figsize=(10, 5))
    ticks = np.arange(len(recorded_losses))
    # polyfit with degree 1 returns (slope, intercept).
    line_slope, line_intercept = np.polyfit(ticks, recorded_losses, 1)
    plt.scatter(ticks, recorded_losses, s=10, alpha=0.3)
    plt.plot(ticks, line_slope * ticks + line_intercept, c="r")
    plt.title('Loss per 100 iteration')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.tight_layout()
    plt.show()
def main(filename='prostate.xls', directory='/Users/kaanguney.keklikci/Data/'):
    """Train the MAF model once per optimizer and plot each loss curve.

    Args:
        filename: Name of the spreadsheet handed to the project loader.
        directory: Directory handed to the project loader. The default is
            the original author's machine-specific path.
    """
    # load data
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')

    # preprocess data
    fillna_vals = ['sz', 'sg', 'wt']
    dropna_vals = ['ekg', 'age']
    drop_vals = ['patno', 'sdate']
    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')

    # set MAF parameters
    batch_size = 32
    dtype = np.float32
    tf_version = tf.__version__
    params = 2
    hidden_units = [512, 512]
    base_dist = tfp.distributions.Normal(loc=0., scale=1., name="gaussian")
    dims = data.shape[1]
    learning_rate = 1e-4
    steps = 1e4
    activation = 'relu'
    hidden_degrees = 'random'
    conditional = True
    conditional_event_shape = (dims,)
    event_shape = conditional_event_shape
    conditional_input_layers = 'first_layer'

    # initialize samples
    maf = MAF(dtype, tf_version, batch_size, params, hidden_units, base_dist, dims, activation,
              conditional, hidden_degrees, conditional_event_shape, conditional_input_layers,
              event_shape)
    samples = maf.create_tensor(data)
    print(f'TensorFlow version: {maf.tf_version}')
    print(f'Number of dimensions: {maf.dims}')
    print(f'Learning rate: {learning_rate}\n')

    # initialize MAF (the name is rebound from the builder to the distribution)
    maf = maf.make_maf(data)
    print('Successfully created model...\n')

    # initialize loss and optimizer
    loss = -tf.reduce_mean(maf.log_prob(samples, bijector_kwargs={'conditional_input': samples}))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
    experiment = Experiment(optimizer, learning_rate, loss, steps)

    keywords = ['adam', 'rmsprop', 'sgd']
    for keyword in keywords:
        # One session per optimizer, closed on exit; the original leaked a
        # session on every pass of the loop.
        with tf.compat.v1.Session() as session:
            tf.compat.v1.set_random_seed(42)
            experiment.change_optimizer(learning_rate, loss, keyword=keyword)
            optimizer = experiment.get_optimizer()
            session.run(tf.compat.v1.global_variables_initializer())
            print(f'Optimizer: {optimizer.name}')
            print('Optimizer and loss successfully defined...\n')

            # start training -- honour the configured iteration count, which
            # was previously stored in the experiment but never passed on
            recorded_losses = train(session, loss, optimizer, experiment.get_iteration_count())
            print('Training finished...\n')

        # display results
        plot_results(recorded_losses)
if
__name__
==
"__main__"
:
main
()
scripts/flows/planar/planar.py
deleted
100644 → 0
View file @
0da97b56
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow_probability
as
tfp
tf
.
compat
.
v1
.
disable_eager_execution
()
import
os
import
matplotlib.pyplot
as
plt
plt
.
style
.
use
(
'seaborn'
)
tfd
=
tfp
.
distributions
tfb
=
tfp
.
bijectors
class Planar(tfb.Bijector, tf.Module):
    """Planar flow bijector ``f(z) = z + u * tanh(w . z + b)``.

    ``case`` selects which direction of the bijector is backed by ``f``:
    ``'sampling'`` exposes it as the forward transform and
    ``'density_estimation'`` exposes it as the inverse transform. The other
    direction raises ``NotImplementedError``.
    """

    _VALID_CASES = ('density_estimation', 'sampling')

    def __init__(self, input_dimensions, case='density_estimation', validate_args=False, name='planar_flow'):
        """Create the trainable parameters ``u``, ``w`` and ``b``.

        Args:
            input_dimensions: Length of the ``u`` and ``w`` vectors.
            case: ``'density_estimation'`` or ``'sampling'``.
            validate_args: Forwarded to the base bijector.
            name: Forwarded to the base bijector.

        Raises:
            ValueError: If ``case`` is not one of the two supported options.
        """
        super().__init__(
            forward_min_event_ndims=1,
            inverse_min_event_ndims=1,
            validate_args=validate_args,
            name=name)

        self.event_ndims = 1
        self.case = case

        # The original check (`case != a or case != b` inside
        # `except ValueError`) was always true and could never report an
        # invalid value, so bad input went unnoticed until a transform call.
        if self.case not in self._VALID_CASES:
            raise ValueError(
                'Case is not defined. Available options for case: density_estimation, sampling')

        n_dims = int(input_dimensions)
        # Parameters start from uniform(-1, 1) draws.
        self.u = tf.Variable(np.random.uniform(-1., 1., size=n_dims),
                             name='u', dtype=tf.float32, trainable=True)
        self.w = tf.Variable(np.random.uniform(-1., 1., size=n_dims),
                             name='w', dtype=tf.float32, trainable=True)
        self.b = tf.Variable(np.random.uniform(-1., 1., size=1),
                             name='b', dtype=tf.float32, trainable=True)

    def h(self, y):
        """Return the activation ``tanh(y)``."""
        return tf.math.tanh(y)

    def h_prime(self, y):
        """Return the derivative of the activation, ``1 - tanh(y) ** 2``."""
        return 1.0 - tf.math.tanh(y) ** 2.0

    def alpha(self):
        """Return ``(-1 + softplus(w . u)) - w . u``."""
        wu = tf.tensordot(self.w, self.u, 1)
        m = -1.0 + tf.nn.softplus(wu)
        return m - wu

    def _u(self):
        """Shift ``u`` along ``w`` when ``w . u <= -1``.

        NOTE(review): this looks like the usual invertibility correction for
        planar flows, but nothing in this class calls it -- confirm usage.
        """
        if tf.tensordot(self.w, self.u, 1) <= -1:
            alpha = self.alpha()
            z_para = tf.transpose(alpha * self.w / tf.math.sqrt(tf.reduce_sum(self.w ** 2.0)))
            # In-place update keeps the same tf.Variable object.
            self.u.assign_add(z_para)

    def _forward_func(self, zk):
        """Return ``zk + tanh(zk . w + b) (outer) u``."""
        inter_1 = self.h(tf.tensordot(zk, self.w, 1) + self.b)
        return tf.add(zk, tf.tensordot(inter_1, self.u, 0))

    def _forward(self, zk):
        """Apply the planar map; only available when ``case == 'sampling'``."""
        if self.case == 'sampling':
            return self._forward_func(zk)
        raise NotImplementedError('_forward is not implemented for density_estimation')

    def _inverse(self, zk):
        """Apply the planar map; only available for ``'density_estimation'``."""
        if self.case == 'density_estimation':
            return self._forward_func(zk)
        raise NotImplementedError('_inverse is not implemented for sampling')

    def _log_det_jacobian(self, zk):
        """Return ``log|1 + psi . u|`` with ``psi = h'(zk . w + b) (outer) w``."""
        psi = tf.tensordot(self.h_prime(tf.tensordot(zk, self.w, 1) + self.b), self.w, 0)
        det = tf.math.abs(1.0 + tf.tensordot(psi, self.u, 1))
        return tf.math.log(det)

    def _forward_log_det_jacobian(self, zk):
        """Return the negated log-determinant; only for ``'sampling'``.

        NOTE(review): the sign is kept from the original code; confirm it is
        the intended convention for the sampling direction.
        """
        if self.case == 'sampling':
            return -self._log_det_jacobian(zk)
        raise NotImplementedError('_forward_log_det_jacobian is not implemented for density_estimation')

    def _inverse_log_det_jacobian(self, zk):
        """Return the log-determinant of the planar map evaluated at ``zk``."""
        return self._log_det_jacobian(zk)
\ No newline at end of file
scripts/flows/planar/planar_execute.py
deleted
100644 → 0
View file @
0da97b56
import
os
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'3'
import
numpy
as
np
from
sklearn.preprocessing
import
StandardScaler
import
tensorflow
as
tf
tf
.
compat
.
v1
.
disable_eager_execution
()
import
tensorflow_probability
as
tfp
import
matplotlib.pyplot
as
plt
from
data_loader
import
load_data
from
data_preprocesser
import
preprocess_data
from
planar
import
Planar
def train(session, loss, optimizer, steps=int(1e5)):
    """Run the training op repeatedly and sample the loss.

    Args:
        session: Object whose ``run([optimizer, loss])`` returns a pair whose
            second item is the loss value for that step.
        loss: Loss tensor fetched on every step.
        optimizer: Training op executed on every step.
        steps: Number of iterations; coerced to ``int`` so values such as
            ``1e4`` are accepted.

    Returns:
        list: The loss recorded at every 100th iteration, starting with
        iteration 0.
    """
    recorded_losses = []
    for i in range(int(steps)):
        _, loss_per_iteration = session.run([optimizer, loss])
        if i % 100 == 0:
            recorded_losses.append(loss_per_iteration)
        if i % 10000 == 0:
            print('Iteration {iteration}: {loss}'.format(iteration=i, loss=loss_per_iteration))
    return recorded_losses
def plot_results(recorded_losses):
    """Scatter the recorded losses and overlay a least-squares linear trend.

    Args:
        recorded_losses: sequence of loss values; plotted against their index.
    """
    print('Displaying results...')
    plt.figure(figsize=(10, 5))
    positions = np.arange(len(recorded_losses))
    slope, intercept = np.polyfit(positions, recorded_losses, 1)
    plt.scatter(positions, recorded_losses, s=10, alpha=0.3)
    plt.plot(positions, slope * positions + intercept, c="r")
    plt.title('Loss per 100 iteration')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.tight_layout()
    plt.show()
def create_tensor(data, batch_size):
    """Build an endless, shuffled, batched float32 input pipeline over ``data``.

    Args:
        data: array-like exposing ``astype`` and ``shape``; ``shape[0]`` is
            used as the shuffle buffer size.
        batch_size: number of rows per emitted batch.

    Returns:
        The ``get_next()`` tensor of a one-shot iterator over the pipeline.
    """
    pipeline = (
        tf.data.Dataset.from_tensor_slices(data.astype(np.float32))
        .repeat()
        .shuffle(buffer_size=data.shape[0])
        .prefetch(2 * batch_size)
        .batch(batch_size)
    )
    iterator = tf.compat.v1.data.make_one_shot_iterator(pipeline)
    return iterator.get_next()
"""
if any error on tensorflow is displayed claiming tf.float32 is not displayed,
do the following (one of them is probably enough)
** downgrade keras to 2.3.1
** replace tf.float32 with np.float32
"""
def check_version():
    """Print the installed TensorFlow, TensorFlow-Probability and Keras versions."""
    tensorflow_line = f'Tensorflow version: {tf.__version__}'
    print(tensorflow_line)
    probability_line = f'Tensorflow-probability version: {tfp.__version__}'
    print(probability_line)
    keras_line = f'Keras version: {tf.keras.__version__}\n'
    print(keras_line)
# In[ ]:
def main():
    """Fit a chain of Planar flows to the prostate dataset and plot the loss.

    Steps: load the spreadsheet, preprocess it, build an 8-layer Planar chain
    on top of a standard multivariate normal, minimise the negative mean
    log-likelihood with Adam, and plot the recorded loss values.
    """
    # load data
    filename = 'prostate.xls'
    directory = '/Users/kaanguney.keklikci/Data/'
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')

    # preprocess data
    fillna_vals = ['sz', 'sg', 'wt']
    dropna_vals = ['ekg', 'age']
    drop_vals = ['patno', 'sdate']
    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')

    # The graph-level seed only influences ops created AFTER this call, so it
    # has to precede the dataset shuffle and the flow variables below.
    tf.compat.v1.set_random_seed(42)

    # set Planar parameters
    tfd = tfp.distributions
    tfb = tfp.bijectors
    batch_size = 32
    dtype = np.float32
    layers = 8
    dims = data.shape[1]
    # multivariate normal for base distribution
    base_dist = tfd.MultivariateNormalDiag(loc=tf.zeros(shape=dims, dtype=dtype))
    learning_rate = 1e-4

    # initialize samples
    samples = create_tensor(data, batch_size)

    # make Planar
    bijectors = [
        Planar(input_dimensions=dims, case='density_estimation')
        for _ in range(layers)
    ]
    bijector = tfb.Chain(bijectors=list(reversed(bijectors)), name='chain_of_planar')
    planar_flow = tfd.TransformedDistribution(distribution=base_dist, bijector=bijector)
    loss = -tf.reduce_mean(planar_flow.log_prob(samples))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)

    # The context manager releases the session's resources once training ends.
    with tf.compat.v1.Session() as session:
        session.run(tf.compat.v1.global_variables_initializer())
        print('Optimizer and loss successfully defined...\n')

        # start training
        recorded_losses = train(session, loss, optimizer)
    print('Training finished...\n')

    # display results
    plot_results(recorded_losses)


if __name__ == "__main__":
    main()
scripts/flows/planar/planar_optimizer_experiment.py
deleted
100644 → 0
View file @
0da97b56
import
os
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'3'
import
numpy
as
np
from
sklearn.preprocessing
import
StandardScaler
import
tensorflow
as
tf
tf
.
compat
.
v1
.
disable_eager_execution
()
import
tensorflow_probability
as
tfp
import
matplotlib.pyplot
as
plt
from
data_loader
import
load_data
from
data_preprocesser
import
preprocess_data
from
planar
import
Planar
from
experiment
import
Experiment
import
tensorflow.python.util.deprecation
as
deprecation
deprecation
.
_PRINT_DEPRECATION_WARNINGS
=
False
def train(session, loss, optimizer, steps=int(1e5)):
    """Optimise for ``steps`` iterations, sampling the loss every 100 steps.

    Args:
        session: object whose ``run`` evaluates ``[optimizer, loss]``.
        loss: loss fetch passed to ``session.run``.
        optimizer: training op passed to ``session.run``.
        steps: number of iterations to execute.

    Returns:
        list of the loss values seen at iterations 0, 100, 200, ...
    """
    history = []
    for iteration in range(steps):
        _, value = session.run([optimizer, loss])
        if iteration % 100 == 0:
            history.append(value)
        # Progress line every 10,000 iterations.
        if iteration % int(1e4) == 0:
            print('Iteration {iteration}: {loss}'.format(iteration=iteration, loss=value))
    return history
def plot_results(recorded_losses):
    """Show the recorded losses as a scatter plot with a fitted straight line.

    Args:
        recorded_losses: sequence of loss values; plotted against their index.
    """
    print('Displaying results...')
    plt.figure(figsize=(10, 5))
    index = np.arange(len(recorded_losses))
    gradient, offset = np.polyfit(index, recorded_losses, 1)
    trend = gradient * index + offset
    plt.scatter(index, recorded_losses, s=10, alpha=0.3)
    plt.plot(index, trend, c="r")
    plt.title('Loss per 100 iteration')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.tight_layout()
    plt.show()
def create_tensor(data, batch_size):
    """Return the next-batch tensor of a repeating, shuffled float32 dataset.

    Args:
        data: array-like exposing ``astype`` and ``shape``; ``shape[0]`` is
            used as the shuffle buffer size.
        batch_size: number of rows per emitted batch.
    """
    rows = data.astype(np.float32)
    stream = tf.data.Dataset.from_tensor_slices(rows).repeat()
    stream = stream.shuffle(buffer_size=data.shape[0]).prefetch(2 * batch_size)
    batches = stream.batch(batch_size)
    return tf.compat.v1.data.make_one_shot_iterator(batches).get_next()
"""
if any error on tensorflow is displayed claiming tf.float32 is not displayed,
do the following (one of them is probably enough)
** downgrade keras to 2.3.1
** replace tf.float32 with np.float32
"""
def check_version():
    """Report the TensorFlow, TensorFlow-Probability and Keras versions on stdout."""
    tensorflow_line = f'Tensorflow version: {tf.__version__}'
    print(tensorflow_line)
    probability_line = f'Tensorflow-probability version: {tfp.__version__}'
    print(probability_line)
    keras_line = f'Keras version: {tf.keras.__version__}\n'
    print(keras_line)
def main():
    """Train a 2-layer Planar flow once per optimizer keyword and plot each run.

    The graph (input pipeline, flow, loss) is built once; for every keyword in
    ``['adam', 'rmsprop', 'sgd']`` a fresh session is opened, the optimizer is
    swapped through ``Experiment.change_optimizer`` and the model is trained.
    """
    """ load data """
    filename = 'prostate.xls'
    directory = '/Users/kaanguney.keklikci/Data/'
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')
    """ preprocess data """
    fillna_vals = ['sz', 'sg', 'wt']
    dropna_vals = ['ekg', 'age']
    drop_vals = ['patno', 'sdate']
    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')
    """ set Planar parameters """
    tfd = tfp.distributions
    tfb = tfp.bijectors
    batch_size = 32
    dtype = np.float32
    # layers should be higher but for comparison fix layers to 2 for every initial test
    layers = 2
    dims = data.shape[1]
    # multivariate normal for base distribution
    base_dist = tfd.MultivariateNormalDiag(loc=tf.zeros(shape=dims, dtype=dtype))
    learning_rate = 1e-4
    # NOTE(review): `steps` is handed to Experiment only; the train() calls
    # below omit it and therefore run with train's default of int(1e5).
    steps = int(1e4)
    """ initialize samples """
    samples = create_tensor(data, batch_size)
    """ make Planar """
    bijectors = []
    for i in range(0, layers):
        bijectors.append(Planar(input_dimensions=dims, case='density_estimation'))
    # The list is reversed before being handed to Chain.
    bijector = tfb.Chain(bijectors=list(reversed(bijectors)), name='chain_of_planar')
    planar_flow = tfd.TransformedDistribution(distribution=base_dist, bijector=bijector)
    # Negative mean log-likelihood of the current batch.
    loss = -tf.reduce_mean(planar_flow.log_prob(samples))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
    experiment = Experiment(optimizer, learning_rate, loss, steps)
    keywords = ['adam', 'rmsprop', 'sgd']
    for keyword in keywords:
        # NOTE(review): a new Session is opened per keyword and never closed.
        session = tf.compat.v1.Session()
        # NOTE(review): the seed is set after the dataset and flow ops were
        # created; presumably it does not affect them - confirm.
        tf.compat.v1.set_random_seed(42)
        experiment.change_optimizer(learning_rate, loss, keyword=keyword)
        optimizer = experiment.get_optimizer()
        # Re-initialises every variable, so each optimizer starts from scratch.
        session.run(tf.compat.v1.global_variables_initializer())
        print(f'Optimizer: {optimizer.name}')
        print('Optimizer and loss successfully defined...\n')
        """ start training """
        recorded_losses = train(session, loss, optimizer)
        print('Training finished...\n')
        """ display results """
        plot_results(recorded_losses)


if __name__ == "__main__":
    main()
scripts/flows/radial/radial.py
deleted
100644 → 0
View file @
0da97b56
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow_probability
as
tfp
import
os
import
matplotlib.pyplot
as
plt
plt
.
style
.
use
(
'seaborn'
)
tfd
=
tfp
.
distributions
tfb
=
tfp
.
bijectors
# experimental -- inspired by
# Variational Inference with Normalizing Flows, Rezende & Shakir
# do not use for this setting, distorts base distribution a lot, no convergence in the end
def parametrize(b):
    """Return ``softplus(b) = log(1 + exp(b))`` as a NumPy value.

    ``tf.math.softplus`` is used instead of ``log1p(exp(b))`` because the
    explicit ``exp`` overflows to ``inf`` for large float32 inputs.

    Args:
        b: value or tensor convertible to float32.

    Returns:
        NumPy array/scalar with the softplus of ``b``; ``.numpy()`` requires
        eager execution.
    """
    b = tf.cast(b, tf.float32)
    return tf.math.softplus(b).numpy()
def h(a, r):
    """Return ``1 / (a + r)``, the radial-flow kernel."""
    denominator = a + r
    return 1 / denominator
def h_prime(a, r):
    """Return ``-1 / (a + r)**2``, the derivative of ``h`` with respect to ``r``."""
    squared = (a + r) ** 2
    return -1 / squared
class RadialFlow(tfb.Bijector):
    """Experimental radial-flow bijector parameterised by ``a``, ``b`` and ``x0``.

    The header comment of this file marks the flow as experimental and advises
    against using it. All three parameters are cast to float32 tensors.
    """

    def __init__(self, a, b, x0, validate_args=True, name='radial-flow'):
        """Store the parameters and register an element-wise bijector.

        Args:
            a: value added to the radius inside ``h``.
            b: scale of the radial displacement.
            x0: reference point the radius is measured from.
            validate_args: when true, assert that ``b >= -a``.
            name: bijector name forwarded to ``tfb.Bijector``.
        """
        self.a = tf.cast(a, tf.float32)
        self.b = tf.cast(b, tf.float32)
        self.x0 = tf.cast(x0, tf.float32)
        super(RadialFlow, self).__init__(validate_args=validate_args, forward_min_event_ndims=0, name=name)
        if validate_args:
            # `.numpy()` needs eager execution.
            # NOTE(review): `assert` is stripped under `python -O`.
            assert tf.math.greater_equal(self.b, -self.a).numpy() == True

    def _forward(self, x):
        """Return ``x0 + r*zhat + r*zhat*b*h(a, r)`` with ``r = |x - x0|``."""
        # Element-wise absolute difference, not a vector norm.
        r = tf.abs(x - self.x0)
        # NOTE(review): r is zero wherever x == x0, making this 0/0.
        zhat = (x - self.x0) / r
        y = self.x0 + r * zhat + r * zhat * self.b * h(self.a, r)
        return y

    def _inverse(self, y):
        """Return ``b*r*zhat*h(a, r)`` with ``r = |y - x0|``.

        NOTE(review): this does not look like the algebraic inverse of
        ``_forward`` (``x0`` and the ``r*zhat`` term are absent) - confirm.
        """
        r = tf.abs(y - self.x0)
        zhat = (y - self.x0) / r
        return self.b * r * zhat * h(self.a, r)

    def _forward_log_det_jacobian(self, y):
        """Return ``(1 + b*h)**2 * (1 + b*h + b*h'*r)``.

        Raises:
            RuntimeError: when ``y.shape[1]`` does not exist.

        NOTE(review): no logarithm is taken although the method name promises
        a log-determinant; ``n_dims`` is read but the exponent is fixed at 2.
        """
        try:
            # Only used to verify that a second axis exists.
            n_dims = y.shape[1]
        except IndexError as e:
            raise RuntimeError('Input is one dimensional!')
        r = tf.abs(y - self.x0)
        dh = h_prime(self.a, r)
        hh = h(self.a, r)
        return (1 + self.b * hh) ** 2 * (1 + self.b * hh + self.b * dh * r)

    def _inverse_log_det_jacobian(self, y):
        """Return the negated forward value evaluated at ``_inverse(y)``."""
        return -self._forward_log_det_jacobian(self._inverse(y))
\ No newline at end of file
scripts/flows/radial/radial_execute.py
deleted
100644 → 0
View file @
0da97b56
import
os
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'3'
import
numpy
as
np
from
sklearn.preprocessing
import
StandardScaler
import
tensorflow
as
tf
import
tensorflow_probability
as
tfp
import
matplotlib.pyplot
as
plt
plt
.
style
.
use
(
'seaborn-paper'
)
from
data_loader
import
load_data
from
data_preprocesser
import
preprocess_data
from
radial
import
RadialFlow
def main():
    """Visualise and train a 2-D RadialFlow against a bivariate Gaussian.

    The prostate dataset is loaded and preprocessed but, as the notes at the
    end of this function state, it is not used by the flow yet; all plots and
    the training loop operate on samples drawn from ``base_dist``.
    """
    tfd = tfp.distributions
    tfb = tfp.bijectors
    """ load data """
    filename = 'prostate.xls'
    directory = '/Users/kaanguney.keklikci/Data/'
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')
    """ preprocess data """
    fillna_vals = ['sz', 'sg', 'wt']
    dropna_vals = ['ekg', 'age']
    drop_vals = ['patno', 'sdate']
    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')
    """ define the base distributon as bivariate gaussian """
    base_dist = tfd.Independent(tfd.Normal(loc=[2., -0.5], scale=[1., 1.]), reinterpreted_batch_ndims=1)
    """ instantiate the bijector (a,b,x0) """
    n = 1000
    a = 10.
    b = -10.
    x0 = np.array([-0.5, 1.]).astype(np.float32).reshape(-1, 2)
    bijector = RadialFlow(a, b, x0)
    print(f'x0 shape: {x0.shape}')
    # n evenly spaced values in [-4, 4], folded into rows of two.
    x = np.linspace(start=-4, stop=4, num=n).astype(np.float32).reshape(-1, 2)
    plt.title('Forward transformation')
    plt.plot(x, bijector.forward(x))
    plt.show()
    plt.plot(x, bijector.inverse(x))
    plt.title('Inverse transformation')
    plt.show()
    plt.plot(x, bijector.forward_log_det_jacobian(x))
    plt.title('Jacobian determinant')
    plt.show()
    plt.plot(x, bijector.inverse_log_det_jacobian(x))
    plt.title('Inverse Jacobian determinant')
    plt.show()
    """ create transformed distribution """
    tfd_dist = tfd.TransformedDistribution(distribution=base_dist, bijector=bijector)
    # prior training
    plt.figure(figsize=(12, 5))
    plt.plot(tfd_dist.prob(x), label='Trainable')
    plt.plot(base_dist.prob(x), label='Base')
    plt.title('Target and Trainable distribution')
    plt.legend()
    plt.show()
    # sample, batch -- train, validation
    x_train = base_dist.sample(10000)
    x_train = tf.data.Dataset.from_tensor_slices(x_train)
    x_train = x_train.batch(int(n / 2))
    x_valid = base_dist.sample(1000)
    x_valid = tf.data.Dataset.from_tensor_slices(x_valid)
    x_valid = x_valid.batch(int(n / 2))
    print(x_train.element_spec)
    print(x_valid.element_spec)
    print()
    # instantiate trainable bijector
    trainable_bijector = RadialFlow(tf.Variable(a, name='alpha'), tf.Variable(b, name='beta'), tf.Variable(x0, name='ref'))
    # instantiate trainable distribution
    # The trainable bijector is stacked on top of the already transformed tfd_dist.
    trainable_dist = tfd.TransformedDistribution(tfd_dist, trainable_bijector)
    # Train the bijector
    num_epochs = 10
    opt = tf.keras.optimizers.Adam()
    train_losses = []
    valid_losses = []
    # Upper bound used for global-norm gradient clipping.
    norm = 1e3
    for epoch in range(num_epochs):
        print("Epoch {}...".format(epoch))
        # Fresh running means per epoch.
        train_loss = tf.keras.metrics.Mean()
        val_loss = tf.keras.metrics.Mean()
        for train_batch in x_train:
            with tf.GradientTape() as tape:
                tape.watch(trainable_bijector.trainable_variables)
                loss = -trainable_dist.log_prob(train_batch)
            train_loss(loss)
            grads = tape.gradient(loss, trainable_bijector.trainable_variables)
            grads, _ = tf.clip_by_global_norm(grads, norm)
            # grads = tf.reshape(tf.nn.softmax(grads[-1], axis=1), [2])
            # note that both alternatives work almost identically
            opt.apply_gradients(zip(grads, trainable_bijector.trainable_variables))
        train_losses.append(train_loss.result().numpy())
        # validation
        for valid_batch in x_valid:
            loss = -trainable_dist.log_prob(valid_batch)
            val_loss(loss)
        valid_losses.append(val_loss.result().numpy())
    # Plot the learning curves
    plt.plot(train_losses, label='train')
    plt.plot(valid_losses, label='valid')
    plt.legend()
    plt.xlabel("Epochs")
    plt.ylabel("Negative log likelihood")
    plt.title("Training and validation loss curves")
    plt.show()
    # Plot the data and learned distributions
    plt.figure(figsize=(12, 5))
    plt.plot(trainable_dist.prob(x), label='Learned')
    plt.plot(base_dist.prob(x), label='Data')
    plt.legend()
    plt.show()
    ### best result obtained with tuning displayed as above
    ### radial flows converge fast, more epochs overfit
    ### DO NOT CHANGE validate_args=True
    ### DOES NOT USE DATASET YET
    ### FOR VISUALIZATION PURPOSES IN 2D
    ### WILL INTEGRATE DATASET AFTER LEARNING THE DISTRIBUTION


if __name__ == '__main__':
    main()
scripts/flows/realnvp/realnvp.py
deleted
100644 → 0
View file @
0da97b56
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow_probability
as
tfp
tf
.
compat
.
v1
.
disable_eager_execution
()
import
os
import
matplotlib.pyplot
as
plt
plt
.
style
.
use
(
'seaborn'
)
from
maf
import
MAF
tfd
=
tfp
.
distributions
tfb
=
tfp
.
bijectors
class RealNVP:
    """Configuration holder and factory for a single-layer RealNVP flow."""

    def __init__(self, dtype, tf_version, batch_size, params, hidden_units, base_dist, dims, shift_only, is_constant_jacobian, masked_dimension_count):
        """Store every constructor argument on the instance unchanged."""
        # data / runtime settings
        self.dtype = dtype
        self.tf_version = tf_version
        self.batch_size = batch_size
        self.dims = dims
        # model settings
        self.params = params
        self.hidden_units = hidden_units
        self.base_dist = base_dist
        self.shift_only = shift_only
        self.is_constant_jacobian = is_constant_jacobian
        self.masked_dimension_count = masked_dimension_count

    def get_tf_version(self):
        """Return the TensorFlow version string given at construction."""
        return self.tf_version

    def get_session(self):
        """Return a new ``tf.compat.v1.Session``."""
        return tf.compat.v1.Session()

    def get_dims(self, data):
        """Return the size of the second axis of ``data``."""
        _, n_columns = data.shape[0], data.shape[1]
        return n_columns

    def create_tensor(self, data):
        """Return the next-batch tensor of a repeating, shuffled dataset over ``data``."""
        pipeline = (
            tf.data.Dataset.from_tensor_slices(data.astype(self.dtype))
            .repeat()
            .shuffle(buffer_size=data.shape[0])
            .prefetch(2 * self.batch_size)
            .batch(self.batch_size)
        )
        return tf.compat.v1.data.make_one_shot_iterator(pipeline).get_next()

    def override_masked_dimension_count(self, new_dim):
        """Replace the number of masked dimensions."""
        self.masked_dimension_count = new_dim

    def get_shift_scale_func(self, data):
        """Return the default RealNVP shift/log-scale template; ``data`` is unused."""
        return tfb.real_nvp_default_template(self.hidden_units, self.shift_only)

    def make_realnvp(self, data):
        """Return the base distribution, sampled per column of ``data``, pushed through RealNVP."""
        event_size = self.get_dims(data)
        coupling_fn = self.get_shift_scale_func(data)
        coupling_layer = tfb.RealNVP(
            num_masked=self.masked_dimension_count,
            shift_and_log_scale_fn=coupling_fn,
            is_constant_jacobian=self.is_constant_jacobian,
        )
        return tfd.TransformedDistribution(tfd.Sample(self.base_dist, event_size), coupling_layer)
\ No newline at end of file
scripts/flows/realnvp/realnvp_execute.py
deleted
100644 → 0
View file @
0da97b56
import
warnings
warnings
.
filterwarnings
(
'ignore'
)
import
os
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'3'
import
numpy
as
np
from
sklearn.preprocessing
import
StandardScaler
import
tensorflow
as
tf
tf
.
compat
.
v1
.
disable_eager_execution
()
import
tensorflow_probability
as
tfp
import
matplotlib.pyplot
as
plt
plt
.
style
.
use
(
'seaborn'
)
from
data_loader
import
load_data
from
data_preprocesser
import
preprocess_data
from
realnvp
import
RealNVP
def train(session, loss, optimizer, steps=int(1e5)):
    """Execute ``steps`` training iterations and collect every 100th loss.

    Args:
        session: object whose ``run`` evaluates ``[optimizer, loss]``.
        loss: loss fetch passed to ``session.run``.
        optimizer: training op passed to ``session.run``.
        steps: number of iterations to execute.

    Returns:
        list of loss values taken at iterations 0, 100, 200, ...
    """
    report_every = int(1e4)
    sampled_losses = []
    for step in range(steps):
        _, current_loss = session.run([optimizer, loss])
        if not step % 100:
            sampled_losses.append(current_loss)
        if not step % report_every:
            print('Iteration {iteration}: {loss}'.format(iteration=step, loss=current_loss))
    return sampled_losses
def plot_results(recorded_losses):
    """Scatter the recorded losses and draw their least-squares line in red.

    Args:
        recorded_losses: sequence of loss values; plotted against their index.
    """
    print('Displaying results...')
    plt.figure(figsize=(10, 5))
    positions = np.arange(len(recorded_losses))
    slope, intercept = np.polyfit(positions, recorded_losses, 1)
    plt.scatter(positions, recorded_losses, s=10, alpha=0.3)
    plt.plot(positions, slope * positions + intercept, c="r")
    plt.title('Loss per 100 iteration')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.tight_layout()
    plt.show()
def main():
    """Fit a RealNVP flow to the prostate dataset and plot the loss.

    Steps: load the spreadsheet, preprocess it, build a shift-only RealNVP
    over a standard normal base, minimise the negative mean log-likelihood
    with Adam, and plot the recorded loss values.
    """
    # load data
    filename = 'prostate.xls'
    directory = '/Users/kaanguney.keklikci/Data/'
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')

    # preprocess data
    fillna_vals = ['sz', 'sg', 'wt']
    dropna_vals = ['ekg', 'age']
    drop_vals = ['patno', 'sdate']
    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')

    # The graph-level seed only influences ops created AFTER this call, so it
    # has to precede the dataset shuffle and the network weights below.
    tf.compat.v1.set_random_seed(42)

    # set RealNVP parameters
    batch_size = 32
    dtype = np.float32
    tf_version = tf.__version__
    params = 2
    hidden_units = [512, 512]
    base_dist = tfp.distributions.Normal(loc=0., scale=1.)
    dims = data.shape[1]
    learning_rate = 1e-4

    # initialize samples
    builder = RealNVP(dtype, tf_version, batch_size, params, hidden_units, base_dist, dims,
                      shift_only=True, is_constant_jacobian=True, masked_dimension_count=dims - 1)
    dims = builder.get_dims(data)
    samples = builder.create_tensor(data)
    print(f'TensorFlow version: {builder.tf_version}')
    print(f'Number of dimensions: {builder.dims}')
    print(f'Learning rate: {learning_rate}')
    print(f'Number of masked dimensions: {builder.masked_dimension_count}\n')

    # initialize RealNVP
    realnvp = builder.make_realnvp(data)
    print('Successfully created model...\n')

    # initialize loss and optimizer
    loss = -tf.reduce_mean(realnvp.log_prob(samples))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)

    # The context manager releases the session's resources once training ends.
    with tf.compat.v1.Session() as session:
        session.run(tf.compat.v1.global_variables_initializer())
        print('Optimizer and loss successfully defined...\n')

        # start training
        recorded_losses = train(session, loss, optimizer)
    print('Training finished...\n')

    # display results
    plot_results(recorded_losses)


if __name__ == "__main__":
    main()
scripts/flows/realnvp/realnvp_optimizer_experiment.py
deleted
100644 → 0
View file @
0da97b56
import
warnings
warnings
.
filterwarnings
(
'ignore'
)
import
os
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'3'
import
numpy
as
np
from
sklearn.preprocessing
import
StandardScaler
import
tensorflow
as
tf
tf
.
compat
.
v1
.
disable_eager_execution
()
import
tensorflow_probability
as
tfp
import
tensorflow.python.util.deprecation
as
deprecation
deprecation
.
_PRINT_DEPRECATION_WARNINGS
=
False
import
matplotlib.pyplot
as
plt
plt
.
style
.
use
(
'seaborn'
)
from
data_loader
import
load_data
from
data_preprocesser
import
preprocess_data
from
realnvp
import
RealNVP
from
experiment
import
Experiment
def train(session, loss, optimizer, steps=int(1e5)):
    """Optimise for ``steps`` iterations, sampling the loss every 100 steps.

    Args:
        session: object whose ``run`` evaluates ``[optimizer, loss]``.
        loss: loss fetch passed to ``session.run``.
        optimizer: training op passed to ``session.run``.
        steps: number of iterations to execute.

    Returns:
        list of the loss values seen at iterations 0, 100, 200, ...
    """
    history = []
    for iteration in range(steps):
        _, value = session.run([optimizer, loss])
        if iteration % 100 == 0:
            history.append(value)
        # Progress line every 10,000 iterations.
        if iteration % int(1e4) == 0:
            print('Iteration {iteration}: {loss}'.format(iteration=iteration, loss=value))
    return history
def plot_results(recorded_losses):
    """Show the recorded losses as a scatter plot with a fitted straight line.

    Args:
        recorded_losses: sequence of loss values; plotted against their index.
    """
    print('Displaying results...')
    plt.figure(figsize=(10, 5))
    index = np.arange(len(recorded_losses))
    gradient, offset = np.polyfit(index, recorded_losses, 1)
    trend = gradient * index + offset
    plt.scatter(index, recorded_losses, s=10, alpha=0.3)
    plt.plot(index, trend, c="r")
    plt.title('Loss per 100 iteration')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.tight_layout()
    plt.show()
def main():
    """Train a RealNVP flow once per optimizer keyword and plot each run.

    The graph (input pipeline, flow, loss) is built once; for every keyword in
    ``['adam', 'rmsprop', 'sgd']`` a fresh session is opened, the optimizer is
    swapped through ``Experiment.change_optimizer`` and the model is trained.
    """
    """ load data """
    filename = 'prostate.xls'
    directory = '/Users/kaanguney.keklikci/Data/'
    loader = load_data(filename, directory)
    loader.create_directory(directory)
    data = loader.read_data(directory, filename)
    print('Data successfully loaded...\n')
    """ preprocess data """
    fillna_vals = ['sz', 'sg', 'wt']
    dropna_vals = ['ekg', 'age']
    drop_vals = ['patno', 'sdate']
    preprocesser = preprocess_data(StandardScaler(), fillna_vals, dropna_vals, drop_vals)
    data = preprocesser.dropna_features(data)
    data = preprocesser.impute(data)
    data = preprocesser.drop_features(data)
    data = preprocesser.encode_categorical(data)
    data = preprocesser.scale(data)
    print('Data successfully preprocessed...\n')
    """ set MAF parameters """
    batch_size = 32
    dtype = np.float32
    tf_version = tf.__version__
    params = 2
    hidden_units = [512, 512]
    base_dist = tfp.distributions.Normal(loc=0., scale=1.)
    dims = data.shape[1]
    learning_rate = 1e-4
    # NOTE(review): `steps` is a float and is handed to Experiment only; the
    # train() calls below omit it and run with train's default of int(1e5).
    steps = 1e4
    """ initialize samples """
    # All but one dimension are masked (masked_dimension_count = dims - 1).
    realnvp = RealNVP(dtype, tf_version, batch_size, params, hidden_units, base_dist, dims, shift_only=True, is_constant_jacobian=True, masked_dimension_count=dims - 1)
    dims = realnvp.get_dims(data)
    samples = realnvp.create_tensor(data)
    print(f'TensorFlow version: {realnvp.tf_version}')
    print(f'Number of dimensions: {realnvp.dims}')
    print(f'Learning rate: {learning_rate}')
    print(f'Number of masked dimensions: {realnvp.masked_dimension_count}\n')
    """ initialize RealNVP """
    # From here on `realnvp` names the transformed distribution, not the builder.
    realnvp = realnvp.make_realnvp(data)
    print('Successfully created model...\n')
    """ initialize loss and optimizer """
    # Negative mean log-likelihood of the current batch.
    loss = -tf.reduce_mean(realnvp.log_prob(samples))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(loss)
    experiment = Experiment(optimizer, learning_rate, loss, steps)
    keywords = ['adam', 'rmsprop', 'sgd']
    for keyword in keywords:
        # NOTE(review): a new Session is opened per keyword and never closed.
        session = tf.compat.v1.Session()
        # NOTE(review): the seed is set after the dataset and flow ops were
        # created; presumably it does not affect them - confirm.
        tf.compat.v1.set_random_seed(42)
        experiment.change_optimizer(learning_rate, loss, keyword=keyword)
        optimizer = experiment.get_optimizer()
        # Re-initialises every variable, so each optimizer starts from scratch.
        session.run(tf.compat.v1.global_variables_initializer())
        print('Optimizer and loss successfully defined...\n')
        """ start training """
        recorded_losses = train(session, loss, optimizer)
        print('Training finished...\n')
        """ display results """
        plot_results(recorded_losses)


if __name__ == "__main__":
    main()
scripts/preprocessing/data_loader.py
deleted
100644 → 0
View file @
0da97b56
import
numpy
as
np
import
pandas
as
pd
from
pathlib
import
Path
import
xlrd
class load_data:
    """Locate and read an Excel workbook stored as ``directory`` + ``filename``."""

    def __init__(self, filename, directory):
        """Remember the workbook file name and the directory that holds it."""
        self.filename = filename
        self.directory = directory

    def create_directory(self, directory):
        """Create ``directory`` (and missing parents); no error if it exists."""
        Path(directory).mkdir(parents=True, exist_ok=True)

    def read_data(self, directory, filename):
        """Read the workbook into a DataFrame.

        The ``directory`` and ``filename`` arguments are accepted for
        interface compatibility but ignored: the values given to the
        constructor are used.

        Returns:
            pandas.DataFrame produced by ``pd.read_excel``.
        """
        # Join with pathlib so a directory without a trailing separator still
        # yields a valid path (plain string concatenation did not).
        workbook_path = Path(self.directory) / self.filename
        wb = xlrd.open_workbook(str(workbook_path), encoding_override='iso-8859-1')
        return pd.read_excel(wb)
\ No newline at end of file
scripts/preprocessing/data_preprocesser.py
deleted
100644 → 0
View file @
0da97b56
import
numpy
as
np
import
pandas
as
pd
import
matplotlib.pyplot
as
plt
class preprocess_data:
    """Tabular preprocessing steps: drop, impute, encode and scale a DataFrame."""

    def __init__(self, scaler, fillna_vals, dropna_vals, drop_vals):
        """Store the scaler and the column lists each step operates on.

        Args:
            scaler: object exposing ``fit_transform``.
            fillna_vals: columns whose missing values are mean-imputed.
            dropna_vals: columns whose missing values cause row removal.
            drop_vals: columns removed entirely.
        """
        self.scaler = scaler
        self.fillna_vals = fillna_vals
        self.dropna_vals = dropna_vals
        self.drop_vals = drop_vals

    def dropna_features(self, data):
        """Return ``data`` without rows that have NaN in any ``dropna_vals`` column."""
        return data.dropna(subset=self.dropna_vals)

    def impute(self, data):
        """Fill NaNs of every ``fillna_vals`` column with that column's mean, in place."""
        for feature in self.fillna_vals:
            column = data[feature]
            data[feature] = column.fillna(value=np.mean(column))
        return data

    def drop_features(self, data):
        """Drop the ``drop_vals`` columns and reset the index, both in place."""
        data.drop(self.drop_vals, axis=1, inplace=True)
        data.reset_index(drop=True, inplace=True)
        return data

    def encode_categorical(self, data):
        """Return ``pd.get_dummies(data)``."""
        return pd.get_dummies(data)

    def scale(self, data):
        """Return a new DataFrame with the scaler's output and the original column names."""
        column_names = data.columns
        scaled_values = self.scaler.fit_transform(data)
        return pd.DataFrame(scaled_values, columns=column_names)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment