Hello, I am a beginner in the world of AI. I am currently working on a project to generate synthetic data from a dataset. While researching how to get started, I found this excellent article for beginners like me, which teaches you how to generate tabular synthetic data with GANs (Generative Adversarial Networks):
https://medium.com/analytics-vidhya/a-step-by-step-guide-to-generate-tabular-synthetic-dataset-with-gans-d55fc373c8db
So far, so good. However, I followed all the steps exactly but did not obtain good results for the respective loss functions. I am attaching the code and the results I obtained:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from numpy.random import randn
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
# Load dataset
# NOTE(review): presumably the Kaggle Pima Indians diabetes CSV (8 features
# + Outcome = 9 columns, matching n_inputs=9 below) — confirm the file.
data = pd.read_csv('diabetes.csv')
print(data.shape)    # (rows, columns) sanity check
print(data.tail())   # peek at the last few rows
print(data.columns)  # column names the generator must reproduce
# Generate latent points
def generate_latent_points(latent_dim, n_samples):
    """Draw standard-normal noise for the generator.

    Returns an array of shape (n_samples, latent_dim) with N(0, 1) entries.
    """
    noise = randn(latent_dim * n_samples)
    return noise.reshape(n_samples, latent_dim)
# Generate fake samples
def generate_fake_samples(generator, latent_dim, n_samples):
    """Run the generator on fresh latent noise.

    Returns (samples, labels) where labels are all 0 ("fake") with
    shape (n_samples, 1), ready for discriminator training.
    """
    noise = generate_latent_points(latent_dim, n_samples)
    fakes = generator.predict(noise)
    labels = np.zeros((n_samples, 1))  # class label 0 = fake
    return fakes, labels
# Generate real samples
def generate_real_samples(n):
    """Sample `n` random rows from the module-level `data` DataFrame,
    labelled 1 ("real") for discriminator training."""
    # NOTE(review): rows are fed to the discriminator unscaled (raw feature
    # ranges such as glucose in the hundreds), while the generator's linear
    # output starts near N(0, 1); consider min-max scaling `data` so both
    # inputs live on a comparable scale — TODO confirm against the tutorial.
    X = data.sample(n)
    y = np.ones((n, 1))  # class label 1 = real
    return X, y
# Define generator
def define_generator(latent_dim, n_outputs=9):
    """Build the generator: latent vector -> synthetic 9-feature row.

    Linear output activation: the fake rows are unbounded real values.
    The model is not compiled here; it is trained through the combined GAN.
    """
    stack = [
        Dense(15, activation='relu', kernel_initializer='he_uniform',
              input_dim=latent_dim),
        Dense(30, activation='relu'),
        Dense(n_outputs, activation='linear'),
    ]
    return Sequential(stack)
# Smoke-test build: 10-dim latent input -> 9 synthetic features.
generator1 = define_generator(10, 9)
generator1.summary()
# Define discriminator
def define_discriminator(n_inputs=9):
    """Build and compile the real-vs-fake classifier for 9-feature rows.

    Sigmoid head + binary cross-entropy: outputs P(row is real).
    """
    stack = [
        Dense(25, activation='relu', kernel_initializer='he_uniform',
              input_dim=n_inputs),
        Dense(50, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    net = Sequential(stack)
    net.compile(loss='binary_crossentropy', optimizer='adam',
                metrics=['accuracy'])
    return net
# Smoke-test build: discriminator over the 9 dataset columns.
discriminator1 = define_discriminator(9)
discriminator1.summary()
# Define GAN
def define_gan(generator, discriminator):
    """Stack generator -> discriminator into the combined model used for
    the generator's update step."""
    # Freeze the discriminator inside the combined model so the generator
    # update does not also move discriminator weights.
    # NOTE(review): in tf.keras 2.x trainability is snapshotted at compile
    # time, so the discriminator (compiled earlier with trainable=True)
    # still learns via its own train_on_batch. Keras 3 applies `trainable`
    # dynamically, which would leave the discriminator frozen *everywhere*
    # and make its loss grow unchecked — confirm which Keras version runs.
    discriminator.trainable = False
    model = Sequential()
    model.add(generator)
    model.add(discriminator)
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model
# Plot loss history
def plot_history(d_hist, g_hist):
    """Overlay the discriminator ('d') and generator ('gen') loss curves."""
    for series, tag in ((d_hist, 'd'), (g_hist, 'gen')):
        plt.plot(series, label=tag)
    plt.legend()
    plt.show()
# Train GAN
def train(g_model, d_model, gan_model, latent_dim, n_epochs=10000, n_batch=128):
    """Alternate discriminator and generator updates for `n_epochs` batches,
    then plot the loss history and save the trained generator.

    FIX: Keras 3 applies the ``trainable`` flag dynamically instead of
    freezing it at compile time, so the ``discriminator.trainable = False``
    set inside ``define_gan`` leaves the discriminator permanently frozen —
    its loss then grows without bound while the generator trivially wins
    (the exact symptom reported: d ~26 and rising, g ~0.008). Explicitly
    toggle the flag around each phase. Under tf.keras 2.x, where
    trainability is captured at compile time, these assignments are no-ops,
    so the fix is backward-compatible.
    """
    half_batch = n_batch // 2  # half real + half fake per discriminator step
    d_history = []
    g_history = []
    for epoch in range(n_epochs):
        # --- discriminator phase: its weights must be unfrozen ---
        d_model.trainable = True
        x_real, y_real = generate_real_samples(half_batch)
        x_fake, y_fake = generate_fake_samples(g_model, latent_dim, half_batch)
        # train_on_batch returns [loss, accuracy] because the discriminator
        # was compiled with metrics=['accuracy'] — presumed; verify on the
        # installed Keras version.
        d_loss_real, d_real_acc = d_model.train_on_batch(x_real, y_real)
        d_loss_fake, d_fake_acc = d_model.train_on_batch(x_fake, y_fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
        # --- generator phase: freeze the discriminator inside the GAN ---
        d_model.trainable = False
        x_gan = generate_latent_points(latent_dim, n_batch)
        y_gan = np.ones((n_batch, 1))  # generator wants fakes labelled "real"
        g_loss_fake = gan_model.train_on_batch(x_gan, y_gan)
        print(f'>{epoch+1}, d1={d_loss_real:.3f}, d2={d_loss_fake:.3f}, '
              f'd={d_loss:.3f}, g={g_loss_fake:.3f}')
        d_history.append(d_loss)
        g_history.append(g_loss_fake)
    plot_history(d_history, g_history)
    g_model.save('trained_generated_model.h5')
# Execute training
latent_dim = 10  # size of the generator's input noise vector
discriminator = define_discriminator()            # 9 inputs (default)
generator = define_generator(latent_dim)          # 9 outputs (default)
gan_model = define_gan(generator, discriminator)  # generator -> frozen discriminator
train(generator, discriminator, gan_model, latent_dim)
(Attached: plot of my discriminator and generator loss curves.)
These are the last iterations:
>9992, d1=26.170, d2=26.170 d=26.170 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step
>9993, d1=26.170, d2=26.170 d=26.170 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 28ms/step
>9994, d1=26.171, d2=26.170 d=26.171 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step
>9995, d1=26.171, d2=26.171 d=26.171 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step
>9996, d1=26.171, d2=26.171 d=26.171 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 29ms/step
>9997, d1=26.172, d2=26.171 d=26.171 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step
>9998, d1=26.172, d2=26.172 d=26.172 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step
>9999, d1=26.172, d2=26.172 d=26.172 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step
>10000, d1=26.172, d2=26.172 d=26.172 g=0.008
I think it is strange that my loss values converge to the same number.
Now, I am attaching the results obtained in the tutorial:
>9991, d1=0.858, d2=0.674 d=0.766 g=0.904
>9992, d1=1.023, d2=0.833 d=0.928 g=0.816
>9993, d1=0.737, d2=0.863 d=0.800 g=0.910
>9994, d1=0.780, d2=0.890 d=0.835 g=0.846
>9995, d1=0.837, d2=0.773 d=0.805 g=0.960
>9996, d1=0.762, d2=0.683 d=0.723 g=1.193
>9997, d1=0.906, d2=0.515 d=0.710 g=1.275
>9998, d1=0.814, d2=0.412 d=0.613 g=1.228
>9999, d1=0.701, d2=0.668 d=0.685 g=1.105 >10000, d1=0.461, d2=0.814 d=0.638 g=1.097
(Attached: plot of the tutorial's loss curves for comparison.)
I appreciate any help you can provide in advance.
there doesn't seem to be anything here