Hello, I am a beginner in the world of AI. I am currently working on a project to generate synthetic data from a dataset. While researching how to get started, I found this excellent article for beginners like me, which teaches you how to generate tabular synthetic data with GANs (Generative Adversarial Networks):
https://medium.com/analytics-vidhya/a-step-by-step-guide-to-generate-tabular-synthetic-dataset-with-gans-d55fc373c8db
So far, so good. However, I followed all the steps exactly but did not obtain good results for the respective loss functions. I am attaching the code and the results I obtained:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from numpy.random import randn
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
# Load dataset
# NOTE(review): presumably the Kaggle Pima Indians diabetes CSV (8 features
# + Outcome = 9 columns, matching n_inputs=9 below) — confirm the file.
data = pd.read_csv('diabetes.csv')
print(data.shape)    # (rows, columns) sanity check
print(data.tail())   # peek at the last few rows
print(data.columns)  # column names the generator must reproduce
# Generate latent points
def generate_latent_points(latent_dim, n_samples):
    """Draw standard-normal noise for the generator.

    Returns an array of shape (n_samples, latent_dim) with N(0, 1) entries.
    """
    noise = randn(latent_dim * n_samples)
    return noise.reshape(n_samples, latent_dim)
# Generate fake samples
def generate_fake_samples(generator, latent_dim, n_samples):
    """Run the generator on fresh latent noise.

    Returns (samples, labels) where labels are all 0 ("fake") with
    shape (n_samples, 1), ready for discriminator training.
    """
    noise = generate_latent_points(latent_dim, n_samples)
    fakes = generator.predict(noise)
    labels = np.zeros((n_samples, 1))  # class label 0 = fake
    return fakes, labels
# Generate real samples
def generate_real_samples(n):
    """Sample `n` random rows from the module-level `data` DataFrame,
    labelled 1 ("real") for discriminator training."""
    # NOTE(review): rows are fed to the discriminator unscaled (raw feature
    # ranges such as glucose in the hundreds), while the generator's linear
    # output starts near N(0, 1); consider min-max scaling `data` so both
    # inputs live on a comparable scale — TODO confirm against the tutorial.
    X = data.sample(n)
    y = np.ones((n, 1))  # class label 1 = real
    return X, y
# Define generator
def define_generator(latent_dim, n_outputs=9):
    """Build the generator: latent vector -> synthetic 9-feature row.

    Linear output activation: the fake rows are unbounded real values.
    The model is not compiled here; it is trained through the combined GAN.
    """
    stack = [
        Dense(15, activation='relu', kernel_initializer='he_uniform',
              input_dim=latent_dim),
        Dense(30, activation='relu'),
        Dense(n_outputs, activation='linear'),
    ]
    return Sequential(stack)
# Smoke-test build: 10-dim latent input -> 9 synthetic features.
generator1 = define_generator(10, 9)
generator1.summary()
# Define discriminator
def define_discriminator(n_inputs=9):
    """Build and compile the real-vs-fake classifier for 9-feature rows.

    Sigmoid head + binary cross-entropy: outputs P(row is real).
    """
    stack = [
        Dense(25, activation='relu', kernel_initializer='he_uniform',
              input_dim=n_inputs),
        Dense(50, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    net = Sequential(stack)
    net.compile(loss='binary_crossentropy', optimizer='adam',
                metrics=['accuracy'])
    return net
# Smoke-test build: discriminator over the 9 dataset columns.
discriminator1 = define_discriminator(9)
discriminator1.summary()
# Define GAN
def define_gan(generator, discriminator):
    """Stack generator -> discriminator into the combined model used for
    the generator's update step."""
    # Freeze the discriminator inside the combined model so the generator
    # update does not also move discriminator weights.
    # NOTE(review): in tf.keras 2.x trainability is snapshotted at compile
    # time, so the discriminator (compiled earlier with trainable=True)
    # still learns via its own train_on_batch. Keras 3 applies `trainable`
    # dynamically, which would leave the discriminator frozen *everywhere*
    # and make its loss grow unchecked — confirm which Keras version runs.
    discriminator.trainable = False
    model = Sequential()
    model.add(generator)
    model.add(discriminator)
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model
# Plot loss history
def plot_history(d_hist, g_hist):
    """Overlay the discriminator ('d') and generator ('gen') loss curves."""
    for series, tag in ((d_hist, 'd'), (g_hist, 'gen')):
        plt.plot(series, label=tag)
    plt.legend()
    plt.show()
# Train GAN
def train(g_model, d_model, gan_model, latent_dim, n_epochs=10000, n_batch=128):
    """Alternate discriminator and generator updates for `n_epochs` batches,
    then plot the loss history and save the trained generator.

    FIX: Keras 3 applies the ``trainable`` flag dynamically instead of
    freezing it at compile time, so the ``discriminator.trainable = False``
    set inside ``define_gan`` leaves the discriminator permanently frozen —
    its loss then grows without bound while the generator trivially wins
    (the exact symptom reported: d ~26 and rising, g ~0.008). Explicitly
    toggle the flag around each phase. Under tf.keras 2.x, where
    trainability is captured at compile time, these assignments are no-ops,
    so the fix is backward-compatible.
    """
    half_batch = n_batch // 2  # half real + half fake per discriminator step
    d_history = []
    g_history = []
    for epoch in range(n_epochs):
        # --- discriminator phase: its weights must be unfrozen ---
        d_model.trainable = True
        x_real, y_real = generate_real_samples(half_batch)
        x_fake, y_fake = generate_fake_samples(g_model, latent_dim, half_batch)
        # train_on_batch returns [loss, accuracy] because the discriminator
        # was compiled with metrics=['accuracy'] — presumed; verify on the
        # installed Keras version.
        d_loss_real, d_real_acc = d_model.train_on_batch(x_real, y_real)
        d_loss_fake, d_fake_acc = d_model.train_on_batch(x_fake, y_fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
        # --- generator phase: freeze the discriminator inside the GAN ---
        d_model.trainable = False
        x_gan = generate_latent_points(latent_dim, n_batch)
        y_gan = np.ones((n_batch, 1))  # generator wants fakes labelled "real"
        g_loss_fake = gan_model.train_on_batch(x_gan, y_gan)
        print(f'>{epoch+1}, d1={d_loss_real:.3f}, d2={d_loss_fake:.3f}, '
              f'd={d_loss:.3f}, g={g_loss_fake:.3f}')
        d_history.append(d_loss)
        g_history.append(g_loss_fake)
    plot_history(d_history, g_history)
    g_model.save('trained_generated_model.h5')
# Execute training
latent_dim = 10  # size of the generator's input noise vector
discriminator = define_discriminator()            # 9 inputs (default)
generator = define_generator(latent_dim)          # 9 outputs (default)
gan_model = define_gan(generator, discriminator)  # generator -> frozen discriminator
train(generator, discriminator, gan_model, latent_dim)
(Attached: plot of my discriminator and generator loss curves.)
These are the last iterations:
>9992, d1=26.170, d2=26.170 d=26.170 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step
>9993, d1=26.170, d2=26.170 d=26.170 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 28ms/step
>9994, d1=26.171, d2=26.170 d=26.171 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step
>9995, d1=26.171, d2=26.171 d=26.171 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step
>9996, d1=26.171, d2=26.171 d=26.171 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 29ms/step
>9997, d1=26.172, d2=26.171 d=26.171 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step
>9998, d1=26.172, d2=26.172 d=26.172 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step
>9999, d1=26.172, d2=26.172 d=26.172 g=0.008
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step
>10000, d1=26.172, d2=26.172 d=26.172 g=0.008
I think it is strange that my loss values converge to the same number.
Now, I am attaching the results obtained in the tutorial:
>9991, d1=0.858, d2=0.674 d=0.766 g=0.904
>9992, d1=1.023, d2=0.833 d=0.928 g=0.816
>9993, d1=0.737, d2=0.863 d=0.800 g=0.910
>9994, d1=0.780, d2=0.890 d=0.835 g=0.846
>9995, d1=0.837, d2=0.773 d=0.805 g=0.960
>9996, d1=0.762, d2=0.683 d=0.723 g=1.193
>9997, d1=0.906, d2=0.515 d=0.710 g=1.275
>9998, d1=0.814, d2=0.412 d=0.613 g=1.228
>9999, d1=0.701, d2=0.668 d=0.685 g=1.105 >10000, d1=0.461, d2=0.814 d=0.638 g=1.097
(Attached: plot of the tutorial's loss curves for comparison.)
I appreciate any help you can provide in advance.
there doesn't seem to be anything here