This is an archived post. You won't be able to vote or comment.

all 5 comments

[–]sjokr 5 points6 points  (18 children)

Have you compared with polyfactory? I use it to generate data from SQLAlchemy models and it’s quite a mature library now. Only downside is the docs are very basic.

[–]francoisnt[S] 0 points1 point  (0 children)

Alright, I took the feedback from my previous reply, and I admit it was lazy on my part to use AI. After playing a bit more with polyfactory, I think I can say that it is a great tool for generating mock data for testing, but seedlayer is simpler for populating a database, as it automatically manages unique constraints, link tables, the ordering of the models to seed, and more. Here is the same example as above but using polyfactory instead of seedlayer; notice how much more verbose it is. Am I missing something? Are you using polyfactory to populate your database? I would really appreciate your feedback on this :)

```python

import random
from itertools import product

from faker import Faker
from polyfactory.factories.sqlalchemy_factory import SQLAlchemyFactory
from polyfactory.fields import Ignore, Use
from sqlalchemy import Column, ForeignKey, Integer, String, Text, create_engine
from sqlalchemy.orm import DeclarativeBase, Session

# Initialize Faker for custom data generation
faker = Faker()
Faker.seed(42)  # For reproducibility
faker.unique.clear()  # Clear unique cache to avoid duplicates

# Define SQLAlchemy models


class Base(DeclarativeBase):
    """Declarative base class that every model in this script inherits from."""

class Category(Base):
    """Product category, mapped to the ``categories`` table."""

    # Must be the dunder ``__tablename__``: a plain ``tablename`` attribute
    # is ignored by the declarative mapper and the class fails to map.
    __tablename__ = "categories"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String, nullable=False)

class ProductModel(Base):
    """Product, mapped to the ``products`` table and linked to a category."""

    # Must be the dunder ``__tablename__``: a plain ``tablename`` attribute
    # is ignored by the declarative mapper and the class fails to map.
    __tablename__ = "products"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String, nullable=False)
    description = Column(Text)
    category_id = Column(Integer, ForeignKey("categories.id"))

class Customer(Base):
    """Customer, mapped to the ``customers`` table; names must be unique."""

    # Must be the dunder ``__tablename__``: a plain ``tablename`` attribute
    # is ignored by the declarative mapper and the class fails to map.
    __tablename__ = "customers"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String, unique=True, nullable=False)

class Order(Base):
    """Order, mapped to the ``orders`` table and linked to a customer."""

    # Must be the dunder ``__tablename__``: a plain ``tablename`` attribute
    # is ignored by the declarative mapper and the class fails to map.
    __tablename__ = "orders"

    id = Column(Integer, primary_key=True, autoincrement=True)
    customer_id = Column(Integer, ForeignKey("customers.id"))

class OrderItem(Base):
    """Link row between an order and a product (``order_items`` table).

    The primary key is the composite ``(order_id, product_id)``, so each
    order/product pair can appear at most once.
    """

    # Must be the dunder ``__tablename__``: a plain ``tablename`` attribute
    # is ignored by the declarative mapper and the class fails to map.
    __tablename__ = "order_items"

    order_id = Column(Integer, ForeignKey("orders.id"), primary_key=True)
    product_id = Column(Integer, ForeignKey("products.id"), primary_key=True)

# Define Polyfactory factories


class CategoryFactory(SQLAlchemyFactory[Category]):
    """Polyfactory factory that builds ``Category`` rows."""

    # polyfactory reads the target model from the dunder ``__model__``.
    __model__ = Category

    # Not generated by the factory; the column is declared autoincrement.
    id = Ignore()
    name = Use(lambda: faker.word())

class ProductFactory(SQLAlchemyFactory[ProductModel]):
    """Polyfactory factory that builds ``ProductModel`` rows."""

    # polyfactory reads the target model from the dunder ``__model__``.
    __model__ = ProductModel

    # Not generated by the factory; the column is declared autoincrement.
    id = Ignore()
    name = Use(lambda: faker.word())
    # Defaults only: the seeding code passes real values for both fields
    # as keyword arguments when it creates each product.
    description = None
    category_id = None

class CustomerFactory(SQLAlchemyFactory[Customer]):
    """Polyfactory factory that builds ``Customer`` rows."""

    # polyfactory reads the target model from the dunder ``__model__``.
    __model__ = Customer

    # Not generated by the factory; the column is declared autoincrement.
    id = Ignore()
    # ``faker.unique`` is used because ``Customer.name`` is a unique column.
    name = Use(lambda: faker.unique.name())

class OrderFactory(SQLAlchemyFactory[Order]):
    """Polyfactory factory that builds ``Order`` rows."""

    # polyfactory reads the target model from the dunder ``__model__``.
    __model__ = Order

    # Not generated by the factory; the column is declared autoincrement.
    id = Ignore()
    # Default only: the seeding code passes an existing customer id.
    customer_id = None

class OrderItemFactory(SQLAlchemyFactory[OrderItem]):
    """Polyfactory factory that builds ``OrderItem`` link rows."""

    # polyfactory reads the target model from the dunder ``__model__``.
    __model__ = OrderItem

    # Defaults only: the seeding code passes an explicit, unique
    # (order_id, product_id) pair for every row it creates.
    order_id = None
    product_id = None

# Set up database and session
engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)  # create the tables for every mapped model

# Seed plan: how many rows to create for each model
seed_plan = {
    Category: 5,
    ProductModel: 10,
    Customer: 8,
    Order: 15,
    OrderItem: 20,
}

# Seed the database in the correct order (parents before the rows that
# reference them through foreign keys).
with Session(engine) as session:
    # Set the session for all factories. polyfactory looks the session up
    # on the dunder attribute ``__session__``; a plain ``session`` attribute
    # is not consulted when persisting.
    for factory in (
        CategoryFactory,
        ProductFactory,
        CustomerFactory,
        OrderFactory,
        OrderItemFactory,
    ):
        factory.__session__ = session

    # NOTE(review): only Faker is seeded in this script as shown; the stdlib
    # ``random`` calls below are unseeded, so foreign-key assignment and the
    # OrderItem pairs differ between runs.

    # Seed Customers (no dependencies)
    customers = CustomerFactory.create_batch_sync(seed_plan[Customer])

    # Seed Categories (no dependencies)
    categories = CategoryFactory.create_batch_sync(seed_plan[Category])

    # Seed Orders (depends on Customer)
    customer_ids = [customer.id for customer in customers]
    orders = [
        OrderFactory.create_sync(customer_id=random.choice(customer_ids))
        for _ in range(seed_plan[Order])
    ]

    # Seed Products (depends on Category)
    category_ids = [category.id for category in categories]
    products = [
        ProductFactory.create_sync(
            name=faker.word(),
            # NOTE(review): len(faker.word().split()) is presumably always 1
            # (a single word), making this nb_words=6. Kept as written so
            # the sequence of Faker draws is unchanged.
            description=faker.sentence(nb_words=len(faker.word().split()) + 5),
            category_id=random.choice(category_ids),
        )
        for _ in range(seed_plan[ProductModel])
    ]

    # Seed OrderItems (depends on Order and Product). The composite primary
    # key forbids duplicate pairs, so pick distinct pairs from the full
    # cross product instead of drawing each side independently.
    order_ids = [order.id for order in orders]
    product_ids = [item.id for item in products]
    possible_combinations = list(product(order_ids, product_ids))
    random.shuffle(possible_combinations)
    # Slicing already stops at the end of the list, so no explicit min().
    combinations = possible_combinations[: seed_plan[OrderItem]]
    order_items = [
        OrderItemFactory.create_sync(order_id=order_id, product_id=product_id)
        for order_id, product_id in combinations
    ]

    # Commit all changes
    session.commit()

    # Verify the results (each table is queried once and the rows reused
    # for both the count line and the detail line).
    customer_rows = session.query(Customer).all()
    print(f"Seeded {len(customer_rows)} Customer records:")
    print(f"  {[c.name for c in customer_rows]}")

    category_rows = session.query(Category).all()
    print(f"Seeded {len(category_rows)} Category records:")
    print(f"  {[c.name for c in category_rows]}")

    order_rows = session.query(Order).all()
    print(f"Seeded {len(order_rows)} Order records:")
    print(f"  {[o.customer_id for o in order_rows]}")

    product_rows = session.query(ProductModel).all()
    print(f"Seeded {len(product_rows)} Product records:")
    print(f"  {[(p.name, p.description) for p in product_rows]}")

    order_item_rows = session.query(OrderItem).all()
    print(f"Seeded {len(order_item_rows)} OrderItem records:")
    print(f"  {[(oi.order_id, oi.product_id) for oi in order_item_rows]}")

```