Of course! The term "get or create" is a very common pattern in programming, especially when dealing with databases. It refers to a function that tries to retrieve an object from a database. If the object exists, it returns it. If it doesn't exist, it creates a new one, saves it to the database, and then returns the new object.

This is essential for avoiding duplicate data and ensuring data integrity.
Here’s a complete guide on how to implement "get or create" in Python, covering the most common scenarios.
The Classic Pattern (Without an ORM)
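This is the fundamental logic that can be adapted to any database or ORM. In a real database, the "get" and "create" steps must be protected against concurrent callers — typically with a unique constraint on the lookup fields plus a transaction — otherwise two callers can both miss the row and both insert it.
To keep the example self-contained, a plain Python dictionary stands in for the database.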

# A very simple in-memory "database".
# In a real app, this would be a database connection.
db = {}
db_counter = 1


def get_or_create(model_class, defaults=None, **kwargs):
    """
    Return the object stored for the lookup fields, creating it if absent.

    :param model_class: Callable used to build a new object; it is invoked
        as ``model_class(**fields)``.
    :param defaults: Optional dictionary of extra values used only when the
        object has to be created. They are never part of the lookup, and a
        default overrides a lookup field of the same name.
    :param kwargs: Fields to look up the object by (values must be hashable).
    :return: A tuple of (object, created), where 'created' is a boolean.
    """
    # --- GET ---
    # In a real DB, this would be a query like:
    #   SELECT * FROM table WHERE key1=val1 AND key2=val2
    lookup_key = tuple(sorted(kwargs.items()))
    obj = db.get(lookup_key)
    # Compare against None so a falsy stored object still counts as found.
    if obj is not None:
        return obj, False  # Found, not created

    # --- CREATE ---
    # Defaults only contribute to construction; they must not leak into the
    # storage key, otherwise the next lookup by the same fields would miss.
    params = {**kwargs, **(defaults or {})}
    new_obj = model_class(**params)
    # In a real DB, this would be an INSERT statement
    db[lookup_key] = new_obj
    print(f"Created new object: {new_obj}")
    return new_obj, True
# --- Example Usage ---
class User:
    """Minimal user record for the in-memory example.

    Each new instance takes its ``id`` from the module-level ``db_counter``
    and then increments that counter.
    """

    def __init__(self, username, email, is_active=True):
        # The `global` declaration has to come before the first use of the
        # name inside the function; otherwise Python rejects the function
        # with a SyntaxError.
        global db_counter
        self.id = db_counter
        db_counter += 1
        self.username = username
        self.email = email
        self.is_active = is_active

    def __repr__(self):
        return f"<User(id={self.id}, username='{self.username}')>"
# First call: nothing is stored for 'alice' yet, so a new User is built.
alice, created = get_or_create(
    User,
    username='alice',
    email='alice@example.com',
)
print(f"Result: {alice}, Created: {created}")
# Expected output:
# Created new object: <User(id=1, username='alice')>
# Result: <User(id=1, username='alice')>, Created: True

# Second call with the same lookup fields: the stored object comes back.
alice_again, created_again = get_or_create(
    User,
    username='alice',
    email='alice@example.com',
)
print(f"Result: {alice_again}, Created: {created_again}")
# Expected output:
# Result: <User(id=1, username='alice')>, Created: False

# 'defaults' are applied only when the object has to be created;
# when the lookup finds an existing object they are ignored.
bob, created_bob = get_or_create(
    User,
    username='bob',
    defaults={'email': 'bob@default.com', 'is_active': False},
)
print(f"Result: {bob}, Created: {created_bob}")
# Expected output:
# Created new object: <User(id=2, username='bob')>
# Result: <User(id=2, username='bob')>, Created: True

# Look 'bob' up again, this time passing a different default for 'is_active'.
bob_again, created_bob_again = get_or_create(
    User,
    username='bob',
    defaults={'is_active': True},  # This will be ignored
)
print(f"Result: {bob_again}, Created: {created_bob_again}, is_active: {bob_again.is_active}")
# Expected output:
# Result: <User(id=2, username='bob')>, Created: False, is_active: False
The Recommended Way: Using an ORM (Django)
If you're using the Django web framework, this pattern is built-in and is the standard, safest way to handle it. Django's ORM handles the transaction and race condition prevention for you.
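Key Point: Django's get_or_create does not lock rows. It first tries a get(); if nothing is found, it attempts the create() inside an atomic block, and if that insert fails with an IntegrityError (because another process created the row first) it fetches the existing row instead. This is only race-safe when the database enforces a unique constraint on the lookup fields.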
# Run this in a Django shell or a management command.
from myapp.models import User  # Assuming you have a User model

# get_or_create hands back a 2-tuple: (object, created).
try:
    # Keyword arguments form the lookup; 'defaults' supplies the extra
    # field values used only if a new object has to be created.
    user, created = User.objects.get_or_create(
        username='charlie',
        defaults={'email': 'charlie@example.com', 'is_active': True},
    )
    if not created:
        print(f"User already exists: {user}")
    else:
        print(f"Successfully created new user: {user}")
except Exception as e:
    print(f"An error occurred: {e}")

# --- What happens on a second call? ---
user_again, created_again = User.objects.get_or_create(
    username='charlie',
    defaults={'email': 'charlie@new.com'},  # This email will NOT be used
)
print(f"Second call result: {user_again}, Created: {created_again}")
# 'user_again' keeps the original email 'charlie@example.com':
# nothing was created, so the defaults were ignored.
The Modern Way: Using SQLAlchemy (SQLAlchemy 2.0+)
SQLAlchemy, a popular ORM for Python, does not ship a built-in get_or_create, but the pattern is easy to write yourself: query for the row using the lookup fields and, if nothing is found, build the object and session.add() it.
import sqlalchemy
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.orm import sessionmaker, declarative_base
# Setup: the declarative base class that the mapped User model below inherits from.
Base = declarative_base()
class User(Base):
    """ORM model for the ``users`` table; ``username`` is declared unique."""

    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)
    username = Column(String, unique=True)
    email = Column(String)

    def __repr__(self):
        ident, name = self.id, self.username
        return f"<User(id={ident}, username='{name}')>"
# In-memory SQLite database for this example
engine = create_engine('sqlite:///:memory:')
# Create the tables for every model declared on Base (here: 'users').
Base.metadata.create_all(engine)
# Session factory bound to the engine, and the single session the examples below share.
Session = sessionmaker(bind=engine)
session = Session()
# --- The SQLAlchemy get_or_create pattern ---
def get_or_create(session, model, defaults=None, **kwargs):
    """
    SQLAlchemy implementation of get_or_create.

    :param session: The SQLAlchemy session.
    :param model: The model class.
    :param defaults: Dictionary of default values for creation. Lookup
        fields in ``kwargs`` take precedence over a default of the same name.
    :param kwargs: Fields to look up the object by. They should identify at
        most one row; ``one_or_none()`` raises if several rows match.
    :return: A tuple of (instance, created).
    :raises sqlalchemy.exc.IntegrityError: If the insert violates a
        constraint and no row matching the lookup exists afterwards.
    """
    # --- GET ---
    # Generic lookup by arbitrary columns. (session.get() only fetches by
    # primary key, so it is not usable here.)
    instance = session.query(model).filter_by(**kwargs).one_or_none()
    if instance is not None:
        return instance, False

    # --- CREATE ---
    params = {**(defaults or {}), **kwargs}
    instance = model(**params)
    session.add(instance)
    try:
        session.commit()  # Commit the transaction to save the new object
    except sqlalchemy.exc.IntegrityError:
        # Another writer may have inserted the same row between our SELECT
        # and INSERT. Reset the session and fetch the row that won the race.
        session.rollback()
        instance = session.query(model).filter_by(**kwargs).one_or_none()
        if instance is None:
            # The failure was not a duplicate of our lookup (e.g. some other
            # constraint was violated), so let the caller see it.
            raise
        return instance, False
    print(f"Created new object: {instance}")
    return instance, True
# --- Example Usage ---
# First call: no row for 'david' exists yet, so one is inserted.
david, created = get_or_create(
    session,
    User,
    username='david',
    email='david@example.com',
)
print(f"Result: {david}, Created: {created}")
# Expected output:
# Created new object: <User(id=1, username='david')>
# Result: <User(id=1, username='david')>, Created: True

# Second call: the row for 'david' is found and returned unchanged.
david_again, created_again = get_or_create(
    session,
    User,
    username='david',
)
print(f"Result: {david_again}, Created: {created_again}")
# Expected output:
# Result: <User(id=1, username='david')>, Created: False
Why is a Single Transaction Important? (Race Conditions)
Consider what happens when two processes run the check-then-insert logic at the same time with no safeguard:
- Process A queries for a user named "eve". The user doesn't exist.
- Process B (running at the same time) also queries for a user named "eve". The user doesn't exist.
- Process A creates the user "eve" and saves it.
- Process B creates the user "eve" and saves it.
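Result: Without a unique constraint, you now have two "eve" users in your database. With a unique constraint, Process B's insert fails with an integrity error that your code must handle.
A transaction alone does not prevent this. The reliable safeguard is a unique constraint on the lookup fields: Django's get_or_create catches the resulting integrity error and fetches the existing row instead, and with SQLAlchemy you handle the error yourself (roll back, then re-query) in your own helper.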
Summary: Which One to Use?
| Scenario | Recommended Method | Why? |
|---|---|---|
| Django Project | Model.objects.get_or_create(...) | It's the idiomatic, built-in, and safest way. Handles race conditions automatically when the lookup fields have a unique constraint. |
| SQLAlchemy Project | session.query(...).one_or_none() + session.add() | This is the standard, explicit pattern in SQLAlchemy. It's clear and gives you control. |
| Raw SQL / No ORM | Implement the get then create logic inside a database transaction. | You must manage the transaction yourself to prevent race conditions. Use BEGIN, SELECT, and conditional INSERT statements. |
| Simple In-Memory | The basic Python dictionary example. | Good for understanding the core logic, but not for production applications where data persistence matters. |
