Of course! "Grouping images" can mean several different things in Python. I'll cover the most common interpretations with clear code examples.

Here are the main ways you might want to group images:
- Group by File Properties: Grouping images based on metadata like creation date, size, or camera model.
- Group by Visual Similarity (Clustering): Grouping images that look similar to each other (e.g., all photos of a cat, all pictures of a sunset).
- Group into a Single Image (Mosaics/Collages): Combining multiple images into one larger image file.
- Group by Face Detection: Grouping images that contain the same person's face.
Let's dive into each one.
Grouping Images by File Properties (Using os and Pillow)
This is the most straightforward method. You can group images based on any information you can extract from the file system or the image file itself.
Common Grouping Keys:

- Creation Date: Group all images taken on the same day.
- File Size: Group images into "small," "medium," and "large" buckets.
- Dimensions: Group by resolution (e.g., "thumbnails," "HD," "4K").
- Camera Model (EXIF data): Group all photos taken with the same camera.
Example: Grouping Images by Creation Date
This script will scan a directory, find all images, and create subdirectories named by the date the photo was taken, moving the images into them.
Prerequisites:
You'll need the Pillow library to handle image metadata robustly.
pip install Pillow
Code:
import os
import shutil
from datetime import datetime
from PIL import Image
def group_images_by_creation_date(source_dir, target_dir):
    """
    Move images from ``source_dir`` into per-date subfolders of ``target_dir``.

    The date is taken from the EXIF ``DateTimeOriginal`` tag when the image
    provides one; otherwise the file-system creation time
    (``os.path.getctime``) is used. Folders are named with the ISO date,
    e.g. ``2025-10-27``.

    Args:
        source_dir: Directory scanned (non-recursively) for image files.
        target_dir: Directory that receives the date-named subfolders.
            It is created if it does not exist.

    Files that cannot be processed are reported on stdout and left where
    they are. An existing file in the destination folder is never overwritten.
    """
    os.makedirs(target_dir, exist_ok=True)

    # Supported image extensions
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff')
    exif_date_tag = 36867  # 36867 is the 'DateTimeOriginal' tag

    # Sorted so that repeated runs process files in the same order.
    for filename in sorted(os.listdir(source_dir)):
        if not filename.lower().endswith(image_extensions):
            continue

        filepath = os.path.join(source_dir, filename)
        try:
            # `_getexif` is a private helper that only some Pillow image
            # classes define; look it up defensively so that other formats
            # fall through to the file-system date instead of erroring.
            with Image.open(filepath) as img:
                read_exif = getattr(img, '_getexif', None)
                exif_data = read_exif() if callable(read_exif) else None

            creation_date = None
            date_str = exif_data.get(exif_date_tag) if exif_data else None
            if date_str:
                try:
                    creation_date = datetime.strptime(date_str, '%Y:%m:%d %H:%M:%S').date()
                except (ValueError, TypeError):
                    # Malformed EXIF date: use the file-system date below.
                    creation_date = None

            if creation_date is None:
                # Fallback to file system creation date if no usable EXIF
                timestamp = os.path.getctime(filepath)
                creation_date = datetime.fromtimestamp(timestamp).date()

            # Create a directory for the date (e.g., "2025-10-27")
            date_folder = os.path.join(target_dir, str(creation_date))
            os.makedirs(date_folder, exist_ok=True)

            destination = os.path.join(date_folder, filename)
            if os.path.exists(destination):
                # shutil.move may silently replace an existing file.
                raise FileExistsError(f"{destination} already exists")

            # The image was closed above, so the move also works on
            # platforms that refuse to move open files.
            shutil.move(filepath, destination)
            print(f"Moved {filename} to {date_folder}")
        except Exception as e:
            print(f"Could not process {filename}. Error: {e}")
# --- Usage ---
# Replace both placeholder paths with real directories before running.
source_directory = "path/to/your/unsorted_photos"
target_directory = "path/to/your/sorted_photos_by_date"
group_images_by_creation_date(
    source_dir=source_directory,
    target_dir=target_directory,
)
Grouping Images by Visual Similarity (Using scikit-learn)
This is a more advanced technique. We use machine learning to "embed" each image into a numerical vector (a list of numbers) that represents its visual content. Images with similar content will have vectors that are close to each other in space. We then use a clustering algorithm to group these similar vectors.

Prerequisites:
pip install scikit-learn numpy pillow
Concept:
- Feature Extraction: Convert each image into a feature vector. We'll use a simple method (flattening pixel values) for this example, but more advanced methods like CNNs are much more powerful.
- Dimensionality Reduction (Optional but Recommended): Reduce the number of features to make clustering faster and more effective. We'll use PCA.
- Clustering: Group the vectors into clusters using an algorithm like KMeans.
Code:
import os
import numpy as np
from PIL import Image
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
def load_and_preprocess_images(image_dir, image_size=(64, 64), max_images=100):
    """Load images, resize them, and convert them to flat feature vectors.

    Args:
        image_dir: Directory scanned (non-recursively) for image files.
        image_size: ``(width, height)`` every image is resized to.
        max_images: Upper bound on the number of images loaded.

    Returns:
        A tuple ``(vectors, filenames)``. ``vectors`` is a NumPy array with
        one flattened RGB pixel vector per loaded image, and ``filenames``
        lists the matching file names in the same order. Files that fail to
        load are reported on stdout and skipped.
    """
    # Supported image extensions
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

    image_vectors = []
    image_filenames = []

    # Sorted so the same subset is chosen on every run when the directory
    # holds more than `max_images` files.
    for filename in sorted(os.listdir(image_dir)):
        if len(image_filenames) >= max_images:
            break  # Limit reached: no need to scan the rest of the directory.
        if not filename.lower().endswith(image_extensions):
            continue

        filepath = os.path.join(image_dir, filename)
        try:
            # The context manager releases the file handle as soon as the
            # pixel data has been copied into the resized image.
            with Image.open(filepath) as img:
                resized = img.convert('RGB').resize(image_size)  # Ensure 3 channels (RGB)
            # Convert image to a numpy array and flatten it
            vector = np.array(resized).flatten()
        except Exception as e:
            print(f"Skipping {filename} due to error: {e}")
            continue

        image_vectors.append(vector)
        image_filenames.append(filename)

    return np.array(image_vectors), image_filenames
def cluster_images(image_vectors, n_clusters=5):
    """Cluster image feature vectors with K-Means after scaling and PCA.

    Args:
        image_vectors: 2-D array-like with one feature vector per image.
        n_clusters: Requested number of clusters. It is capped at the number
            of images, because K-Means cannot form more clusters than there
            are samples.

    Returns:
        The array of cluster labels produced by ``KMeans.fit_predict``,
        one label per input vector.
    """
    # Standardize the features (important for PCA and K-Means)
    scaler = StandardScaler()
    scaled_vectors = scaler.fit_transform(image_vectors)
    n_samples, n_features = scaled_vectors.shape

    # Reduce dimensionality to make clustering faster and more effective.
    # PCA rejects n_components larger than min(n_samples, n_features), so the
    # target of 50 components is capped for small inputs.
    n_components = min(50, n_samples, n_features)
    if n_samples > 1:
        pca = PCA(n_components=n_components)
        reduced_vectors = pca.fit_transform(scaled_vectors)
    else:
        # A single sample has no variance to decompose; cluster it as-is.
        reduced_vectors = scaled_vectors

    # Perform K-Means clustering
    kmeans = KMeans(n_clusters=min(n_clusters, n_samples), random_state=42, n_init=10)
    clusters = kmeans.fit_predict(reduced_vectors)
    return clusters
# --- Usage ---
image_directory = "path/to/your/images"
num_clusters = 5  # You can choose this number or use methods to find the optimal one

# Step 1: load and preprocess the images.
# WARNING: Loading many large images can consume a lot of memory.
# For a large dataset, use a generator or process in batches.
image_vectors, filenames = load_and_preprocess_images(image_directory, max_images=200)

if len(image_vectors) == 0:
    print("No images found or could be loaded in the directory.")
else:
    print(f"Loaded {len(image_vectors)} images.")

    # Step 2: assign a cluster label to every image.
    cluster_labels = cluster_images(image_vectors, n_clusters=num_clusters)

    # Step 3: collect file names under their cluster label.
    grouped_images = {}
    for name, label in zip(filenames, cluster_labels):
        grouped_images.setdefault(label, []).append(name)

    # Step 4: report each cluster and its members.
    for cluster_id, image_list in grouped_images.items():
        print(f"\n--- Cluster {cluster_id} ({len(image_list)} images) ---")
        for img_name in image_list:
            print(f" - {img_name}")
Grouping Images into a Single Image (Mosaic/Collage)
This involves reading multiple images and drawing them onto a larger canvas.
Prerequisites:
pip install Pillow
Code:
import os
from PIL import Image
def create_collage(image_dir, output_path, grid_size=(3, 3), image_size=(300, 300)):
    """
    Create a simple grid collage from images in a directory.

    Args:
        image_dir: Directory scanned (non-recursively) for image files.
        output_path: File the finished collage is saved to.
        grid_size: ``(columns, rows)`` of the grid.
        image_size: ``(width, height)`` each image is resized to.

    Images fill the grid row by row on a white background. Only as many
    images as there are grid slots are loaded. If no image can be loaded, a
    message is printed and nothing is saved.

    Raises:
        ValueError: If ``grid_size`` contains a value smaller than 1.
    """
    cols, rows = grid_size
    if cols < 1 or rows < 1:
        raise ValueError(f"grid_size must contain positive values, got {grid_size}")
    max_tiles = cols * rows

    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
    tiles = []

    # Load only as many images as the grid can show. Sorted for a
    # reproducible layout.
    for filename in sorted(os.listdir(image_dir)):
        if len(tiles) >= max_tiles:
            break
        if not filename.lower().endswith(image_extensions):
            continue
        try:
            img_path = os.path.join(image_dir, filename)
            # Convert before resizing so palette/alpha images are resampled
            # as RGB; the context manager closes the source file.
            with Image.open(img_path) as img:
                tiles.append(img.convert('RGB').resize(image_size))
        except Exception as e:
            print(f"Could not load {filename}: {e}")

    if not tiles:
        print("No images found to create a collage.")
        return

    # Determine the size of the final collage
    tile_width, tile_height = image_size
    collage_width = cols * tile_width
    collage_height = rows * tile_height

    # Create a new blank image for the collage
    collage = Image.new('RGB', (collage_width, collage_height), color=(255, 255, 255))  # White background

    # Paste images onto the collage, left to right and top to bottom
    for i, tile in enumerate(tiles):
        row, col = divmod(i, cols)
        collage.paste(tile, (col * tile_width, row * tile_height))

    # Save the final collage
    collage.save(output_path)
    print(f"Collage saved to {output_path}")
# --- Usage ---
# Replace the placeholder directory with a real one before running.
source_directory = "path/to/your/images_for_collage"
output_file = "collage.jpg"
# Build a grid of 3 columns by 3 rows; every tile is resized to 300x300
create_collage(
    image_dir=source_directory,
    output_path=output_file,
    grid_size=(3, 3),
    image_size=(300, 300),
)
Grouping Images by Face Detection (Using face_recognition)
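This is a very practical application. It detects faces in a set of images and groups the images that contain the same person. Note that the example below only uses the first face detected in each image, so a photo showing several people is grouped by whichever face is found first.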
Prerequisites:
This library can be tricky to install because it depends on dlib. Follow the installation guide in the official face_recognition README before running:
pip install face_recognition
Code:
import face_recognition
import glob
import os
def get_face_encodings(image_path):
    """Return the encoding of the first face detected in an image.

    Returns ``None`` when the image contains no face or when any step of the
    processing fails; failures are reported on stdout.
    """
    try:
        pixels = face_recognition.load_image_file(image_path)
        boxes = face_recognition.face_locations(pixels)
        if boxes:
            # Every detected face is encoded, but only the first is returned.
            return face_recognition.face_encodings(pixels, boxes)[0]
        return None  # No faces found
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return None
def group_faces_by_image(image_dir, tolerance=0.6):
    """
    Group images by the face they contain.

    Only PNG files directly inside ``image_dir`` are considered (adjust the
    glob pattern for other formats), and each image is represented by the
    single encoding returned by ``get_face_encodings``.

    Args:
        image_dir: Directory that holds the images.
        tolerance: Passed to ``face_recognition.compare_faces``; lower
            values are stricter.

    Returns:
        A list of groups, where each group is a list of image paths. Every
        image with a detected face appears in at most one group: once an
        image has been grouped it is not added to a later group. Returns an
        empty list when no images or no faces are found.
    """
    # Sorted so the grouping does not depend on directory listing order.
    image_paths = sorted(glob.glob(os.path.join(image_dir, "*.[pP][nN][gG]")))  # Adjust for other formats
    if not image_paths:
        print("No PNG images found in the directory.")
        return []

    # Step 1: Find all faces and their encodings
    print("Step 1: Finding faces in images...")
    face_paths = []        # paths of images in which a face was found
    known_encodings = []   # known_encodings[k] belongs to face_paths[k]
    for path in image_paths:
        encoding = get_face_encodings(path)
        if encoding is not None:
            face_paths.append(path)
            known_encodings.append(encoding)

    if not known_encodings:
        print("No faces found in any images.")
        return []

    # Step 2: Compare all faces to each other to find matches
    print("Step 2: Comparing faces to find groups...")
    image_groups = []
    processed_images = set()
    for path, current_encoding in zip(face_paths, known_encodings):
        if path in processed_images:
            continue

        # Compare this face against all known faces (including itself)
        matches = face_recognition.compare_faces(known_encodings, current_encoding, tolerance=tolerance)

        # Keep only matches that are not already in an earlier group, so
        # groups never overlap.
        group = [
            candidate
            for candidate, is_match in zip(face_paths, matches)
            if is_match and candidate not in processed_images
        ]
        if group:
            image_groups.append(group)
            processed_images.update(group)

    return image_groups
# --- Usage ---
image_directory = "path/to/your/photos_with_faces"
face_tolerance = 0.5  # Lower is stricter (more similar). 0.6 is a good default.
groups = group_faces_by_image(image_directory, tolerance=face_tolerance)

# Report the groups, numbered from 1; nothing is printed when none were found.
if groups:
    print("\n--- Face Grouping Results ---")
    for group_number, members in enumerate(groups, start=1):
        print(f"\nGroup {group_number} ({len(members)} images):")
        for member_path in members:
            print(f" - {os.path.basename(member_path)}")
Summary
| Method | Use Case | Key Libraries | Complexity |
|---|---|---|---|
| File Properties | Organizing photos by date, camera, etc. | os, shutil, Pillow | Low |
| Visual Similarity | Finding all similar-looking images (e.g., all cats) | scikit-learn, Pillow | Medium |
| Mosaic/Collage | Creating a single combined image | Pillow | Low |
| Face Detection | Grouping photos of the same person | face_recognition | Medium |
Choose the method that best fits your goal! For most general photo organization, Grouping by File Properties is the most useful. For more complex analysis, Grouping by Visual Similarity or Face Detection are powerful tools.
