Database Design: SQL vs NoSQL
Introduction
Choosing between SQL and NoSQL databases is critical for application success. This guide covers PostgreSQL (SQL) and MongoDB (NoSQL) design patterns, schema design, indexing strategies, normalization vs denormalization, and when to use each database type.
1. SQL vs NoSQL Overview
| Aspect | SQL (PostgreSQL) | NoSQL (MongoDB) |
|---|---|---|
| Data Model | Tables, rows, columns | Collections, documents (JSON) |
| Schema | Fixed schema, defined upfront | Flexible schema, dynamic |
| Relationships | Foreign keys, JOINs | Embedded docs or references |
| Transactions | ACID compliant | Limited (single doc or multi-doc) |
| Scaling | Vertical scaling (scale up) | Horizontal scaling (scale out) |
| Best For | Complex queries, reports, consistency | Rapid development, flexible data, scale |
2. PostgreSQL Schema Design
-- E-commerce database schema
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
email VARCHAR(255) UNIQUE NOT NULL,
name VARCHAR(255) NOT NULL,
password_hash VARCHAR(255) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE products (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL,
description TEXT,
price DECIMAL(10, 2) NOT NULL CHECK (price >= 0),
stock_quantity INTEGER NOT NULL DEFAULT 0,
category_id UUID REFERENCES categories(id),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE categories (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(100) UNIQUE NOT NULL,
parent_id UUID REFERENCES categories(id), -- Self-referencing for hierarchy
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE orders (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
status VARCHAR(50) NOT NULL DEFAULT 'pending',
total_amount DECIMAL(10, 2) NOT NULL,
shipping_address JSONB NOT NULL, -- Store JSON data in SQL
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE order_items (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
order_id UUID NOT NULL REFERENCES orders(id) ON DELETE CASCADE,
product_id UUID NOT NULL REFERENCES products(id),
quantity INTEGER NOT NULL CHECK (quantity > 0),
price DECIMAL(10, 2) NOT NULL,
UNIQUE(order_id, product_id)
);
-- Indexes for performance
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_products_category ON products(category_id);
CREATE INDEX idx_orders_user ON orders(user_id);
CREATE INDEX idx_orders_status ON orders(status);
CREATE INDEX idx_orders_created ON orders(created_at DESC);
CREATE INDEX idx_order_items_order ON order_items(order_id);
-- Full-text search index
CREATE INDEX idx_products_search ON products USING GIN(to_tsvector('english', name || ' ' || description));
-- Composite index for common queries
CREATE INDEX idx_products_category_price ON products(category_id, price);
3. MongoDB Schema Design
// MongoDB collections (embedded approach)
// Users collection
{
"_id": ObjectId("..."),
"email": "user@example.com",
"name": "John Doe",
"passwordHash": "$2b$10$...",
"profile": {
"avatar": "https://...",
"bio": "...",
"phoneNumber": "+1234567890"
},
"addresses": [
{
"type": "shipping",
"street": "123 Main St",
"city": "New York",
"country": "USA",
"isDefault": true
}
],
"createdAt": ISODate("2024-01-01T00:00:00Z"),
"updatedAt": ISODate("2024-01-01T00:00:00Z")
}
// Products collection
{
"_id": ObjectId("..."),
"name": "Laptop",
"description": "High-performance laptop",
"price": 999.99,
"stockQuantity": 50,
"category": {
"id": "electronics",
"name": "Electronics",
"path": ["Electronics", "Computers", "Laptops"]
},
"specifications": {
"cpu": "Intel i7",
"ram": "16GB",
"storage": "512GB SSD"
},
"images": ["url1", "url2"],
"tags": ["laptop", "computers", "electronics"],
"reviews": [
{
"userId": ObjectId("..."),
"userName": "Jane",
"rating": 5,
"comment": "Great product!",
"createdAt": ISODate("2024-01-15T00:00:00Z")
}
],
"avgRating": 4.5,
"reviewCount": 12,
"createdAt": ISODate("2024-01-01T00:00:00Z"),
"updatedAt": ISODate("2024-01-01T00:00:00Z")
}
// Orders collection (embedded items)
{
"_id": ObjectId("..."),
"userId": ObjectId("..."),
"status": "pending",
"items": [
{
"productId": ObjectId("..."),
"productName": "Laptop",
"quantity": 1,
"price": 999.99,
"subtotal": 999.99
}
],
"totalAmount": 999.99,
"shippingAddress": {
"street": "123 Main St",
"city": "New York",
"country": "USA"
},
"paymentInfo": {
"method": "credit_card",
"last4": "4242",
"transactionId": "txn_..."
},
"statusHistory": [
{
"status": "pending",
"timestamp": ISODate("2024-01-01T00:00:00Z")
},
{
"status": "processing",
"timestamp": ISODate("2024-01-02T00:00:00Z")
}
],
"createdAt": ISODate("2024-01-01T00:00:00Z"),
"updatedAt": ISODate("2024-01-02T00:00:00Z")
}
// MongoDB indexes
db.users.createIndex({ email: 1 }, { unique: true });
db.products.createIndex({ "category.id": 1 });
db.products.createIndex({ tags: 1 });
db.products.createIndex({ name: "text", description: "text" });
db.orders.createIndex({ userId: 1, createdAt: -1 });
db.orders.createIndex({ status: 1, createdAt: -1 });
4. Normalization vs Denormalization
PostgreSQL - Normalized (3NF)
-- Benefits: No data duplication, easy updates, data integrity
-- Drawback: Requires JOINs for complex queries
-- Get order with user and product info
SELECT
o.id,
o.status,
o.total_amount,
u.name as user_name,
u.email as user_email,
json_agg(
json_build_object(
'product_name', p.name,
'quantity', oi.quantity,
'price', oi.price
)
) as items
FROM orders o
JOIN users u ON o.user_id = u.id
JOIN order_items oi ON oi.order_id = o.id
JOIN products p ON oi.product_id = p.id
WHERE o.id = '...'
GROUP BY o.id, u.name, u.email;
-- Materialized view for better performance
CREATE MATERIALIZED VIEW order_summary AS
SELECT
o.id,
o.user_id,
u.name as user_name,
o.total_amount,
COUNT(oi.id) as item_count,
o.created_at
FROM orders o
JOIN users u ON o.user_id = u.id
JOIN order_items oi ON oi.order_id = o.id
GROUP BY o.id, u.name;
-- Refresh periodically
REFRESH MATERIALIZED VIEW CONCURRENTLY order_summary;
MongoDB - Denormalized (Embedded)
// Benefits: Single query, fast reads, no JOINs
// Drawbacks: Data duplication, harder updates
// Get complete order in one query
const order = await db.orders.findOne({ _id: orderId });
// Returns everything: user info, items, products
// Update product name everywhere (if denormalized)
await db.orders.updateMany(
{ "items.productId": productId },
{ $set: { "items.$.productName": newName } }
);
// Hybrid approach - reference + embed critical data
{
"orderId": ObjectId("..."),
"userId": ObjectId("..."), // Reference for updates
"userSnapshot": { // Embed for quick access
"name": "John Doe",
"email": "john@example.com"
},
"items": [
{
"productId": ObjectId("..."), // Reference
"productName": "Laptop", // Denormalized for display
"price": 999.99
}
]
}
5. Advanced Queries
PostgreSQL
-- Window functions
SELECT
name,
price,
category_id,
AVG(price) OVER (PARTITION BY category_id) as avg_category_price,
RANK() OVER (PARTITION BY category_id ORDER BY price DESC) as price_rank
FROM products;
-- Common Table Expressions (CTE)
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', created_at) as month,
SUM(total_amount) as revenue,
COUNT(*) as order_count
FROM orders
WHERE status = 'completed'
GROUP BY month
)
SELECT
month,
revenue,
LAG(revenue) OVER (ORDER BY month) as prev_month_revenue,
revenue - LAG(revenue) OVER (ORDER BY month) as growth
FROM monthly_sales;
-- Recursive CTE (category hierarchy)
WITH RECURSIVE category_tree AS (
SELECT id, name, parent_id, 1 as level
FROM categories
WHERE parent_id IS NULL
UNION ALL
SELECT c.id, c.name, c.parent_id, ct.level + 1
FROM categories c
JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT * FROM category_tree ORDER BY level, name;
-- JSON queries
SELECT
id,
shipping_address->>'city' as city,
shipping_address->>'country' as country
FROM orders
WHERE shipping_address->>'country' = 'USA';
MongoDB
// Aggregation pipeline
db.orders.aggregate([
// Filter
{ $match: { status: 'completed' } },
// Unwind array
{ $unwind: '$items' },
// Group and calculate
{ $group: {
_id: '$items.productId',
totalQuantity: { $sum: '$items.quantity' },
totalRevenue: { $sum: { $multiply: ['$items.quantity', '$items.price'] } },
orderCount: { $sum: 1 }
}},
// Sort
{ $sort: { totalRevenue: -1 } },
// Limit
{ $limit: 10 },
// Lookup (JOIN)
{ $lookup: {
from: 'products',
localField: '_id',
foreignField: '_id',
as: 'product'
}},
// Project (shape output)
{ $project: {
productName: { $arrayElemAt: ['$product.name', 0] },
totalQuantity: 1,
totalRevenue: 1,
avgOrderValue: { $divide: ['$totalRevenue', '$orderCount'] }
}}
]);
// Text search
db.products.find(
{ $text: { $search: 'laptop gaming' } },
{ score: { $meta: 'textScore' } }
).sort({ score: { $meta: 'textScore' } });
// Geospatial queries
db.stores.createIndex({ location: '2dsphere' });
db.stores.find({
location: {
$near: {
$geometry: {
type: 'Point',
coordinates: [-73.97, 40.77] // [lng, lat]
},
$maxDistance: 5000 // 5km
}
}
});
6. Performance Optimization
PostgreSQL
-- Explain query plan
EXPLAIN ANALYZE
SELECT * FROM products WHERE category_id = '...' AND price > 100;
-- Index strategies
CREATE INDEX idx_products_price_btree ON products(price); -- B-tree (default)
CREATE INDEX idx_products_tags_gin ON products USING GIN(tags); -- Array/JSON
CREATE INDEX idx_products_partial ON products(price) WHERE stock_quantity > 0; -- Partial
-- Connection pooling (in Node.js)
import { Pool } from 'pg';
const pool = new Pool({
host: 'localhost',
database: 'mydb',
max: 20, // Max connections
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 2000,
});
const result = await pool.query('SELECT * FROM users WHERE id = $1', [userId]);
-- Query optimization
-- Bad: SELECT *
SELECT id, name, email FROM users WHERE id = $1;
-- Bad: N+1 queries
-- Good: Single query with JOIN
SELECT u.*, json_agg(o.*) as orders
FROM users u
LEFT JOIN orders o ON o.user_id = u.id
GROUP BY u.id;
MongoDB
// Explain query
db.products.find({ category: 'electronics' }).explain('executionStats');
// Index strategies
db.products.createIndex({ category: 1, price: -1 }); // Compound
db.products.createIndex({ name: 'text' }); // Text search
db.products.createIndex({ location: '2dsphere' }); // Geospatial
// Covered queries (uses only index)
db.products.find(
{ category: 'electronics' },
{ _id: 0, name: 1, price: 1 } // Only indexed fields
);
// Projection (return only needed fields)
db.products.find(
{ category: 'electronics' },
{ name: 1, price: 1, _id: 0 }
);
// Aggregation optimization
db.orders.aggregate([
{ $match: { status: 'completed' } }, // Filter early
{ $project: { items: 1, total: 1 } }, // Project early
{ $unwind: '$items' },
{ $group: { _id: '$items.productId', total: { $sum: 1 } } }
]);
// Use lean() in Mongoose for better performance
const products = await Product.find({ category: 'electronics' })
.lean() // Returns plain JS objects
.select('name price')
.limit(20);
7. Transactions
PostgreSQL
-- ACID transactions
BEGIN;
UPDATE products SET stock_quantity = stock_quantity - 1 WHERE id = $1;
INSERT INTO orders (user_id, total_amount) VALUES ($2, $3);
INSERT INTO order_items (order_id, product_id, quantity) VALUES ($4, $1, 1);
COMMIT; -- or ROLLBACK on error
-- In Node.js with pg
const client = await pool.connect();
try {
await client.query('BEGIN');
await client.query('UPDATE products SET stock_quantity = stock_quantity - $1 WHERE id = $2', [quantity, productId]);
const orderResult = await client.query('INSERT INTO orders (user_id, total_amount) VALUES ($1, $2) RETURNING id', [userId, total]);
await client.query('INSERT INTO order_items (order_id, product_id, quantity) VALUES ($1, $2, $3)', [orderResult.rows[0].id, productId, quantity]);
await client.query('COMMIT');
} catch (error) {
await client.query('ROLLBACK');
throw error;
} finally {
client.release();
}
MongoDB
// Multi-document transactions (MongoDB 4.0+)
const session = await mongoose.startSession();
try {
session.startTransaction();
// Deduct stock
await Product.updateOne(
{ _id: productId },
{ $inc: { stockQuantity: -quantity } },
{ session }
);
// Create order
const order = await Order.create([{
userId,
items: [{ productId, quantity, price }],
totalAmount: total
}], { session });
await session.commitTransaction();
} catch (error) {
await session.abortTransaction();
throw error;
} finally {
session.endSession();
}
// Two-phase commit pattern (alternative)
// 1. Pending state
await Order.create({ status: 'pending', ... });
// 2. Update product
await Product.updateOne({ _id: productId }, { $inc: { stockQuantity: -quantity } });
// 3. Commit
await Order.updateOne({ _id: orderId }, { status: 'completed' });
8. When to Use SQL vs NoSQL
Use PostgreSQL (SQL) when:
- ✓ Complex relationships and JOINs are required
- ✓ ACID transactions are critical (banking, payments)
- ✓ Data structure is well-defined and stable
- ✓ Complex analytical queries and reporting
- ✓ Data integrity and consistency are priority
- ✓ Vertical scaling is acceptable
- ✓ Rapid development with evolving schema
- ✓ Storing unstructured or semi-structured data
- ✓ Horizontal scaling is required
- ✓ High write throughput needed
- ✓ Document-oriented data model fits naturally
- ✓ Flexibility more important than consistency
- ✓ PostgreSQL for transactional data (orders, payments)
- ✓ MongoDB for user-generated content (posts, comments)
- ✓ Redis for caching and sessions
- ✓ Elasticsearch for full-text search
Conclusion
SQL and NoSQL databases serve different needs. PostgreSQL excels at complex queries and consistency, while MongoDB offers flexibility and horizontal scaling. Understanding schema design, indexing, and query optimization is crucial for both. Choose based on your specific requirements, not trends.
💡 Pro Tip: Start with PostgreSQL unless you have specific requirements for NoSQL. PostgreSQL can handle most workloads, supports JSON data (JSONB), has excellent tooling, and is easier to reason about with ACID guarantees. Only move to NoSQL when you hit clear limitations.