Skip to content

PostgreSQL - Advanced Open Source Database

PostgreSQL, often called "Postgres," stands as one of the most advanced open-source relational database management systems available today. Originally developed at the University of California, Berkeley, and first released in 1996, PostgreSQL has evolved into a sophisticated database platform that combines SQL compliance with powerful extensions and advanced features. Known for its reliability, data integrity, and extensibility, PostgreSQL supports both relational and non-relational data models, making it suitable for a wide range of applications from simple web applications to complex analytical systems and geographic information systems.

Installation and Setup

Installing PostgreSQL

sql
-- Ubuntu/Debian
sudo apt update
sudo apt install postgresql postgresql-contrib

-- CentOS/RHEL/Fedora
sudo dnf install postgresql postgresql-server postgresql-contrib

-- macOS (using Homebrew)
brew install postgresql

-- Initialize database (CentOS/RHEL)
sudo postgresql-setup --initdb

-- Start PostgreSQL service
sudo systemctl start postgresql
sudo systemctl enable postgresql

-- Switch to postgres user
sudo -i -u postgres

-- Access PostgreSQL prompt
psql

Initial Configuration

sql
-- Create a new database
CREATE DATABASE myapp_db;

-- Create a new user with password
CREATE USER myapp_user WITH PASSWORD 'secure_password';

-- Grant privileges to user
GRANT ALL PRIVILEGES ON DATABASE myapp_db TO myapp_user;

-- Connect to specific database
\c myapp_db

-- Create schema
CREATE SCHEMA app_schema;

-- Grant schema privileges
GRANT ALL ON SCHEMA app_schema TO myapp_user;
GRANT ALL ON ALL TABLES IN SCHEMA app_schema TO myapp_user;
GRANT ALL ON ALL SEQUENCES IN SCHEMA app_schema TO myapp_user;

-- Set default privileges for future objects
ALTER DEFAULT PRIVILEGES IN SCHEMA app_schema 
GRANT ALL ON TABLES TO myapp_user;
ALTER DEFAULT PRIVILEGES IN SCHEMA app_schema 
GRANT ALL ON SEQUENCES TO myapp_user;

Configuration Files

sql
-- Main configuration files
-- postgresql.conf: Main configuration
-- pg_hba.conf: Client authentication
-- pg_ident.conf: User name mapping

-- Key postgresql.conf settings
listen_addresses = 'localhost'          # IP addresses to listen on
port = 5432                            # Port number
max_connections = 100                  # Maximum concurrent connections
shared_buffers = 256MB                 # Shared memory for caching
effective_cache_size = 1GB             # Estimate of OS cache size
work_mem = 4MB                         # Memory for query operations
maintenance_work_mem = 64MB            # Memory for maintenance operations

-- Logging settings
log_destination = 'stderr'
logging_collector = on
log_directory = 'log'
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
log_statement = 'all'                  # Log all statements (very verbose; avoid in production)
log_min_duration_statement = 1000      # Log slow queries (1 second)

-- Show current configuration
SHOW ALL;
SHOW shared_buffers;
SHOW max_connections;

-- Reload configuration
SELECT pg_reload_conf();

psql Command Line Interface

sql
-- Connect to database
psql -h localhost -p 5432 -U username -d database_name

-- psql meta-commands
\l                    -- List databases
\c database_name      -- Connect to database
\dt                   -- List tables
\d table_name         -- Describe table
\du                   -- List users/roles
\dn                   -- List schemas
\df                   -- List functions
\dv                   -- List views
\di                   -- List indexes
\dp                   -- List table privileges

-- Query execution
\timing on            -- Show query execution time
\x                    -- Toggle expanded display
\q                    -- Quit psql

-- File operations
\i filename.sql       -- Execute SQL file
\o filename.txt       -- Redirect output to file
\o                    -- Stop redirecting output

-- Variables
\set variable_name value
\echo :variable_name

Data Types and Advanced Features

Core Data Types

sql
-- Numeric types
CREATE TABLE numeric_examples (
    small_int SMALLINT,              -- 2 bytes: -32,768 to 32,767
    regular_int INTEGER,             -- 4 bytes: -2,147,483,648 to 2,147,483,647
    big_int BIGINT,                  -- 8 bytes: large range
    decimal_precise DECIMAL(10,2),   -- Exact decimal
    numeric_val NUMERIC(15,5),       -- Exact numeric
    real_val REAL,                   -- 4-byte floating point
    double_val DOUBLE PRECISION,     -- 8-byte floating point
    serial_id SERIAL,                -- Auto-incrementing integer
    big_serial_id BIGSERIAL          -- Auto-incrementing big integer
);

-- String types
CREATE TABLE string_examples (
    fixed_char CHAR(10),             -- Fixed length
    variable_char VARCHAR(255),      -- Variable length with limit
    unlimited_text TEXT,             -- Unlimited length text
    binary_data BYTEA                -- Binary data
);

-- Date and time types
CREATE TABLE datetime_examples (
    date_only DATE,                  -- Date only
    time_only TIME,                  -- Time only
    time_with_tz TIME WITH TIME ZONE, -- Time with timezone
    timestamp_val TIMESTAMP,         -- Date and time
    timestamp_tz TIMESTAMP WITH TIME ZONE, -- Timestamp with timezone
    interval_val INTERVAL            -- Time interval
);

-- Boolean and other types
CREATE TABLE misc_examples (
    boolean_val BOOLEAN,             -- TRUE/FALSE
    uuid_val UUID,                   -- Universally unique identifier
    inet_addr INET,                  -- IP address
    mac_addr MACADDR,                -- MAC address
    json_data JSON,                  -- JSON data
    jsonb_data JSONB,                -- Binary JSON (more efficient)
    xml_data XML,                    -- XML data
    point_geom POINT,                -- Geometric point
    polygon_geom POLYGON             -- Geometric polygon
);

Arrays

sql
-- Array data types
CREATE TABLE array_examples (
    id SERIAL PRIMARY KEY,
    tags TEXT[],                     -- Array of text
    scores INTEGER[],                -- Array of integers
    matrix INTEGER[][],              -- Multi-dimensional array
    schedule TIME[]                  -- Array of time values
);

-- Insert array data
INSERT INTO array_examples (tags, scores, matrix) VALUES
(ARRAY['postgresql', 'database', 'sql'], ARRAY[95, 87, 92], ARRAY[[1,2],[3,4]]),
('{"web", "development"}', '{85, 90, 88}', '{{5,6},{7,8}}');

-- Query array data
SELECT * FROM array_examples WHERE 'postgresql' = ANY(tags);
SELECT * FROM array_examples WHERE tags @> ARRAY['database'];
SELECT * FROM array_examples WHERE array_length(scores, 1) > 2;

-- Array functions
SELECT 
    tags,
    array_length(tags, 1) AS tag_count,
    array_upper(scores, 1) AS max_score_index,
    array_to_string(tags, ', ') AS tags_string,
    unnest(tags) AS individual_tags
FROM array_examples;

-- Update arrays
UPDATE array_examples 
SET tags = array_append(tags, 'advanced')
WHERE id = 1;

UPDATE array_examples 
SET tags = array_remove(tags, 'sql')
WHERE id = 1;

JSON and JSONB

sql
-- JSON vs JSONB comparison
CREATE TABLE json_examples (
    id SERIAL PRIMARY KEY,
    data JSON,                       -- Text-based JSON storage
    data_binary JSONB,               -- Binary JSON storage (recommended)
    metadata JSONB
);

-- Insert JSON data
INSERT INTO json_examples (data, data_binary, metadata) VALUES
('{"name": "John", "age": 30, "skills": ["PostgreSQL", "Python"]}',
 '{"name": "John", "age": 30, "skills": ["PostgreSQL", "Python"]}',
 '{"created_at": "2024-01-15", "source": "api", "version": 1}');

-- JSON operators and functions
SELECT 
    data->>'name' AS name,                    -- Extract as text
    data->'age' AS age_json,                  -- Extract as JSON
    (data->>'age')::INTEGER AS age_int,       -- Extract and cast
    data#>'{skills,0}' AS first_skill,        -- Extract nested value
    jsonb_array_length(data_binary->'skills') AS skill_count
FROM json_examples;

-- JSON path queries
SELECT * FROM json_examples 
WHERE data_binary @> '{"name": "John"}';     -- Contains

SELECT * FROM json_examples 
WHERE data_binary ? 'age';                   -- Key exists

SELECT * FROM json_examples 
WHERE data_binary ?& array['name', 'age'];   -- All keys exist

SELECT * FROM json_examples 
WHERE data_binary ?| array['name', 'email']; -- Any key exists

-- JSON aggregation
SELECT 
    jsonb_agg(data_binary) AS all_records,
    jsonb_object_agg(id, data_binary->>'name') AS id_name_map
FROM json_examples;

-- Update JSON data
UPDATE json_examples 
SET data_binary = jsonb_set(data_binary, '{age}', '31')
WHERE id = 1;

UPDATE json_examples 
SET data_binary = data_binary || '{"updated": true}'::jsonb
WHERE id = 1;

-- Remove JSON keys
UPDATE json_examples 
SET data_binary = data_binary - 'age'
WHERE id = 1;

Custom Data Types and Domains

sql
-- Create custom enum type
CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');

-- Create composite type
CREATE TYPE address AS (
    street TEXT,
    city TEXT,
    state TEXT,
    zip_code TEXT
);

-- Create domain (constrained type)
CREATE DOMAIN email AS TEXT
CHECK (VALUE ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$');

CREATE DOMAIN positive_integer AS INTEGER
CHECK (VALUE > 0);

-- Use custom types
CREATE TABLE person (
    id SERIAL PRIMARY KEY,
    name TEXT NOT NULL,
    current_mood mood,
    home_address address,
    email_address email,
    age positive_integer
);

-- Insert data with custom types
INSERT INTO person (name, current_mood, home_address, email_address, age) VALUES
('John Doe', 'happy', ROW('123 Main St', 'Anytown', 'CA', '12345'), 'john@example.com', 30);

-- Query composite types
SELECT 
    name,
    (home_address).street,
    (home_address).city
FROM person;

Advanced SQL Features

Window Functions

sql
-- Create sample data
CREATE TABLE sales (
    id SERIAL PRIMARY KEY,
    salesperson TEXT,
    region TEXT,
    sale_date DATE,
    amount DECIMAL(10,2)
);

INSERT INTO sales (salesperson, region, sale_date, amount) VALUES
('Alice', 'North', '2024-01-15', 1000.00),
('Bob', 'South', '2024-01-16', 1500.00),
('Alice', 'North', '2024-01-17', 1200.00),
('Charlie', 'East', '2024-01-18', 800.00),
('Bob', 'South', '2024-01-19', 2000.00);

-- Basic window functions
SELECT 
    salesperson,
    region,
    amount,
    ROW_NUMBER() OVER (ORDER BY amount DESC) AS row_num,
    RANK() OVER (ORDER BY amount DESC) AS rank,
    DENSE_RANK() OVER (ORDER BY amount DESC) AS dense_rank,
    NTILE(4) OVER (ORDER BY amount) AS quartile  -- quartiles require 4 buckets
FROM sales;

-- Partition by region
SELECT 
    salesperson,
    region,
    amount,
    ROW_NUMBER() OVER (PARTITION BY region ORDER BY amount DESC) AS region_rank,
    SUM(amount) OVER (PARTITION BY region) AS region_total,
    AVG(amount) OVER (PARTITION BY region) AS region_avg,
    COUNT(*) OVER (PARTITION BY region) AS region_count
FROM sales;

-- Running totals and moving averages
SELECT 
    sale_date,
    amount,
    SUM(amount) OVER (ORDER BY sale_date) AS running_total,
    AVG(amount) OVER (ORDER BY sale_date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg_3,
    LAG(amount, 1) OVER (ORDER BY sale_date) AS previous_amount,
    LEAD(amount, 1) OVER (ORDER BY sale_date) AS next_amount,
    amount - LAG(amount, 1) OVER (ORDER BY sale_date) AS amount_change
FROM sales
ORDER BY sale_date;

-- First and last values
SELECT 
    salesperson,
    region,
    amount,
    FIRST_VALUE(amount) OVER (PARTITION BY region ORDER BY sale_date) AS first_sale,
    LAST_VALUE(amount) OVER (PARTITION BY region ORDER BY sale_date 
                            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_sale
FROM sales;

Common Table Expressions (CTEs)

sql
-- Basic CTE
WITH regional_stats AS (
    SELECT 
        region,
        COUNT(*) AS sale_count,
        SUM(amount) AS total_amount,
        AVG(amount) AS avg_amount
    FROM sales
    GROUP BY region
)
SELECT 
    region,
    sale_count,
    total_amount,
    avg_amount,
    ROUND((total_amount / SUM(total_amount) OVER()) * 100, 2) AS percentage_of_total
FROM regional_stats
ORDER BY total_amount DESC;

-- Recursive CTE - Employee hierarchy
CREATE TABLE employees (
    id INTEGER PRIMARY KEY,
    name TEXT,
    manager_id INTEGER REFERENCES employees(id)
);

INSERT INTO employees VALUES
(1, 'CEO', NULL),
(2, 'VP Sales', 1),
(3, 'VP Engineering', 1),
(4, 'Sales Manager', 2),
(5, 'Engineer', 3),
(6, 'Senior Engineer', 3);

WITH RECURSIVE employee_hierarchy AS (
    -- Base case: top-level employees (no manager)
    SELECT id, name, manager_id, 0 AS level, name AS path
    FROM employees
    WHERE manager_id IS NULL
    
    UNION ALL
    
    -- Recursive case: employees with managers
    SELECT 
        e.id, 
        e.name, 
        e.manager_id, 
        eh.level + 1,
        eh.path || ' -> ' || e.name
    FROM employees e
    JOIN employee_hierarchy eh ON e.manager_id = eh.id
)
SELECT * FROM employee_hierarchy
ORDER BY level, name;

-- Multiple CTEs
WITH 
monthly_sales AS (
    SELECT 
        DATE_TRUNC('month', sale_date) AS month,
        SUM(amount) AS monthly_total
    FROM sales
    GROUP BY DATE_TRUNC('month', sale_date)
),
sales_growth AS (
    SELECT 
        month,
        monthly_total,
        LAG(monthly_total) OVER (ORDER BY month) AS previous_month,
        monthly_total - LAG(monthly_total) OVER (ORDER BY month) AS growth
    FROM monthly_sales
)
SELECT 
    month,
    monthly_total,
    previous_month,
    growth,
    CASE 
        WHEN previous_month IS NULL THEN NULL
        ELSE ROUND((growth / previous_month) * 100, 2)
    END AS growth_percentage
FROM sales_growth
ORDER BY month;

Advanced Joins and Set Operations

sql
-- Create related tables
CREATE TABLE customers (
    id SERIAL PRIMARY KEY,
    name TEXT NOT NULL,
    email TEXT UNIQUE,
    created_at TIMESTAMP DEFAULT NOW()
);

CREATE TABLE orders (
    id SERIAL PRIMARY KEY,
    customer_id INTEGER REFERENCES customers(id),
    order_date DATE,
    total_amount DECIMAL(10,2)
);

-- LATERAL joins (correlated subqueries in FROM clause)
SELECT 
    c.name,
    c.email,
    recent_orders.order_count,
    recent_orders.total_spent
FROM customers c
LEFT JOIN LATERAL (
    SELECT 
        COUNT(*) AS order_count,
        SUM(total_amount) AS total_spent
    FROM orders o
    WHERE o.customer_id = c.id
    AND o.order_date > CURRENT_DATE - INTERVAL '30 days'
) recent_orders ON true;

-- FULL OUTER JOIN with COALESCE
SELECT 
    COALESCE(c.name, 'Unknown Customer') AS customer_name,
    COALESCE(o.order_date::TEXT, 'No Orders') AS order_info,  -- cast to TEXT: COALESCE(DATE, 'string') is a type error
    COALESCE(o.total_amount, 0) AS amount
FROM customers c
FULL OUTER JOIN orders o ON c.id = o.customer_id;

-- Set operations
-- UNION (removes duplicates)
SELECT name FROM customers WHERE created_at > '2024-01-01'
UNION
SELECT 'VIP Customer' AS name;

-- UNION ALL (keeps duplicates)
SELECT region FROM sales WHERE amount > 1000
UNION ALL
SELECT region FROM sales WHERE sale_date > '2024-01-15';

-- INTERSECT (common values)
SELECT region FROM sales WHERE amount > 1000
INTERSECT
SELECT region FROM sales WHERE sale_date > '2024-01-15';

-- EXCEPT (values in first query but not second)
SELECT region FROM sales WHERE amount > 1000
EXCEPT
SELECT region FROM sales WHERE sale_date < '2024-01-15';

Indexes and Performance Optimization

Index Types and Creation

sql
-- B-tree index (default)
CREATE INDEX idx_customers_email ON customers(email);

-- Partial index
CREATE INDEX idx_active_customers ON customers(name) 
WHERE created_at > '2024-01-01';

-- Composite index
CREATE INDEX idx_orders_customer_date ON orders(customer_id, order_date);

-- Unique index
CREATE UNIQUE INDEX idx_customers_email_unique ON customers(email);

-- Expression index
CREATE INDEX idx_customers_lower_email ON customers(LOWER(email));

-- GIN index for arrays and JSON
CREATE INDEX idx_array_tags ON array_examples USING GIN(tags);
CREATE INDEX idx_jsonb_data ON json_examples USING GIN(data_binary);

-- GiST index for geometric data
CREATE INDEX idx_location ON locations USING GIST(coordinates);

-- Hash index (for equality comparisons only)
CREATE INDEX idx_customers_id_hash ON customers USING HASH(id);

-- Show index information
SELECT 
    schemaname,
    tablename,
    indexname,
    indexdef
FROM pg_indexes
WHERE tablename = 'customers';

-- Analyze index usage
SELECT 
    schemaname,
    tablename,
    attname,
    n_distinct,
    correlation
FROM pg_stats
WHERE tablename = 'customers';

Query Performance Analysis

sql
-- EXPLAIN query execution plan
EXPLAIN SELECT * FROM customers WHERE email = 'john@example.com';

-- EXPLAIN ANALYZE for actual execution statistics
EXPLAIN ANALYZE SELECT 
    c.name,
    COUNT(o.id) AS order_count
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id, c.name;

-- EXPLAIN with detailed options
EXPLAIN (ANALYZE, BUFFERS, VERBOSE, FORMAT JSON) 
SELECT * FROM orders WHERE order_date > '2024-01-01';

-- Query performance monitoring
-- Enable query statistics
SELECT pg_stat_reset();

-- View query statistics
SELECT 
    query,
    calls,
    total_exec_time,                   -- named total_time before PostgreSQL 13
    mean_exec_time,                    -- named mean_time before PostgreSQL 13
    rows
FROM pg_stat_statements                -- requires the pg_stat_statements extension
ORDER BY total_exec_time DESC
LIMIT 10;

-- Table statistics
SELECT 
    schemaname,
    tablename,
    seq_scan,
    seq_tup_read,
    idx_scan,
    idx_tup_fetch,
    n_tup_ins,
    n_tup_upd,
    n_tup_del
FROM pg_stat_user_tables;

-- Index usage statistics
SELECT 
    schemaname,
    tablename,
    indexname,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch
FROM pg_stat_user_indexes;

Performance Tuning

sql
-- Update table statistics
ANALYZE customers;
ANALYZE orders;

-- Vacuum tables
VACUUM customers;
VACUUM ANALYZE orders;

-- Full vacuum (reclaims space)
VACUUM FULL customers;

-- Auto-vacuum settings
SHOW autovacuum;
SELECT name, setting FROM pg_settings WHERE name LIKE 'autovacuum%';

-- Connection and memory settings
SHOW max_connections;
SHOW shared_buffers;
SHOW work_mem;
SHOW maintenance_work_mem;
SHOW effective_cache_size;

-- Query optimization techniques
-- Use appropriate data types
-- Avoid SELECT *
-- Use LIMIT when appropriate
-- Use EXISTS instead of IN for subqueries
-- Use proper JOIN conditions

-- Example: Optimized query
SELECT c.name, c.email
FROM customers c
WHERE EXISTS (
    SELECT 1 FROM orders o 
    WHERE o.customer_id = c.id 
    AND o.order_date > CURRENT_DATE - INTERVAL '30 days'
)
LIMIT 100;

Stored Procedures and Functions

PL/pgSQL Functions

sql
-- Basic function
-- Count the orders belonging to one customer.
-- Returns 0 when the customer has no orders (COUNT over an empty set
-- is 0, never NULL), so callers never need a COALESCE.
CREATE OR REPLACE FUNCTION get_customer_order_count(customer_id INTEGER)
RETURNS INTEGER AS $$
DECLARE
    n_orders INTEGER;
BEGIN
    -- $1 (the customer_id parameter) is referenced positionally because
    -- its name collides with the orders.customer_id column.
    SELECT COUNT(*)
    INTO n_orders
    FROM orders
    WHERE orders.customer_id = $1;

    RETURN n_orders;
END;
$$ LANGUAGE plpgsql;

-- Use function
SELECT name, get_customer_order_count(id) AS order_count
FROM customers;

-- Function with multiple parameters and return type
-- Aggregate order statistics for one customer, optionally restricted to
-- a date window.  A NULL p_start_date / p_end_date means "no lower /
-- upper bound".  When no orders match, order_count is 0 and every amount
-- column is 0: the COALESCEs replace the NULL that SUM/AVG/MIN/MAX
-- return over an empty input set.
CREATE OR REPLACE FUNCTION calculate_order_stats(
    p_customer_id INTEGER,
    p_start_date DATE DEFAULT NULL,
    p_end_date DATE DEFAULT NULL
)
RETURNS TABLE(
    order_count BIGINT,
    total_amount NUMERIC,
    avg_amount NUMERIC,
    min_amount NUMERIC,
    max_amount NUMERIC
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        COUNT(*)::BIGINT                   AS order_count,
        COALESCE(SUM(ord.total_amount), 0) AS total_amount,
        COALESCE(AVG(ord.total_amount), 0) AS avg_amount,
        COALESCE(MIN(ord.total_amount), 0) AS min_amount,
        COALESCE(MAX(ord.total_amount), 0) AS max_amount
    FROM orders AS ord
    WHERE ord.customer_id = p_customer_id
      AND (p_start_date IS NULL OR ord.order_date >= p_start_date)
      AND (p_end_date IS NULL OR ord.order_date <= p_end_date);
END;
$$ LANGUAGE plpgsql;

-- Function with conditional logic
-- Classify a customer by lifetime spend:
--   >= 10000 Platinum, >= 5000 Gold, >= 1000 Silver,
--   any orders at all Bronze, otherwise 'New'.
CREATE OR REPLACE FUNCTION get_customer_tier(customer_id INTEGER)
RETURNS TEXT AS $$
DECLARE
    v_total_spent NUMERIC;
    v_order_count INTEGER;
BEGIN
    -- $1 is the customer_id parameter, referenced positionally because
    -- the name collides with the orders.customer_id column.
    SELECT
        COALESCE(SUM(total_amount), 0),
        COUNT(*)
    INTO v_total_spent, v_order_count
    FROM orders
    WHERE orders.customer_id = $1;

    -- CASE evaluates top-down, so each branch implies "and less than the
    -- threshold above it", exactly like the original IF/ELSIF chain.
    RETURN CASE
        WHEN v_total_spent >= 10000 THEN 'Platinum'
        WHEN v_total_spent >= 5000  THEN 'Gold'
        WHEN v_total_spent >= 1000  THEN 'Silver'
        WHEN v_order_count > 0      THEN 'Bronze'
        ELSE 'New'
    END;
END;
$$ LANGUAGE plpgsql;

-- Function with exception handling
-- Divide numerator by denominator, returning NULL (with a NOTICE)
-- instead of raising when the denominator is zero or any other
-- arithmetic error occurs.
CREATE OR REPLACE FUNCTION safe_divide(numerator NUMERIC, denominator NUMERIC)
RETURNS NUMERIC AS $$
BEGIN
    -- Let the division itself raise division_by_zero so the dedicated
    -- handler below fires.  The original pre-check raised a generic
    -- exception (SQLSTATE P0001) that bypassed the division_by_zero
    -- handler and was swallowed by WHEN OTHERS, making both the custom
    -- message and the specific handler unreachable.
    RETURN numerator / denominator;
EXCEPTION
    WHEN division_by_zero THEN
        RAISE NOTICE 'Division by zero attempted, returning NULL';
        RETURN NULL;
    WHEN OTHERS THEN
        RAISE NOTICE 'An error occurred: %', SQLERRM;
        RETURN NULL;
END;
$$ LANGUAGE plpgsql;

Stored Procedures (PostgreSQL 11+)

sql
-- Basic stored procedure
-- Recompute the tier column for every customer.
-- NOTE(review): assumes customers has a tier column -- the customers DDL
-- shown earlier in this document does not declare one; confirm.
CREATE OR REPLACE PROCEDURE update_customer_tier()
LANGUAGE plpgsql AS $$
BEGIN
    -- Single set-based UPDATE instead of the original per-row FOR loop:
    -- same result, one statement, no row-at-a-time round trips.
    UPDATE customers
    SET tier = get_customer_tier(customers.id);

    COMMIT;
END;
$$;

-- Call stored procedure
CALL update_customer_tier();

-- Procedure with parameters
-- Report (via RAISE NOTICE) how many orders fall inside the given
-- calendar month identified by p_year / p_month.
CREATE OR REPLACE PROCEDURE process_monthly_orders(
    p_year INTEGER,
    p_month INTEGER
)
LANGUAGE plpgsql AS $$
DECLARE
    start_date DATE;
    end_date DATE;
    total_orders INTEGER;
BEGIN
    -- make_date() builds the first day of the month directly from the
    -- integer arguments and raises on out-of-range values (e.g. month 13),
    -- unlike the original string concatenation
    -- DATE(p_year || '-' || p_month || '-01').
    start_date := make_date(p_year, p_month, 1);
    -- Last day of the same month (handles 28/29/30/31-day months).
    end_date := (start_date + INTERVAL '1 month' - INTERVAL '1 day')::DATE;

    SELECT COUNT(*) INTO total_orders
    FROM orders
    WHERE order_date BETWEEN start_date AND end_date;

    RAISE NOTICE 'Processed % orders for %-%', total_orders, p_year, p_month;

    -- Additional processing logic here

    COMMIT;
END;
$$;

Triggers

sql
-- Create audit table
CREATE TABLE customer_audit (
    id SERIAL PRIMARY KEY,
    customer_id INTEGER,
    action TEXT,
    old_values JSONB,
    new_values JSONB,
    changed_at TIMESTAMP DEFAULT NOW(),
    changed_by TEXT DEFAULT CURRENT_USER
);

-- Trigger function
-- Row-level audit trigger for customers: records one customer_audit row
-- per INSERT/UPDATE/DELETE, capturing the affected row image(s) as JSONB
-- (new image on INSERT, both on UPDATE, old image on DELETE).
CREATE OR REPLACE FUNCTION customer_audit_trigger()
RETURNS TRIGGER AS $$
BEGIN
    CASE TG_OP
        WHEN 'INSERT' THEN
            INSERT INTO customer_audit (customer_id, action, new_values)
            VALUES (NEW.id, 'INSERT', row_to_json(NEW)::jsonb);
            RETURN NEW;
        WHEN 'UPDATE' THEN
            INSERT INTO customer_audit (customer_id, action, old_values, new_values)
            VALUES (NEW.id, 'UPDATE', row_to_json(OLD)::jsonb, row_to_json(NEW)::jsonb);
            RETURN NEW;
        WHEN 'DELETE' THEN
            INSERT INTO customer_audit (customer_id, action, old_values)
            VALUES (OLD.id, 'DELETE', row_to_json(OLD)::jsonb);
            RETURN OLD;
        ELSE
            -- Unreachable for row-level INSERT/UPDATE/DELETE triggers;
            -- mirrors the original's fall-through RETURN NULL.
            RETURN NULL;
    END CASE;
END;
$$ LANGUAGE plpgsql;

-- Create triggers
CREATE TRIGGER customer_audit_insert
    AFTER INSERT ON customers
    FOR EACH ROW EXECUTE FUNCTION customer_audit_trigger();

CREATE TRIGGER customer_audit_update
    AFTER UPDATE ON customers
    FOR EACH ROW EXECUTE FUNCTION customer_audit_trigger();

CREATE TRIGGER customer_audit_delete
    AFTER DELETE ON customers
    FOR EACH ROW EXECUTE FUNCTION customer_audit_trigger();

-- Timestamp trigger
-- BEFORE UPDATE trigger function: stamp the row's updated_at column with
-- the transaction start time before the new row version is written.
-- NOTE(review): assumes the target table has an updated_at column -- the
-- customers DDL shown earlier in this document does not declare one; confirm.
CREATE OR REPLACE FUNCTION update_modified_column()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;  -- CURRENT_TIMESTAMP is identical to NOW()
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER update_customer_modtime
    BEFORE UPDATE ON customers
    FOR EACH ROW EXECUTE FUNCTION update_modified_column();

Transactions and Concurrency

Transaction Management

sql
-- Basic transaction
BEGIN;
INSERT INTO customers (name, email) VALUES ('New Customer', 'new@example.com');
INSERT INTO orders (customer_id, order_date, total_amount) 
VALUES (currval('customers_id_seq'), CURRENT_DATE, 100.00);
COMMIT;

-- Transaction with rollback
BEGIN;
UPDATE customers SET email = 'updated@example.com' WHERE id = 1;
-- Check the change
SELECT * FROM customers WHERE id = 1;
-- Rollback if not satisfied
ROLLBACK;

-- Savepoints
BEGIN;
INSERT INTO customers (name, email) VALUES ('Customer 1', 'c1@example.com');
SAVEPOINT sp1;

INSERT INTO customers (name, email) VALUES ('Customer 2', 'c2@example.com');
SAVEPOINT sp2;

INSERT INTO customers (name, email) VALUES ('Customer 3', 'c3@example.com');

-- Rollback to savepoint
ROLLBACK TO SAVEPOINT sp2;
-- Customer 3 is rolled back, but Customer 1 and 2 remain

COMMIT;

Isolation Levels

sql
-- Show current isolation level
SHOW transaction_isolation;

-- Set isolation level for session
SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL READ COMMITTED;

-- Set isolation level for transaction
BEGIN ISOLATION LEVEL REPEATABLE READ;
-- Transaction statements here
COMMIT;

-- Isolation levels in PostgreSQL:
-- READ UNCOMMITTED: Allows dirty reads (rarely used)
-- READ COMMITTED: Default, prevents dirty reads
-- REPEATABLE READ: Prevents dirty and non-repeatable reads
-- SERIALIZABLE: Prevents all phenomena, highest isolation

-- Example of isolation level effects
-- Session 1:
BEGIN ISOLATION LEVEL REPEATABLE READ;
SELECT * FROM customers WHERE id = 1;

-- Session 2:
UPDATE customers SET name = 'Updated Name' WHERE id = 1;

-- Session 1 (continued):
SELECT * FROM customers WHERE id = 1; -- Still sees old value
COMMIT;

Locking

sql
-- Advisory locks (application-level)
SELECT pg_advisory_lock(12345);
-- Critical section code here
SELECT pg_advisory_unlock(12345);

-- Try advisory lock (non-blocking)
SELECT pg_try_advisory_lock(12345);

-- Row-level locking
BEGIN;
SELECT * FROM customers WHERE id = 1 FOR UPDATE;
-- Row is locked for updates by other transactions
UPDATE customers SET name = 'New Name' WHERE id = 1;
COMMIT;

-- Shared lock
SELECT * FROM customers WHERE id = 1 FOR SHARE;

-- Lock with NOWAIT
SELECT * FROM customers WHERE id = 1 FOR UPDATE NOWAIT;

-- Lock with SKIP LOCKED
SELECT * FROM orders WHERE processed = false 
FOR UPDATE SKIP LOCKED LIMIT 10;

-- Table-level locking
BEGIN;
LOCK TABLE customers IN ACCESS EXCLUSIVE MODE;
-- Table is locked for exclusive access
COMMIT;

-- Show current locks
SELECT 
    locktype,
    database,
    relation::regclass,
    page,
    tuple,
    virtualxid,
    transactionid,
    mode,
    granted
FROM pg_locks;

Extensions and Advanced Features

sql
-- Show available extensions
SELECT * FROM pg_available_extensions ORDER BY name;

-- Install extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
CREATE EXTENSION IF NOT EXISTS "hstore";
CREATE EXTENSION IF NOT EXISTS "ltree";
CREATE EXTENSION IF NOT EXISTS "pg_trgm";

-- UUID generation
SELECT uuid_generate_v4();
SELECT uuid_generate_v1();

-- Cryptographic functions
SELECT crypt('password', gen_salt('bf'));
SELECT digest('data', 'sha256');

-- hstore (key-value pairs)
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name TEXT,
    attributes HSTORE
);

INSERT INTO products (name, attributes) VALUES
('Laptop', 'brand=>Dell, ram=>16GB, storage=>512GB SSD'),
('Phone', 'brand=>Apple, model=>iPhone, storage=>128GB');

SELECT name, attributes->'brand' AS brand FROM products;
SELECT * FROM products WHERE attributes ? 'ram';
SELECT * FROM products WHERE attributes @> 'brand=>Apple';

-- ltree (hierarchical data)
CREATE TABLE categories (
    id SERIAL PRIMARY KEY,
    path LTREE,
    name TEXT
);

INSERT INTO categories (path, name) VALUES
('electronics', 'Electronics'),
('electronics.computers', 'Computers'),
('electronics.computers.laptops', 'Laptops'),
('electronics.phones', 'Phones');

SELECT * FROM categories WHERE path ~ 'electronics.*';
SELECT * FROM categories WHERE path <@ 'electronics.computers';

-- Full-text search with pg_trgm
CREATE INDEX idx_customers_name_trgm ON customers USING GIN(name gin_trgm_ops);

SELECT * FROM customers WHERE name % 'john';  -- Similarity search
SELECT name, similarity(name, 'john') FROM customers WHERE name % 'john';
Full-Text Search

sql
-- Basic full-text search
CREATE TABLE documents (
    id SERIAL PRIMARY KEY,
    title TEXT,
    content TEXT,
    search_vector TSVECTOR
);

-- Update search vector
UPDATE documents 
SET search_vector = to_tsvector('english', title || ' ' || content);

-- Create index for full-text search
CREATE INDEX idx_documents_search ON documents USING GIN(search_vector);

-- Search documents
SELECT title, ts_rank(search_vector, query) AS rank
FROM documents, to_tsquery('english', 'postgresql & database') AS query
WHERE search_vector @@ query
ORDER BY rank DESC;

-- Highlight search results
SELECT 
    title,
    ts_headline('english', content, to_tsquery('english', 'postgresql & database')) AS highlighted
FROM documents
WHERE search_vector @@ to_tsquery('english', 'postgresql & database');

-- Automatic search vector update
-- BEFORE INSERT/UPDATE trigger function: rebuild the row's tsvector from
-- its title and content so the GIN index stays in sync automatically.
CREATE OR REPLACE FUNCTION update_search_vector()
RETURNS TRIGGER AS $$
BEGIN
    -- COALESCE each part: without it a NULL title or content would make
    -- the whole '||' concatenation NULL and silently empty the search
    -- vector for the row.
    NEW.search_vector := to_tsvector('english',
        COALESCE(NEW.title, '') || ' ' || COALESCE(NEW.content, ''));
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER documents_search_update
    BEFORE INSERT OR UPDATE ON documents
    FOR EACH ROW EXECUTE FUNCTION update_search_vector();

Partitioning

sql
-- Range partitioning by date
CREATE TABLE sales_data (
    id SERIAL,
    sale_date DATE NOT NULL,
    amount DECIMAL(10,2),
    region TEXT
) PARTITION BY RANGE (sale_date);

-- Create partitions
CREATE TABLE sales_2024_q1 PARTITION OF sales_data
    FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');

CREATE TABLE sales_2024_q2 PARTITION OF sales_data
    FOR VALUES FROM ('2024-04-01') TO ('2024-07-01');

CREATE TABLE sales_2024_q3 PARTITION OF sales_data
    FOR VALUES FROM ('2024-07-01') TO ('2024-10-01');

-- Hash partitioning
CREATE TABLE user_sessions (
    id SERIAL,
    user_id INTEGER NOT NULL,
    session_data JSONB,
    created_at TIMESTAMP DEFAULT NOW()
) PARTITION BY HASH (user_id);

CREATE TABLE user_sessions_0 PARTITION OF user_sessions
    FOR VALUES WITH (MODULUS 4, REMAINDER 0);

CREATE TABLE user_sessions_1 PARTITION OF user_sessions
    FOR VALUES WITH (MODULUS 4, REMAINDER 1);

-- List partitioning
CREATE TABLE regional_data (
    id SERIAL,
    region TEXT NOT NULL,
    data JSONB
) PARTITION BY LIST (region);

CREATE TABLE regional_data_north PARTITION OF regional_data
    FOR VALUES IN ('north', 'northeast', 'northwest');

CREATE TABLE regional_data_south PARTITION OF regional_data
    FOR VALUES IN ('south', 'southeast', 'southwest');

-- Show partition information
-- (pg_partitions is a Greenplum view; PostgreSQL uses pg_partition_tree)
SELECT 
    relid,
    parentrelid,
    isleaf,
    level
FROM pg_partition_tree('sales_data');

Backup and Recovery

Backup Strategies

sql
-- pg_dump for logical backups
-- Full database backup
pg_dump -h localhost -U username -d database_name > backup.sql

-- Backup with custom format (recommended)
pg_dump -h localhost -U username -Fc database_name > backup.dump

-- Backup specific tables
pg_dump -h localhost -U username -t customers -t orders database_name > tables.sql

-- Backup schema only
pg_dump -h localhost -U username -s database_name > schema.sql

-- Backup data only
pg_dump -h localhost -U username -a database_name > data.sql

-- Backup all databases
pg_dumpall -h localhost -U username > all_databases.sql

-- Backup with compression
pg_dump -h localhost -U username -Fc -Z 9 database_name > backup_compressed.dump

-- Parallel backup (faster for large databases)
pg_dump -h localhost -U username -Fd -j 4 database_name -f backup_directory/

Restore Operations

sql
-- Restore from SQL file
psql -h localhost -U username -d database_name < backup.sql

-- Restore from custom format
pg_restore -h localhost -U username -d database_name backup.dump

-- Restore with options
pg_restore -h localhost -U username -d database_name -v -c backup.dump

-- Restore specific tables
pg_restore -h localhost -U username -d database_name -t customers backup.dump

-- Restore to different database
createdb new_database
pg_restore -h localhost -U username -d new_database backup.dump

-- Parallel restore
pg_restore -h localhost -U username -d database_name -j 4 backup_directory/

-- List contents of backup file
pg_restore -l backup.dump

-- Restore with custom list
pg_restore -h localhost -U username -d database_name -L restore_list.txt backup.dump

Point-in-Time Recovery (PITR)

sql
-- Enable WAL archiving in postgresql.conf
wal_level = replica
archive_mode = on
archive_command = 'cp %p /path/to/archive/%f'
max_wal_senders = 3

-- Create base backup
pg_basebackup -h localhost -U username -D /path/to/backup -Ft -z -P

-- Recovery configuration (recovery.conf or postgresql.conf in v12+)
restore_command = 'cp /path/to/archive/%f %p'
recovery_target_time = '2024-01-15 14:30:00'

-- Start recovery
pg_ctl start -D /path/to/backup

-- Show WAL information
SELECT pg_current_wal_lsn();
SELECT pg_walfile_name(pg_current_wal_lsn());

-- Manual WAL switching
SELECT pg_switch_wal();

Streaming Replication

sql
-- Master configuration (postgresql.conf)
wal_level = replica
max_wal_senders = 3
wal_keep_segments = 32                 # renamed to wal_keep_size (in MB) in PostgreSQL 13+
synchronous_standby_names = 'standby1'

-- Create replication user
CREATE USER replicator REPLICATION LOGIN CONNECTION LIMIT 1 ENCRYPTED PASSWORD 'password';

-- pg_hba.conf entry for replication
host replication replicator 192.168.1.0/24 md5

-- Create standby server
pg_basebackup -h master_host -D /var/lib/postgresql/data -U replicator -P -W

-- Standby configuration (postgresql.conf)
hot_standby = on

-- Recovery configuration (standby.signal file + postgresql.conf)
primary_conninfo = 'host=master_host port=5432 user=replicator password=password'
primary_slot_name = 'standby_slot'

-- Create replication slot on master
SELECT pg_create_physical_replication_slot('standby_slot');

-- Monitor replication
SELECT * FROM pg_stat_replication;
SELECT pg_is_in_recovery();

-- Promote standby to master
SELECT pg_promote();   -- or from the shell: pg_ctl promote -D /path/to/data

PostgreSQL's advanced features, extensibility, and robust architecture make it an excellent choice for applications requiring complex data operations, high reliability, and scalability. Its comprehensive SQL compliance, combined with powerful extensions and advanced data types, provides developers with the tools needed to build sophisticated database-driven applications across various domains from web applications to data analytics and geographic information systems.