SELECT

Basic Syntax

SELECT [DISTINCT [ON (expr, ...)]] select_list
FROM table_reference
[WHERE condition]
[GROUP BY expression [, ...]]
[HAVING condition]
[WINDOW window_name AS (window_definition) [, ...]]
[{ UNION | INTERSECT | EXCEPT } [ALL] select]
[ORDER BY expression [ASC | DESC] [NULLS { FIRST | LAST }] [, ...]]
[LIMIT count]
[OFFSET start]
[FOR { UPDATE | SHARE } [OF table_name] [NOWAIT | SKIP LOCKED]]

SELECT List

Select specific columns, expressions, or all columns with *:

-- All columns
SELECT * FROM products;

-- Specific columns
SELECT name, price FROM products;

-- Expressions with aliases
SELECT
    name,
    price * 1.1 AS price_with_tax,
    UPPER(category) AS category_upper
FROM products;

-- Constants and functions
SELECT 42 AS answer, NOW() AS current_time;

Column Aliases

Use AS (or just a space) to name output columns:

SELECT
    first_name || ' ' || last_name AS full_name,
    EXTRACT(YEAR FROM hire_date) hire_year
FROM employees;

WHERE

Filter rows with a boolean predicate. Only rows where the condition evaluates to TRUE are returned.

SELECT name, price
FROM products
WHERE price > 50 AND category = 'electronics';

Comparison Operators

WHERE age = 30               -- equal
WHERE age != 30              -- not equal
WHERE age <> 30              -- not equal (alternate)
WHERE price < 100            -- less than
WHERE price <= 100           -- less than or equal
WHERE price > 10             -- greater than
WHERE price >= 10            -- greater than or equal

Logical Operators

WHERE price > 10 AND price < 50
WHERE status = 'active' OR status = 'pending'
WHERE NOT is_deleted

BETWEEN

Inclusive range test:

SELECT * FROM orders
WHERE total BETWEEN 100 AND 500;

-- Equivalent to:
-- WHERE total >= 100 AND total <= 500

IN

Test membership in a list of values:

SELECT * FROM products
WHERE category IN ('electronics', 'books', 'clothing');

-- Negated
SELECT * FROM products
WHERE category NOT IN ('discontinued', 'recalled');

LIKE and ILIKE

Pattern matching with wildcards. % matches any sequence of characters, _ matches any single character.

-- Case-sensitive
SELECT * FROM users WHERE email LIKE '%@gmail.com';

-- Case-insensitive
SELECT * FROM users WHERE name ILIKE '%smith%';

-- Single character wildcard
SELECT * FROM products WHERE sku LIKE 'A-___';  -- matches A-001, A-xyz, etc.

-- Negated
SELECT * FROM users WHERE email NOT LIKE '%@spam.%';

IS NULL / IS NOT NULL

SELECT * FROM users WHERE phone IS NULL;
SELECT * FROM users WHERE email IS NOT NULL;

CASE Expressions

Conditional logic within a query:

SELECT
    name,
    price,
    CASE
        WHEN price < 10 THEN 'budget'
        WHEN price < 100 THEN 'mid-range'
        ELSE 'premium'
    END AS tier
FROM products;

DISTINCT

Remove duplicate rows from the result:

-- Distinct on all columns
SELECT DISTINCT category FROM products;

-- DISTINCT ON (PostgreSQL extension): keep first row per group
SELECT DISTINCT ON (customer_id) customer_id, order_id, total
FROM orders
ORDER BY customer_id, created_at DESC;

ORDER BY

Sort the result set. Each sort key can specify direction and null ordering:

SELECT name, price, created_at
FROM products
ORDER BY price DESC, name ASC;

NULLS FIRST / NULLS LAST

Control where nulls appear in sorted output:

-- Nulls at the end (default for ASC)
SELECT * FROM products ORDER BY rating ASC NULLS LAST;

-- Nulls at the beginning
SELECT * FROM products ORDER BY rating DESC NULLS FIRST;

Ordering by Expression

SELECT name, price * quantity AS line_total
FROM order_items
ORDER BY price * quantity DESC;

-- Or reference the alias
SELECT name, price * quantity AS line_total
FROM order_items
ORDER BY line_total DESC;

LIMIT and OFFSET

Restrict the number of rows returned:

-- First 10 rows
SELECT * FROM products ORDER BY created_at DESC LIMIT 10;

-- Skip 20, then return 10 (page 3 of a paginated result)
SELECT * FROM products ORDER BY id LIMIT 10 OFFSET 20;

ParticleDB optimizes LIMIT on top of ORDER BY using Top-K partial sort (O(N log K) instead of O(N log N)).

TABLESAMPLE

Return a random sample of rows from a table:

-- Sample approximately 10% of rows (row-level Bernoulli sampling)
SELECT * FROM events TABLESAMPLE BERNOULLI(10);

-- Block-level sampling (faster, less uniform)
SELECT * FROM events TABLESAMPLE SYSTEM(5);

-- Repeatable sampling with a seed for deterministic results
SELECT * FROM events TABLESAMPLE BERNOULLI(10) REPEATABLE(42);

Subqueries

Scalar Subquery

A subquery that returns a single value:

SELECT
    name,
    price,
    price - (SELECT AVG(price) FROM products) AS diff_from_avg
FROM products;

IN Subquery

Test membership against a subquery result:

SELECT * FROM customers
WHERE id IN (SELECT customer_id FROM orders WHERE total > 1000);

SELECT * FROM products
WHERE id NOT IN (SELECT product_id FROM discontinued);

EXISTS Subquery

Test whether a subquery returns any rows:

-- Customers who have placed at least one order
SELECT c.name
FROM customers c
WHERE EXISTS (
    SELECT 1 FROM orders o WHERE o.customer_id = c.id
);

-- Customers with no orders
SELECT c.name
FROM customers c
WHERE NOT EXISTS (
    SELECT 1 FROM orders o WHERE o.customer_id = c.id
);

ANY / ALL

Compare a value to rows from a subquery:

-- Products more expensive than at least one premium product
SELECT * FROM products
WHERE price > ANY (SELECT price FROM products WHERE category = 'premium');

-- Products more expensive than ALL budget products
SELECT * FROM products
WHERE price > ALL (SELECT price FROM products WHERE category = 'budget');

FROM Subquery (Derived Table)

Use a subquery as a table in the FROM clause:

SELECT category, avg_price
FROM (
    SELECT category, AVG(price) AS avg_price
    FROM products
    GROUP BY category
) sub
WHERE avg_price > 50
ORDER BY avg_price DESC;

LATERAL Subquery

A lateral subquery can reference columns from preceding tables in the FROM clause:

SELECT c.name, recent.total
FROM customers c,
LATERAL (
    SELECT total FROM orders
    WHERE customer_id = c.id
    ORDER BY created_at DESC
    LIMIT 1
) recent;

Common Table Expressions (WITH)

Define named subqueries for readability and reuse:

WITH
    active_customers AS (
        SELECT id, name FROM customers WHERE status = 'active'
    ),
    recent_orders AS (
        SELECT customer_id, SUM(total) AS total_spent
        FROM orders
        WHERE created_at > CURRENT_TIMESTAMP - 86400000000  -- last 24h in microseconds
        GROUP BY customer_id
    )
SELECT ac.name, ro.total_spent
FROM active_customers ac
JOIN recent_orders ro ON ac.id = ro.customer_id
ORDER BY ro.total_spent DESC;

WITH RECURSIVE

Recursive CTEs for hierarchical or graph queries:

-- Org chart: find all reports under manager #1
WITH RECURSIVE reports AS (
    -- Anchor: the manager
    SELECT id, name, manager_id, 1 AS depth
    FROM employees
    WHERE id = 1

    UNION ALL

    -- Recursive: their direct reports
    SELECT e.id, e.name, e.manager_id, r.depth + 1
    FROM employees e
    JOIN reports r ON e.manager_id = r.id
    WHERE r.depth < 10
)
SELECT * FROM reports ORDER BY depth, name;

Set Operations

Combine results from multiple queries:

UNION / UNION ALL

-- Deduplicated (removes duplicates)
SELECT name FROM customers
UNION
SELECT name FROM suppliers;

-- Keep all rows including duplicates
SELECT name FROM customers
UNION ALL
SELECT name FROM suppliers;

INTERSECT

Rows that appear in both queries:

SELECT customer_id FROM orders_2024
INTERSECT
SELECT customer_id FROM orders_2025;

EXCEPT

Rows from the first query that don’t appear in the second:

-- Customers from 2024 who didn't order in 2025
SELECT customer_id FROM orders_2024
EXCEPT
SELECT customer_id FROM orders_2025;

GROUP BY and HAVING

Group rows and apply aggregate functions. See Aggregations for the full list of aggregate functions.

SELECT
    category,
    COUNT(*) AS num_products,
    AVG(price) AS avg_price,
    MAX(price) AS max_price
FROM products
GROUP BY category
HAVING COUNT(*) >= 5
ORDER BY avg_price DESC;

GROUPING SETS / ROLLUP / CUBE

Compute aggregates for multiple grouping levels in a single pass:

-- ROLLUP: hierarchical subtotals
SELECT region, category, SUM(revenue)
FROM sales
GROUP BY ROLLUP(region, category);

-- CUBE: all combinations
SELECT region, category, SUM(revenue)
FROM sales
GROUP BY CUBE(region, category);

-- GROUPING SETS: specific combinations
SELECT region, category, SUM(revenue)
FROM sales
GROUP BY GROUPING SETS ((region, category), (region), ());

Window Functions

Window functions compute values across a set of rows related to the current row, without collapsing rows like GROUP BY.

Syntax

function_name(args) OVER (
    [PARTITION BY partition_expr [, ...]]
    [ORDER BY sort_expr [ASC | DESC] [NULLS { FIRST | LAST }] [, ...]]
    [frame_clause]
)

Ranking Functions

SELECT
    department,
    name,
    salary,
    ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS row_num,
    RANK()       OVER (PARTITION BY department ORDER BY salary DESC) AS rank,
    DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dense_rank,
    NTILE(4)     OVER (PARTITION BY department ORDER BY salary DESC) AS quartile,
    PERCENT_RANK() OVER (PARTITION BY department ORDER BY salary) AS pct_rank,
    CUME_DIST()    OVER (PARTITION BY department ORDER BY salary) AS cum_dist
FROM employees;

Function	Description
`ROW_NUMBER()`	Unique sequential integer per partition, no ties
`RANK()`	Rank with gaps (1, 2, 2, 4)
`DENSE_RANK()`	Rank without gaps (1, 2, 2, 3)
`NTILE(n)`	Distribute rows into `n` roughly equal buckets
`PERCENT_RANK()`	Relative rank: `(rank - 1) / (total_rows - 1)`
`CUME_DIST()`	Cumulative distribution: fraction of rows ≤ current

Value Functions

SELECT
    date,
    revenue,
    LAG(revenue, 1, 0)  OVER (ORDER BY date) AS prev_day,
    LEAD(revenue, 1, 0) OVER (ORDER BY date) AS next_day,
    FIRST_VALUE(revenue) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS first_rev,
    LAST_VALUE(revenue)  OVER (ORDER BY date ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS last_rev,
    NTH_VALUE(revenue, 3) OVER (ORDER BY date) AS third_rev
FROM daily_revenue;

Function	Description
`LAG(expr [, offset [, default]])`	Value from `offset` rows before current (default: 1)
`LEAD(expr [, offset [, default]])`	Value from `offset` rows after current (default: 1)
`FIRST_VALUE(expr)`	First value in the window frame
`LAST_VALUE(expr)`	Last value in the window frame
`NTH_VALUE(expr, n)`	Value at position `n` in the window frame

Aggregate Window Functions

Any aggregate function can be used as a window function:

SELECT
    date,
    revenue,
    SUM(revenue) OVER (ORDER BY date) AS running_total,
    AVG(revenue) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS week_avg,
    COUNT(*)     OVER (PARTITION BY region) AS region_count
FROM daily_revenue;

Window Frame Clause

Control which rows are included in the window frame:

-- Syntax
{ ROWS | RANGE | GROUPS } BETWEEN frame_start AND frame_end

-- Frame boundaries
UNBOUNDED PRECEDING    -- from the beginning of the partition
n PRECEDING            -- n rows before current
CURRENT ROW            -- the current row
n FOLLOWING            -- n rows after current
UNBOUNDED FOLLOWING    -- to the end of the partition

Examples:

-- Running total (all rows up to current)
SUM(x) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)

-- 7-day moving average
AVG(x) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)

-- Centered 5-row window
AVG(x) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)

Time-Series Window Functions

ParticleDB includes built-in window functions designed for time-series analysis. These operate on ordered partitions and are typically used with an ORDER BY on a timestamp column.

Function	Description
`MOVING_AVG(expr [, window_size])`	Moving average over the last `window_size` rows (default varies)
`MOVING_SUM(expr [, window_size])`	Moving sum over the last `window_size` rows
`EXP_MOVING_AVG(expr [, alpha])`	Exponential moving average with smoothing factor `alpha` (0 to 1)
`DELTA(expr)`	Difference between the current row and the previous row
`RATE(expr)`	Rate of change from the previous row

SELECT
    timestamp,
    cpu_usage,
    MOVING_AVG(cpu_usage, 5) OVER (ORDER BY timestamp) AS avg_5,
    MOVING_SUM(cpu_usage, 5) OVER (ORDER BY timestamp) AS sum_5,
    EXP_MOVING_AVG(cpu_usage, 0.3) OVER (ORDER BY timestamp) AS ema,
    DELTA(cpu_usage) OVER (ORDER BY timestamp) AS change,
    RATE(cpu_usage) OVER (ORDER BY timestamp) AS rate
FROM metrics;

Named Windows

Define a window once and reference it multiple times:

SELECT
    name,
    salary,
    RANK()         OVER w AS rank,
    SUM(salary)    OVER w AS running_total,
    AVG(salary)    OVER w AS running_avg
FROM employees
WINDOW w AS (PARTITION BY department ORDER BY salary DESC);

Lock selected rows for the duration of the current transaction:

-- Exclusive lock (blocks other FOR UPDATE and FOR SHARE)
SELECT * FROM accounts WHERE id = 42 FOR UPDATE;

-- Shared lock (blocks FOR UPDATE, allows other FOR SHARE)
SELECT * FROM accounts WHERE id = 42 FOR SHARE;

-- Lock only specific tables in a join
SELECT * FROM orders o JOIN items i ON o.id = i.order_id
FOR UPDATE OF orders;

NOWAIT and SKIP LOCKED

-- Error immediately if rows are already locked
SELECT * FROM jobs WHERE status = 'pending'
ORDER BY created_at LIMIT 1
FOR UPDATE NOWAIT;

-- Skip over locked rows (useful for job queues)
SELECT * FROM jobs WHERE status = 'pending'
ORDER BY created_at LIMIT 1
FOR UPDATE SKIP LOCKED;

Table-Valued Functions

Use functions that return rows in the FROM clause:

-- Generate a series of numbers
SELECT * FROM GENERATE_SERIES(1, 100) AS t(n);

-- Generate a series with step
SELECT * FROM GENERATE_SERIES(0, 1000, 50) AS t(n);

-- Expand an array into rows
SELECT val FROM UNNEST(ARRAY[10, 20, 30]) AS t(val);

VALUES

Use VALUES as a standalone row constructor:

VALUES (1, 'alice'), (2, 'bob'), (3, 'carol');

EXPLAIN

View the query execution plan:

-- Logical plan
EXPLAIN SELECT * FROM orders WHERE total > 100;

-- Execute and show actual timings
EXPLAIN ANALYZE SELECT COUNT(*) FROM orders GROUP BY status;