Skip to content

SELECT

SELECT [DISTINCT [ON (expr, ...)]] select_list
FROM table_reference
[WHERE condition]
[GROUP BY expression [, ...]]
[HAVING condition]
[WINDOW window_name AS (window_definition) [, ...]]
[{ UNION | INTERSECT | EXCEPT } [ALL] select]
[ORDER BY expression [ASC | DESC] [NULLS { FIRST | LAST }] [, ...]]
[LIMIT count]
[OFFSET start]
[FOR { UPDATE | SHARE } [OF table_name] [NOWAIT | SKIP LOCKED]]

Select specific columns, expressions, or all columns with *:

-- All columns
SELECT * FROM products;
-- Specific columns
SELECT name, price FROM products;
-- Expressions with aliases
SELECT
name,
price * 1.1 AS price_with_tax,
UPPER(category) AS category_upper
FROM products;
-- Constants and functions
SELECT 42 AS answer, NOW() AS current_time;

Use AS (or just a space) to name output columns:

SELECT
first_name || ' ' || last_name AS full_name,
EXTRACT(YEAR FROM hire_date) hire_year
FROM employees;

Filter rows with a boolean predicate. Only rows where the condition evaluates to TRUE are returned.

SELECT name, price
FROM products
WHERE price > 50 AND category = 'electronics';
WHERE age = 30 -- equal
WHERE age != 30 -- not equal
WHERE age <> 30 -- not equal (alternate)
WHERE price < 100 -- less than
WHERE price <= 100 -- less than or equal
WHERE price > 10 -- greater than
WHERE price >= 10 -- greater than or equal
WHERE price > 10 AND price < 50
WHERE status = 'active' OR status = 'pending'
WHERE NOT is_deleted

Inclusive range test:

SELECT * FROM orders
WHERE total BETWEEN 100 AND 500;
-- Equivalent to:
-- WHERE total >= 100 AND total <= 500

Test membership in a list of values:

SELECT * FROM products
WHERE category IN ('electronics', 'books', 'clothing');
-- Negated
SELECT * FROM products
WHERE category NOT IN ('discontinued', 'recalled');

Pattern matching with wildcards. % matches any sequence of characters, _ matches any single character.

-- Case-sensitive
SELECT * FROM users WHERE email LIKE '%@gmail.com';
-- Case-insensitive
SELECT * FROM users WHERE name ILIKE '%smith%';
-- Single character wildcard
SELECT * FROM products WHERE sku LIKE 'A-___'; -- matches A-001, A-xyz, etc.
-- Negated
SELECT * FROM users WHERE email NOT LIKE '%@spam.%';
SELECT * FROM users WHERE phone IS NULL;
SELECT * FROM users WHERE email IS NOT NULL;

Conditional logic within a query:

SELECT
name,
price,
CASE
WHEN price < 10 THEN 'budget'
WHEN price < 100 THEN 'mid-range'
ELSE 'premium'
END AS tier
FROM products;

Remove duplicate rows from the result:

-- Distinct on all columns
SELECT DISTINCT category FROM products;
-- DISTINCT ON (PostgreSQL extension): keep first row per group
SELECT DISTINCT ON (customer_id) customer_id, order_id, total
FROM orders
ORDER BY customer_id, created_at DESC;

Sort the result set. Each sort key can specify direction and null ordering:

SELECT name, price, created_at
FROM products
ORDER BY price DESC, name ASC;

Control where nulls appear in sorted output:

-- Nulls at the end (default for ASC)
SELECT * FROM products ORDER BY rating ASC NULLS LAST;
-- Nulls at the beginning
SELECT * FROM products ORDER BY rating DESC NULLS FIRST;
SELECT name, price * quantity AS line_total
FROM order_items
ORDER BY price * quantity DESC;
-- Or reference the alias
SELECT name, price * quantity AS line_total
FROM order_items
ORDER BY line_total DESC;

Restrict the number of rows returned:

-- First 10 rows
SELECT * FROM products ORDER BY created_at DESC LIMIT 10;
-- Skip 20, then return 10 (page 3 of a paginated result)
SELECT * FROM products ORDER BY id LIMIT 10 OFFSET 20;

ParticleDB optimizes LIMIT on top of ORDER BY using Top-K partial sort (O(N log K) instead of O(N log N)).


Return a random sample of rows from a table:

-- Sample approximately 10% of rows (row-level Bernoulli sampling)
SELECT * FROM events TABLESAMPLE BERNOULLI(10);
-- Block-level sampling (faster, less uniform)
SELECT * FROM events TABLESAMPLE SYSTEM(5);
-- Repeatable sampling with a seed for deterministic results
SELECT * FROM events TABLESAMPLE BERNOULLI(10) REPEATABLE(42);

A subquery that returns a single value:

SELECT
name,
price,
price - (SELECT AVG(price) FROM products) AS diff_from_avg
FROM products;

Test membership against a subquery result:

SELECT * FROM customers
WHERE id IN (SELECT customer_id FROM orders WHERE total > 1000);
SELECT * FROM products
WHERE id NOT IN (SELECT product_id FROM discontinued);

Test whether a subquery returns any rows:

-- Customers who have placed at least one order
SELECT c.name
FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o WHERE o.customer_id = c.id
);
-- Customers with no orders
SELECT c.name
FROM customers c
WHERE NOT EXISTS (
SELECT 1 FROM orders o WHERE o.customer_id = c.id
);

Compare a value to rows from a subquery:

-- Products more expensive than at least one premium product
SELECT * FROM products
WHERE price > ANY (SELECT price FROM products WHERE category = 'premium');
-- Products more expensive than ALL budget products
SELECT * FROM products
WHERE price > ALL (SELECT price FROM products WHERE category = 'budget');

Use a subquery as a table in the FROM clause:

SELECT category, avg_price
FROM (
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category
) sub
WHERE avg_price > 50
ORDER BY avg_price DESC;

A lateral subquery can reference columns from preceding tables in the FROM clause:

SELECT c.name, recent.total
FROM customers c,
LATERAL (
SELECT total FROM orders
WHERE customer_id = c.id
ORDER BY created_at DESC
LIMIT 1
) recent;

Define named subqueries for readability and reuse:

WITH
active_customers AS (
SELECT id, name FROM customers WHERE status = 'active'
),
recent_orders AS (
SELECT customer_id, SUM(total) AS total_spent
FROM orders
WHERE created_at > CURRENT_TIMESTAMP - 86400000000 -- last 24h in microseconds
GROUP BY customer_id
)
SELECT ac.name, ro.total_spent
FROM active_customers ac
JOIN recent_orders ro ON ac.id = ro.customer_id
ORDER BY ro.total_spent DESC;

Recursive CTEs for hierarchical or graph queries:

-- Org chart: find all reports under manager #1
WITH RECURSIVE reports AS (
-- Anchor: the manager
SELECT id, name, manager_id, 1 AS depth
FROM employees
WHERE id = 1
UNION ALL
-- Recursive: their direct reports
SELECT e.id, e.name, e.manager_id, r.depth + 1
FROM employees e
JOIN reports r ON e.manager_id = r.id
WHERE r.depth < 10
)
SELECT * FROM reports ORDER BY depth, name;

Combine results from multiple queries:

-- Deduplicated (removes duplicates)
SELECT name FROM customers
UNION
SELECT name FROM suppliers;
-- Keep all rows including duplicates
SELECT name FROM customers
UNION ALL
SELECT name FROM suppliers;

Rows that appear in both queries:

SELECT customer_id FROM orders_2024
INTERSECT
SELECT customer_id FROM orders_2025;

Rows from the first query that don’t appear in the second:

-- Customers from 2024 who didn't order in 2025
SELECT customer_id FROM orders_2024
EXCEPT
SELECT customer_id FROM orders_2025;

Group rows and apply aggregate functions. See Aggregations for the full list of aggregate functions.

SELECT
category,
COUNT(*) AS num_products,
AVG(price) AS avg_price,
MAX(price) AS max_price
FROM products
GROUP BY category
HAVING COUNT(*) >= 5
ORDER BY avg_price DESC;

Compute aggregates for multiple grouping levels in a single pass:

-- ROLLUP: hierarchical subtotals
SELECT region, category, SUM(revenue)
FROM sales
GROUP BY ROLLUP(region, category);
-- CUBE: all combinations
SELECT region, category, SUM(revenue)
FROM sales
GROUP BY CUBE(region, category);
-- GROUPING SETS: specific combinations
SELECT region, category, SUM(revenue)
FROM sales
GROUP BY GROUPING SETS ((region, category), (region), ());

Window functions compute values across a set of rows related to the current row, without collapsing rows like GROUP BY.

function_name(args) OVER (
[PARTITION BY partition_expr [, ...]]
[ORDER BY sort_expr [ASC | DESC] [NULLS { FIRST | LAST }] [, ...]]
[frame_clause]
)
SELECT
department,
name,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS row_num,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS rank,
DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dense_rank,
NTILE(4) OVER (PARTITION BY department ORDER BY salary DESC) AS quartile,
PERCENT_RANK() OVER (PARTITION BY department ORDER BY salary) AS pct_rank,
CUME_DIST() OVER (PARTITION BY department ORDER BY salary) AS cum_dist
FROM employees;
FunctionDescription
ROW_NUMBER()Unique sequential integer per partition, no ties
RANK()Rank with gaps (1, 2, 2, 4)
DENSE_RANK()Rank without gaps (1, 2, 2, 3)
NTILE(n)Distribute rows into n roughly equal buckets
PERCENT_RANK()Relative rank: (rank - 1) / (total_rows - 1)
CUME_DIST()Cumulative distribution: fraction of rows ≤ current
SELECT
date,
revenue,
LAG(revenue, 1, 0) OVER (ORDER BY date) AS prev_day,
LEAD(revenue, 1, 0) OVER (ORDER BY date) AS next_day,
FIRST_VALUE(revenue) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS first_rev,
LAST_VALUE(revenue) OVER (ORDER BY date ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS last_rev,
NTH_VALUE(revenue, 3) OVER (ORDER BY date) AS third_rev
FROM daily_revenue;
FunctionDescription
LAG(expr [, offset [, default]])Value from offset rows before current (default: 1)
LEAD(expr [, offset [, default]])Value from offset rows after current (default: 1)
FIRST_VALUE(expr)First value in the window frame
LAST_VALUE(expr)Last value in the window frame
NTH_VALUE(expr, n)Value at position n in the window frame

Any aggregate function can be used as a window function:

SELECT
date,
revenue,
SUM(revenue) OVER (ORDER BY date) AS running_total,
AVG(revenue) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS week_avg,
COUNT(*) OVER (PARTITION BY region) AS region_count
FROM daily_revenue;

Control which rows are included in the window frame:

-- Syntax
{ ROWS | RANGE | GROUPS } BETWEEN frame_start AND frame_end
-- Frame boundaries
UNBOUNDED PRECEDING -- from the beginning of the partition
n PRECEDING -- n rows before current
CURRENT ROW -- the current row
n FOLLOWING -- n rows after current
UNBOUNDED FOLLOWING -- to the end of the partition

Examples:

-- Running total (all rows up to current)
SUM(x) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
-- 7-day moving average
AVG(x) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)
-- Centered 5-row window
AVG(x) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)

ParticleDB includes built-in window functions designed for time-series analysis. These operate on ordered partitions and are typically used with an ORDER BY on a timestamp column.

FunctionDescription
MOVING_AVG(expr [, window_size])Moving average over the last window_size rows (default varies)
MOVING_SUM(expr [, window_size])Moving sum over the last window_size rows
EXP_MOVING_AVG(expr [, alpha])Exponential moving average with smoothing factor alpha (0 to 1)
DELTA(expr)Difference between the current row and the previous row
RATE(expr)Rate of change from the previous row
SELECT
timestamp,
cpu_usage,
MOVING_AVG(cpu_usage, 5) OVER (ORDER BY timestamp) AS avg_5,
MOVING_SUM(cpu_usage, 5) OVER (ORDER BY timestamp) AS sum_5,
EXP_MOVING_AVG(cpu_usage, 0.3) OVER (ORDER BY timestamp) AS ema,
DELTA(cpu_usage) OVER (ORDER BY timestamp) AS change,
RATE(cpu_usage) OVER (ORDER BY timestamp) AS rate
FROM metrics;

Define a window once and reference it multiple times:

SELECT
name,
salary,
RANK() OVER w AS rank,
SUM(salary) OVER w AS running_total,
AVG(salary) OVER w AS running_avg
FROM employees
WINDOW w AS (PARTITION BY department ORDER BY salary DESC);

Lock selected rows for the duration of the current transaction:

-- Exclusive lock (blocks other FOR UPDATE and FOR SHARE)
SELECT * FROM accounts WHERE id = 42 FOR UPDATE;
-- Shared lock (blocks FOR UPDATE, allows other FOR SHARE)
SELECT * FROM accounts WHERE id = 42 FOR SHARE;
-- Lock only specific tables in a join
SELECT * FROM orders o JOIN items i ON o.id = i.order_id
FOR UPDATE OF orders;
-- Error immediately if rows are already locked
SELECT * FROM jobs WHERE status = 'pending'
ORDER BY created_at LIMIT 1
FOR UPDATE NOWAIT;
-- Skip over locked rows (useful for job queues)
SELECT * FROM jobs WHERE status = 'pending'
ORDER BY created_at LIMIT 1
FOR UPDATE SKIP LOCKED;

Use functions that return rows in the FROM clause:

-- Generate a series of numbers
SELECT * FROM GENERATE_SERIES(1, 100) AS t(n);
-- Generate a series with step
SELECT * FROM GENERATE_SERIES(0, 1000, 50) AS t(n);
-- Expand an array into rows
SELECT val FROM UNNEST(ARRAY[10, 20, 30]) AS t(val);

Use VALUES as a standalone row constructor:

VALUES (1, 'alice'), (2, 'bob'), (3, 'carol');

View the query execution plan:

-- Logical plan
EXPLAIN SELECT * FROM orders WHERE total > 100;
-- Execute and show actual timings
EXPLAIN ANALYZE SELECT COUNT(*) FROM orders GROUP BY status;