Lecture 13¶
In [1]:
%reload_ext sql
In [2]:
!psql -h localhost -c 'DROP DATABASE IF EXISTS lec13'
!psql -h localhost -c 'CREATE DATABASE lec13'
DROP DATABASE CREATE DATABASE
In [3]:
%sql postgresql://127.0.0.1:5432/lec13
Window Functions¶
First set up the table.
In [4]:
%%sql
DROP TABLE IF EXISTS empsalary;
CREATE TABLE empsalary (
depname TEXT,
empno INTEGER,
salary INTEGER
);
INSERT INTO empsalary VALUES
('develop', 11, 5200),
('develop', 7, 4200),
('develop', 9, 4500),
('develop', 8, 6000),
('develop', 10, 5200),
('personnel', 5, 3500),
('personnel', 2, 3900),
('sales', 3, 4800),
('sales', 1, 5000),
('sales', 4, 4800)
;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[4]:
In [5]:
%%sql
SELECT depname, empno, salary,
avg(salary) OVER (PARTITION BY depname)
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[5]:
depname | empno | salary | avg |
---|---|---|---|
develop | 11 | 5200 | 5020.0000000000000000 |
develop | 7 | 4200 | 5020.0000000000000000 |
develop | 9 | 4500 | 5020.0000000000000000 |
develop | 8 | 6000 | 5020.0000000000000000 |
develop | 10 | 5200 | 5020.0000000000000000 |
personnel | 5 | 3500 | 3700.0000000000000000 |
personnel | 2 | 3900 | 3700.0000000000000000 |
sales | 3 | 4800 | 4866.6666666666666667 |
sales | 1 | 5000 | 4866.6666666666666667 |
sales | 4 | 4800 | 4866.6666666666666667 |
In [6]:
%%sql
SELECT depname, empno, salary,
rank() OVER (PARTITION BY depname ORDER BY salary DESC)
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[6]:
depname | empno | salary | rank |
---|---|---|---|
develop | 8 | 6000 | 1 |
develop | 10 | 5200 | 2 |
develop | 11 | 5200 | 2 |
develop | 9 | 4500 | 4 |
develop | 7 | 4200 | 5 |
personnel | 2 | 3900 | 1 |
personnel | 5 | 3500 | 2 |
sales | 1 | 5000 | 1 |
sales | 3 | 4800 | 2 |
sales | 4 | 4800 | 2 |
Window frames¶
In [7]:
%%sql
SELECT depname, salary, sum(salary) OVER () FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[7]:
depname | salary | sum |
---|---|---|
develop | 5200 | 47100 |
develop | 4200 | 47100 |
develop | 4500 | 47100 |
develop | 6000 | 47100 |
develop | 5200 | 47100 |
personnel | 3500 | 47100 |
personnel | 3900 | 47100 |
sales | 4800 | 47100 |
sales | 5000 | 47100 |
sales | 4800 | 47100 |
In [8]:
%%sql
SELECT depname, salary, sum(salary) OVER (ORDER BY salary) FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[8]:
depname | salary | sum |
---|---|---|
personnel | 3500 | 3500 |
personnel | 3900 | 7400 |
develop | 4200 | 11600 |
develop | 4500 | 16100 |
sales | 4800 | 25700 |
sales | 4800 | 25700 |
sales | 5000 | 30700 |
develop | 5200 | 41100 |
develop | 5200 | 41100 |
develop | 6000 | 47100 |
In [9]:
%%sql
SELECT depname, salary,
sum(salary) OVER (
ORDER BY salary
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[9]:
depname | salary | sum |
---|---|---|
personnel | 3500 | 3500 |
personnel | 3900 | 7400 |
develop | 4200 | 11600 |
develop | 4500 | 16100 |
sales | 4800 | 25700 |
sales | 4800 | 25700 |
sales | 5000 | 30700 |
develop | 5200 | 41100 |
develop | 5200 | 41100 |
develop | 6000 | 47100 |
In [10]:
%%sql
SELECT depname, salary,
sum(salary) OVER (
ORDER BY salary
RANGE BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[10]:
depname | salary | sum |
---|---|---|
personnel | 3500 | None |
personnel | 3900 | 3500 |
develop | 4200 | 7400 |
develop | 4500 | 11600 |
sales | 4800 | 16100 |
sales | 4800 | 16100 |
sales | 5000 | 25700 |
develop | 5200 | 30700 |
develop | 5200 | 30700 |
develop | 6000 | 41100 |
In [11]:
%%sql
SELECT depname, salary,
sum(salary) OVER (
PARTITION BY depname
ORDER BY salary)
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[11]:
depname | salary | sum |
---|---|---|
develop | 4200 | 4200 |
develop | 4500 | 8700 |
develop | 5200 | 19100 |
develop | 5200 | 19100 |
develop | 6000 | 25100 |
personnel | 3500 | 3500 |
personnel | 3900 | 7400 |
sales | 4800 | 9600 |
sales | 4800 | 9600 |
sales | 5000 | 14600 |
For slide display purposes¶
In [12]:
%%sql
SELECT
depname, salary,
sum(salary) OVER () AS a,
sum(salary) OVER (ORDER BY salary) AS b,
/* sum(salary) OVER (
ORDER BY salary
RANGE BETWEEN UNBOUNDED
PRECEDING AND CURRENT ROW) AS b, */
sum(salary) OVER (PARTITION BY
depname ORDER BY salary) AS c
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[12]:
depname | salary | a | b | c |
---|---|---|---|---|
personnel | 3500 | 47100 | 3500 | 3500 |
personnel | 3900 | 47100 | 7400 | 7400 |
develop | 4200 | 47100 | 11600 | 4200 |
develop | 4500 | 47100 | 16100 | 8700 |
sales | 4800 | 47100 | 25700 | 9600 |
sales | 4800 | 47100 | 25700 | 9600 |
sales | 5000 | 47100 | 30700 | 14600 |
develop | 5200 | 47100 | 41100 | 19100 |
develop | 5200 | 47100 | 41100 | 19100 |
develop | 6000 | 47100 | 47100 | 25100 |
Recursive Queries¶
In [13]:
%%sql
WITH RECURSIVE t(n) AS (
VALUES (10)
UNION ALL
SELECT n-1 FROM t WHERE n > 0
)
SELECT n FROM t;
Running query in 'postgresql://127.0.0.1:5432/lec13'
11 rows affected.
Out[13]:
n |
---|
10 |
9 |
8 |
7 |
6 |
5 |
4 |
3 |
2 |
1 |
In [14]:
%%sql
/* A bit harder to understand */
WITH RECURSIVE t(n) AS (
VALUES (1)
UNION ALL
SELECT n+1 FROM t WHERE n < 10
)
SELECT n FROM t;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[14]:
n |
---|
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
In [15]:
%%sql
DROP TABLE IF EXISTS parts;
CREATE TABLE parts (
part TEXT, sub_part TEXT, quantity INTEGER
);
INSERT INTO parts VALUES
('A', 'A.1', 1),
('A', 'A.2', 2),
('A', 'A.3', 3),
('B', 'B.1', 1),
('B', 'B.2', 2),
('B.2', 'B.2.i', 10),
('C', 'C.1', 1),
('C.1', 'C.1.i', 10),
('C.1.i', 'C.1.i.a', 10),
('C.1.i', 'C.1.i.b', 20),
('C.1.i.a', 'C.1.i.a.1', 30)
;
Running query in 'postgresql://127.0.0.1:5432/lec13'
11 rows affected.
Out[15]:
In [16]:
%%sql
SELECT * FROM parts;
Running query in 'postgresql://127.0.0.1:5432/lec13'
11 rows affected.
Out[16]:
part | sub_part | quantity |
---|---|---|
A | A.1 | 1 |
A | A.2 | 2 |
A | A.3 | 3 |
B | B.1 | 1 |
B | B.2 | 2 |
B.2 | B.2.i | 10 |
C | C.1 | 1 |
C.1 | C.1.i | 10 |
C.1.i | C.1.i.a | 10 |
C.1.i | C.1.i.b | 20 |
In [17]:
%%sql
WITH RECURSIVE included_parts(sub_part, part, quantity, depth) AS (
SELECT sub_part, part, quantity, 0 FROM parts WHERE part = 'C'
UNION ALL
SELECT p.sub_part, p.part, p.quantity * pr.quantity, pr.depth + 1
FROM included_parts pr, parts p
WHERE p.part = pr.sub_part
)
SELECT sub_part, depth, SUM(quantity) as total_quantity
FROM included_parts
GROUP BY sub_part, depth
ORDER BY depth;
Running query in 'postgresql://127.0.0.1:5432/lec13'
5 rows affected.
Out[17]:
sub_part | depth | total_quantity |
---|---|---|
C.1 | 0 | 1 |
C.1.i | 1 | 10 |
C.1.i.b | 2 | 200 |
C.1.i.a | 2 | 100 |
C.1.i.a.1 | 3 | 3000 |
CTEs¶
In [18]:
%config SqlMagic.displaylimit=30
In [19]:
%%sql
DROP TABLE IF EXISTS orders;
CREATE TABLE orders (
region TEXT,
product TEXT,
quantity INTEGER,
amount FLOAT
);
INSERT INTO orders VALUES
('US', 'apple', 100, 175.40),
('US', 'apple', 80, 99.70),
('US', 'banana', 50, 80.60),
('US', 'cheerios', 1000, 2500.95),
('Canada', 'apple', 500, 198.00),
('Canada', 'banana', 20, 45.50),
('Mexico', 'apple', 1000, 1100.39),
('Mexico', 'cheerios', 200, 500.50),
('Mexico', 'cheerios', 400, 430.65),
('Australia', 'banana', 40, 80.00),
('Australia', 'banana', 50, 99.45),
('Australia', 'banana', 2000, 2100.60),
('Australia', 'banana', 60, 75.00),
('Australia', 'cheerios', 500, 760.94)
;
SELECT * FROM orders;
Running query in 'postgresql://127.0.0.1:5432/lec13'
14 rows affected.
14 rows affected.
Out[19]:
region | product | quantity | amount |
---|---|---|---|
US | apple | 100 | 175.4 |
US | apple | 80 | 99.7 |
US | banana | 50 | 80.6 |
US | cheerios | 1000 | 2500.95 |
Canada | apple | 500 | 198.0 |
Canada | banana | 20 | 45.5 |
Mexico | apple | 1000 | 1100.39 |
Mexico | cheerios | 200 | 500.5 |
Mexico | cheerios | 400 | 430.65 |
Australia | banana | 40 | 80.0 |
Australia | banana | 50 | 99.45 |
Australia | banana | 2000 | 2100.6 |
Australia | banana | 60 | 75.0 |
Australia | cheerios | 500 | 760.94 |
In [20]:
%%sql
WITH regional_sales AS (
SELECT region, SUM(amount) AS total_sales
FROM orders
GROUP BY region
), top_regions AS (
SELECT region
FROM regional_sales
WHERE total_sales > (SELECT SUM(total_sales)/10 FROM regional_sales)
)
SELECT region,
product,
SUM(quantity) AS product_units,
SUM(amount) AS product_sales
FROM orders
WHERE region IN (SELECT region FROM top_regions)
GROUP BY region, product;
Running query in 'postgresql://127.0.0.1:5432/lec13'
7 rows affected.
Out[20]:
region | product | product_units | product_sales |
---|---|---|---|
Mexico | apple | 1000 | 1100.39 |
US | apple | 180 | 275.1 |
Australia | cheerios | 500 | 760.94 |
US | banana | 50 | 80.6 |
Mexico | cheerios | 600 | 931.15 |
US | cheerios | 1000 | 2500.95 |
Australia | banana | 2150 | 2355.0499999999997 |