Lecture 13¶

In [1]:
%reload_ext sql
In [2]:
!psql -h localhost -c 'DROP DATABASE IF EXISTS lec13'
!psql -h localhost -c 'CREATE DATABASE lec13' 
DROP DATABASE
CREATE DATABASE
In [3]:
%sql postgresql://127.0.0.1:5432/lec13

Window Functions¶

First set up the table.

In [4]:
%%sql
DROP TABLE IF EXISTS empsalary;
CREATE TABLE empsalary (
    depname TEXT,
    empno INTEGER,
    salary INTEGER
);

INSERT INTO empsalary VALUES
    ('develop',  11, 5200),
    ('develop',   7, 4200),
    ('develop',   9, 4500),
    ('develop',   8, 6000),
    ('develop',  10, 5200),
    ('personnel', 5, 3500),
    ('personnel', 2, 3900),
    ('sales',     3, 4800),
    ('sales',     1, 5000),
    ('sales',     4, 4800)
;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[4]:
In [5]:
%%sql
SELECT depname, empno, salary,
       avg(salary) OVER (PARTITION BY depname)
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[5]:
depname empno salary avg
develop 11 5200 5020.0000000000000000
develop 7 4200 5020.0000000000000000
develop 9 4500 5020.0000000000000000
develop 8 6000 5020.0000000000000000
develop 10 5200 5020.0000000000000000
personnel 5 3500 3700.0000000000000000
personnel 2 3900 3700.0000000000000000
sales 3 4800 4866.6666666666666667
sales 1 5000 4866.6666666666666667
sales 4 4800 4866.6666666666666667
Truncated to displaylimit of 10.
In [6]:
%%sql
SELECT depname, empno, salary,
       rank() OVER (PARTITION BY depname ORDER BY salary DESC)
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[6]:
depname empno salary rank
develop 8 6000 1
develop 10 5200 2
develop 11 5200 2
develop 9 4500 4
develop 7 4200 5
personnel 2 3900 1
personnel 5 3500 2
sales 1 5000 1
sales 3 4800 2
sales 4 4800 2
Truncated to displaylimit of 10.

Window frames¶

In [7]:
%%sql
SELECT depname, salary, sum(salary) OVER () FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[7]:
depname salary sum
develop 5200 47100
develop 4200 47100
develop 4500 47100
develop 6000 47100
develop 5200 47100
personnel 3500 47100
personnel 3900 47100
sales 4800 47100
sales 5000 47100
sales 4800 47100
Truncated to displaylimit of 10.
In [8]:
%%sql
SELECT depname, salary, sum(salary) OVER (ORDER BY salary) FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[8]:
depname salary sum
personnel 3500 3500
personnel 3900 7400
develop 4200 11600
develop 4500 16100
sales 4800 25700
sales 4800 25700
sales 5000 30700
develop 5200 41100
develop 5200 41100
develop 6000 47100
Truncated to displaylimit of 10.
In [9]:
%%sql
SELECT depname, salary,
   sum(salary) OVER (
       ORDER BY salary
       RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[9]:
depname salary sum
personnel 3500 3500
personnel 3900 7400
develop 4200 11600
develop 4500 16100
sales 4800 25700
sales 4800 25700
sales 5000 30700
develop 5200 41100
develop 5200 41100
develop 6000 47100
Truncated to displaylimit of 10.
In [10]:
%%sql
SELECT depname, salary,
       sum(salary) OVER (
           ORDER BY salary
           RANGE BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[10]:
depname salary sum
personnel 3500 None
personnel 3900 3500
develop 4200 7400
develop 4500 11600
sales 4800 16100
sales 4800 16100
sales 5000 25700
develop 5200 30700
develop 5200 30700
develop 6000 41100
Truncated to displaylimit of 10.
In [11]:
%%sql
SELECT depname, salary,
       sum(salary) OVER (
           PARTITION BY depname
           ORDER BY salary)
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[11]:
depname salary sum
develop 4200 4200
develop 4500 8700
develop 5200 19100
develop 5200 19100
develop 6000 25100
personnel 3500 3500
personnel 3900 7400
sales 4800 9600
sales 4800 9600
sales 5000 14600
Truncated to displaylimit of 10.

For slide display purposes¶

In [12]:
%%sql
SELECT
    depname, salary,
    sum(salary) OVER () AS a,
    sum(salary) OVER (ORDER BY salary) AS b,
/*    sum(salary) OVER (
      ORDER BY salary
      RANGE BETWEEN UNBOUNDED 
        PRECEDING AND CURRENT ROW) AS b, */
    sum(salary) OVER (PARTITION BY 
      depname ORDER BY salary) AS c
FROM empsalary;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[12]:
depname salary a b c
personnel 3500 47100 3500 3500
personnel 3900 47100 7400 7400
develop 4200 47100 11600 4200
develop 4500 47100 16100 8700
sales 4800 47100 25700 9600
sales 4800 47100 25700 9600
sales 5000 47100 30700 14600
develop 5200 47100 41100 19100
develop 5200 47100 41100 19100
develop 6000 47100 47100 25100
Truncated to displaylimit of 10.

Recursive Queries¶

In [13]:
%%sql
WITH RECURSIVE t(n) AS (
    VALUES (10)
  UNION ALL
    SELECT n-1 FROM t WHERE n > 0 
)
SELECT n FROM t;
Running query in 'postgresql://127.0.0.1:5432/lec13'
11 rows affected.
Out[13]:
n
10
9
8
7
6
5
4
3
2
1
Truncated to displaylimit of 10.
In [14]:
%%sql
/* A bit harder to understand */
WITH RECURSIVE t(n) AS (
    VALUES (1)
  UNION ALL
    SELECT n+1 FROM t WHERE n < 10
)
SELECT n FROM t;
Running query in 'postgresql://127.0.0.1:5432/lec13'
10 rows affected.
Out[14]:
n
1
2
3
4
5
6
7
8
9
10
Truncated to displaylimit of 10.
In [15]:
%%sql
DROP TABLE IF EXISTS parts;
CREATE TABLE parts (
    part TEXT, sub_part TEXT, quantity INTEGER
);

INSERT INTO parts VALUES
    ('A', 'A.1', 1),
    ('A', 'A.2', 2),
    ('A', 'A.3', 3),
    ('B', 'B.1', 1),
    ('B', 'B.2', 2),
    ('B.2', 'B.2.i', 10),
    ('C', 'C.1', 1),
    ('C.1', 'C.1.i', 10),
    ('C.1.i', 'C.1.i.a', 10),
    ('C.1.i', 'C.1.i.b', 20),
    ('C.1.i.a', 'C.1.i.a.1', 30)
;
Running query in 'postgresql://127.0.0.1:5432/lec13'
11 rows affected.
Out[15]:
In [16]:
%%sql 
SELECT * FROM parts;
Running query in 'postgresql://127.0.0.1:5432/lec13'
11 rows affected.
Out[16]:
part sub_part quantity
A A.1 1
A A.2 2
A A.3 3
B B.1 1
B B.2 2
B.2 B.2.i 10
C C.1 1
C.1 C.1.i 10
C.1.i C.1.i.a 10
C.1.i C.1.i.b 20
Truncated to displaylimit of 10.
In [17]:
%%sql
WITH RECURSIVE included_parts(sub_part, part, quantity, depth) AS (
    SELECT sub_part, part, quantity, 0 FROM parts WHERE part = 'C'
  UNION ALL
    SELECT p.sub_part, p.part, p.quantity * pr.quantity, pr.depth + 1
    FROM included_parts pr, parts p
    WHERE p.part = pr.sub_part
)
SELECT sub_part, depth, SUM(quantity) as total_quantity
FROM included_parts
GROUP BY sub_part, depth
ORDER BY depth;
Running query in 'postgresql://127.0.0.1:5432/lec13'
5 rows affected.
Out[17]:
sub_part depth total_quantity
C.1 0 1
C.1.i 1 10
C.1.i.b 2 200
C.1.i.a 2 100
C.1.i.a.1 3 3000

CTEs¶

In [18]:
%config SqlMagic.displaylimit=30
In [19]:
%%sql
DROP TABLE IF EXISTS orders;
CREATE TABLE orders (
    region TEXT,
    product TEXT,
    quantity INTEGER,
    amount FLOAT
);

INSERT INTO orders VALUES
    ('US', 'apple', 100, 175.40),
    ('US', 'apple', 80, 99.70),
    ('US', 'banana', 50, 80.60),
    ('US', 'cheerios', 1000, 2500.95),
    ('Canada', 'apple', 500, 198.00),
    ('Canada', 'banana', 20, 45.50),
    ('Mexico', 'apple', 1000, 1100.39),
    ('Mexico', 'cheerios', 200, 500.50),
    ('Mexico', 'cheerios', 400, 430.65),
    ('Australia', 'banana', 40, 80.00),
    ('Australia', 'banana', 50, 99.45),
    ('Australia', 'banana', 2000, 2100.60),
    ('Australia', 'banana', 60, 75.00),
    ('Australia', 'cheerios', 500, 760.94)
;

SELECT * FROM orders;
Running query in 'postgresql://127.0.0.1:5432/lec13'
14 rows affected.
14 rows affected.
Out[19]:
region product quantity amount
US apple 100 175.4
US apple 80 99.7
US banana 50 80.6
US cheerios 1000 2500.95
Canada apple 500 198.0
Canada banana 20 45.5
Mexico apple 1000 1100.39
Mexico cheerios 200 500.5
Mexico cheerios 400 430.65
Australia banana 40 80.0
Australia banana 50 99.45
Australia banana 2000 2100.6
Australia banana 60 75.0
Australia cheerios 500 760.94
In [20]:
%%sql
WITH regional_sales AS (
    SELECT region, SUM(amount) AS total_sales
    FROM orders
    GROUP BY region
), top_regions AS (
    SELECT region
    FROM regional_sales
    WHERE total_sales > (SELECT SUM(total_sales)/10 FROM regional_sales)
)
SELECT region,
       product,
       SUM(quantity) AS product_units,
       SUM(amount) AS product_sales
FROM orders
WHERE region IN (SELECT region FROM top_regions)
GROUP BY region, product;
Running query in 'postgresql://127.0.0.1:5432/lec13'
7 rows affected.
Out[20]:
region product product_units product_sales
Mexico apple 1000 1100.39
US apple 180 275.1
Australia cheerios 500 760.94
US banana 50 80.6
Mexico cheerios 600 931.15
US cheerios 1000 2500.95
Australia banana 2150 2355.0499999999997