# Run this cell to set up imports
import numpy as np
import pandas as pd

# Link the shared lecture data directory into the working directory, then
# extract the IMDB dump into it (-u: only extract files that are new or newer).
!ln -sf ../../lec/lec06/data .
!unzip -u data/imdb_perf_lecture.zip -d data/

Archive:  data/imdb_perf_lecture.zip

# Rebuild the database from scratch: drop any previous copy, create an empty
# one, then run the SQL dump, which creates the tables and loads their rows.
!psql -h localhost -c 'DROP DATABASE IF EXISTS imdb_perf_lecture'
!psql -h localhost -c 'CREATE DATABASE imdb_perf_lecture' 
!psql -h localhost -d imdb_perf_lecture -f data/imdb_perf_lecture.sql

DROP DATABASE
CREATE DATABASE
SET
SET
SET
SET
SET
 set_config 
------------
 
(1 row)

SET
SET
SET
SET
SET
SET
CREATE TABLE
ALTER TABLE
CREATE TABLE
ALTER TABLE
CREATE TABLE
ALTER TABLE
COPY 845888
COPY 2211936
COPY 656453
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE

%reload_ext sql

%sql postgresql://127.0.0.1:5432/imdb_perf_lecture

# Run this cell to raise the result display limit from the default 10 rows to 20
%config SqlMagic.displaylimit = 20

def printplans(x):
    """Print an EXPLAIN result set as a left-aligned, untruncated table.

    Args:
        x: A SQL-magic result object exposing a ``DataFrame()`` method
            (the cells in this notebook pass ``__``).

    Returns:
        None. The rendered table is written to standard output.
    """
    result = x.DataFrame()

    # Pad every cell on the right to the width of its column's longest entry
    # so the text ends up left-aligned: the indentation of a plan line encodes
    # its depth in the plan tree, and pandas right-justifies cells by default.
    # (A Styler only affects HTML rendering, so it cannot do this for print().)
    formatters = []
    for position in range(result.shape[1]):
        column = result.iloc[:, position]
        width = max((len(str(value)) for value in column), default=0)
        formatters.append(lambda value, width=width: str(value).ljust(width))

    # max_colwidth=None keeps long plan lines intact instead of cutting them
    # off with "..." at the display.max_colwidth option.
    rendered = result.to_string(
        formatters=formatters,
        justify="left",
        max_colwidth=None,
    )
    print(rendered)

%%sql
/* 1 */
EXPLAIN ANALYZE SELECT id FROM actors
WHERE id > 4000000 AND
name='Tom Hanks';

printplans(__)

                                          QUERY PLAN
0  Gather  (cost=1000.00..11512.90 rows=1 width=4...
1                                 Workers Planned: 2
2                                Workers Launched: 2
3    ->  Parallel Seq Scan on actors  (cost=0.00....
4          Filter: ((id > 4000000) AND (name = 'T...
5                     Rows Removed by Filter: 281963
6                            Planning Time: 0.065 ms
7                          Execution Time: 26.756 ms

%%sql
/* 2 */
EXPLAIN ANALYZE
SELECT id FROM actors
ORDER BY name;

printplans(__)

                                          QUERY PLAN
0  Sort  (cost=114312.51..116427.23 rows=845888 w...
1                                     Sort Key: name
2         Sort Method: external merge  Disk: 23672kB
3    ->  Seq Scan on actors  (cost=0.00..13684.88...
4                            Planning Time: 0.051 ms
5                        Execution Time: 2815.284 ms

%%sql
/* 3 */
EXPLAIN ANALYZE
SELECT id FROM actors
ORDER BY id
LIMIT 10;

printplans(__)

                                          QUERY PLAN
0  Limit  (cost=0.42..0.68 rows=10 width=4) (actu...
1    ->  Index Only Scan using actor_pkey on acto...
2                                    Heap Fetches: 0
3                            Planning Time: 0.076 ms
4                           Execution Time: 0.037 ms

%%sql
EXPLAIN ANALYZE
SELECT *
FROM actors, cast_info
WHERE actors.id = cast_info.person_id;

printplans(__)

                                          QUERY PLAN
0  Hash Join  (cost=29215.48..89168.21 rows=22119...
1       Hash Cond: (cast_info.person_id = actors.id)
2    ->  Seq Scan on cast_info  (cost=0.00..31907...
3    ->  Hash  (cost=13684.88..13684.88 rows=8458...
4          Buckets: 65536  Batches: 16  Memory Us...
5          ->  Seq Scan on actors  (cost=0.00..13...
6                            Planning Time: 0.152 ms
7                        Execution Time: 1513.363 ms

%%sql
EXPLAIN ANALYZE
SELECT *
FROM actors, cast_info
WHERE actors.id = cast_info.person_id
LIMIT 10;

printplans(__)

                                           QUERY PLAN
0   Limit  (cost=0.43..4.49 rows=10 width=26) (act...
1     ->  Nested Loop  (cost=0.43..895896.13 rows=...
2           ->  Seq Scan on cast_info  (cost=0.00....
3           ->  Memoize  (cost=0.43..0.47 rows=1 w...
4                      Cache Key: cast_info.person_id
5                                 Cache Mode: logical
6                 Hits: 2  Misses: 8  Evictions: 0...
7                 ->  Index Scan using actor_pkey ...
8                       Index Cond: (id = cast_inf...
9                             Planning Time: 0.166 ms
10                           Execution Time: 0.497 ms

%%sql
EXPLAIN ANALYZE
SELECT name, movie_id
FROM actors, cast_info
WHERE actors.id = cast_info.person_id;

printplans(__)

                                          QUERY PLAN
0  Hash Join  (cost=29215.48..89168.21 rows=22119...
1       Hash Cond: (cast_info.person_id = actors.id)
2    ->  Seq Scan on cast_info  (cost=0.00..31907...
3    ->  Hash  (cost=13684.88..13684.88 rows=8458...
4          Buckets: 65536  Batches: 16  Memory Us...
5          ->  Seq Scan on actors  (cost=0.00..13...
6                            Planning Time: 0.145 ms
7                        Execution Time: 1363.600 ms

%%sql
EXPLAIN ANALYZE
SELECT name, movie_id
FROM actors, cast_info
WHERE actors.id = cast_info.person_id AND actors.id > 4000000;

printplans(__)

                                          QUERY PLAN
0  Hash Join  (cost=23988.61..81598.34 rows=11662...
1       Hash Cond: (cast_info.person_id = actors.id)
2    ->  Seq Scan on cast_info  (cost=0.00..31907...
3    ->  Hash  (cost=15799.60..15799.60 rows=4460...
4          Buckets: 65536  Batches: 8  Memory Usa...
5          ->  Seq Scan on actors  (cost=0.00..15...
6                             Filter: (id > 4000000)
7                     Rows Removed by Filter: 401107
8                            Planning Time: 0.151 ms
9                        Execution Time: 1098.684 ms

%%sql 
EXPLAIN ANALYZE
SELECT *
FROM actors, cast_info, movies
WHERE actors.id = cast_info.person_id
    AND movies.id = cast_info.movie_id
LIMIT 10;

printplans(__)

                                           QUERY PLAN
0   Limit  (cost=0.86..9.46 rows=10 width=56) (act...
1     ->  Nested Loop  (cost=0.86..1901329.72 rows...
2           ->  Nested Loop  (cost=0.43..895896.13...
3                 ->  Seq Scan on cast_info  (cost...
4                 ->  Memoize  (cost=0.43..0.47 ro...
5                       Cache Key: cast_info.perso...
6                                 Cache Mode: logical
7                       Hits: 2  Misses: 8  Evicti...
8                       ->  Index Scan using actor...
9                             Index Cond: (id = ca...
10          ->  Index Scan using movie_pkey on mov...
11                Index Cond: (id = cast_info.movi...
12                            Planning Time: 0.533 ms
13                           Execution Time: 0.442 ms

%%sql
EXPLAIN ANALYZE
SELECT *
FROM actors, cast_info, movies
WHERE actors.id = cast_info.person_id
    AND movies.id = cast_info.movie_id
    AND name = 'Tom Hanks';

printplans(__)

                                           QUERY PLAN
0   Gather  (cost=10632.10..32056.15 rows=3 width=...
1                                  Workers Planned: 2
2                                 Workers Launched: 2
3     ->  Nested Loop  (cost=9632.10..31055.85 row...
4           ->  Parallel Hash Join  (cost=9631.68....
5                 Hash Cond: (cast_info.person_id ...
6                 ->  Parallel Seq Scan on cast_in...
7                 ->  Parallel Hash  (cost=9631.67...
8                       Buckets: 1024  Batches: 1 ...
9                       ->  Parallel Seq Scan on a...
10                            Filter: (name = 'Tom...
11                            Rows Removed by Filt...
12          ->  Index Scan using movie_pkey on mov...
13                Index Cond: (id = cast_info.movi...
14                            Planning Time: 0.257 ms
15                         Execution Time: 295.372 ms

%%sql
EXPLAIN ANALYZE
SELECT *
FROM actors, cast_info, movies
WHERE actors.id = cast_info.person_id
    AND movies.id = cast_info.movie_id
    AND title LIKE 'Snakes on a Plane';

printplans(__)

                                           QUERY PLAN
0   Gather  (cost=9279.46..30704.82 rows=7 width=5...
1                                  Workers Planned: 2
2                                 Workers Launched: 2
3     ->  Nested Loop  (cost=8279.46..29704.12 row...
4           ->  Parallel Hash Join  (cost=8279.04....
5                 Hash Cond: (cast_info.movie_id =...
6                 ->  Parallel Seq Scan on cast_in...
7                 ->  Parallel Hash  (cost=8279.03...
8                       Buckets: 1024  Batches: 1 ...
9                       ->  Parallel Seq Scan on m...
10                            Filter: (title ~~ 'S...
11                            Rows Removed by Filt...
12          ->  Index Scan using actor_pkey on act...
13                Index Cond: (id = cast_info.pers...
14                            Planning Time: 0.270 ms
15                         Execution Time: 329.274 ms

%%sql
EXPLAIN ANALYZE
SELECT *
FROM actors, cast_info, movies
WHERE actors.id = cast_info.person_id
    AND movies.id = cast_info.movie_id
LIMIT 10;

printplans(__)

                                           QUERY PLAN
0   Limit  (cost=0.86..9.46 rows=10 width=56) (act...
1     ->  Nested Loop  (cost=0.86..1901329.72 rows...
2           ->  Nested Loop  (cost=0.43..895896.13...
3                 ->  Seq Scan on cast_info  (cost...
4                 ->  Memoize  (cost=0.43..0.47 ro...
5                       Cache Key: cast_info.perso...
6                                 Cache Mode: logical
7                       Hits: 2  Misses: 8  Evicti...
8                       ->  Index Scan using actor...
9                             Index Cond: (id = ca...
10          ->  Index Scan using movie_pkey on mov...
11                Index Cond: (id = cast_info.movi...
12                            Planning Time: 0.272 ms
13                           Execution Time: 0.333 ms

# Drop the actor_pkey constraint, and with it the actor_pkey index that the
# plan above used for lookups on actors.id, to see how the planner copes
# without it. CASCADE also drops anything that depends on the constraint
# (presumably foreign keys referencing actors.id -- confirm against the dump).
%sql ALTER TABLE actors DROP CONSTRAINT actor_pkey CASCADE;

%%sql
EXPLAIN ANALYZE
SELECT *
FROM actors, cast_info, movies
WHERE actors.id = cast_info.person_id
    AND movies.id = cast_info.movie_id
LIMIT 10;

printplans(__)

                                           QUERY PLAN
0   Limit  (cost=16222.62..16228.33 rows=10 width=...
1     ->  Nested Loop  (cost=16222.62..1278418.10 ...
2           ->  Gather  (cost=16222.20..272984.51 ...
3                                  Workers Planned: 2
4                                 Workers Launched: 2
5                 ->  Parallel Hash Join  (cost=15...
6                       Hash Cond: (cast_info.pers...
7                       ->  Parallel Seq Scan on c...
8                       ->  Parallel Hash  (cost=8...
9                             Buckets: 65536  Batc...
10                            ->  Parallel Seq Sca...
11          ->  Index Scan using movie_pkey on mov...
12                Index Cond: (id = cast_info.movi...
13                            Planning Time: 0.286 ms
14                         Execution Time: 846.299 ms

# Drop the movie_pkey constraint, and with it the movie_pkey index that the
# plan above still used for lookups on movies.id, leaving the join with no
# usable key index. CASCADE also drops anything that depends on the constraint
# (presumably foreign keys referencing movies.id -- confirm against the dump).
%sql ALTER TABLE movies DROP CONSTRAINT movie_pkey CASCADE;

%%sql
EXPLAIN ANALYZE
SELECT *
FROM actors, cast_info, movies
WHERE actors.id = cast_info.person_id
    AND movies.id = cast_info.movie_id
LIMIT 10;

printplans(__)

                                           QUERY PLAN
0   Limit  (cost=29106.44..29107.70 rows=10 width=...
1     ->  Gather  (cost=29106.44..307637.07 rows=2...
2                                  Workers Planned: 2
3                                 Workers Launched: 2
4           ->  Parallel Hash Join  (cost=28106.44...
5                 Hash Cond: (cast_info.movie_id =...
6                 ->  Parallel Hash Join  (cost=15...
7                       Hash Cond: (cast_info.pers...
8                       ->  Parallel Seq Scan on c...
9                       ->  Parallel Hash  (cost=8...
10                            Buckets: 65536  Batc...
11                            ->  Parallel Seq Sca...
12                ->  Parallel Hash  (cost=7595.22...
13                      Buckets: 65536  Batches: 1...
14                      ->  Parallel Seq Scan on m...
15                            Planning Time: 0.209 ms
16                        Execution Time: 2283.632 ms

%sql --close postgresql://127.0.0.1:5432/imdb_perf_lecture

!psql -h localhost -c 'DROP DATABASE IF EXISTS imdb_perf_lecture'

ERROR:  database "imdb_perf_lecture" is being accessed by other users
DETAIL:  There is 1 other session using the database.

Lecture 08

Load in the IMDB Performance database

Start `jupysql`

Matching

Two-table demo: LIMIT

Two-table demo: Projection

Three-way joins

Three-way joins with Indexes

Cleanup

Lecture 08

Load in the IMDB Performance database

Start jupysql

Matching

Two-table demo: LIMIT

Two-table demo: Projection

Three-way joins

Three-way joins with Indexes

Cleanup

Start `jupysql`