# Run this cell to set up imports
import numpy as np
import pandas as pd


!psql -h localhost -c 'DROP DATABASE IF EXISTS stops_lecture'
!psql -h localhost -c 'CREATE DATABASE stops_lecture' 
!psql -h localhost -d stops_lecture -f data/stops_lecture.sql

DROP DATABASE
CREATE DATABASE
SET
SET
SET
SET
SET
 set_config 
------------
 
(1 row)

SET
SET
SET
SET
SET
SET
CREATE TABLE
ALTER TABLE
CREATE TABLE
ALTER TABLE
COPY 84779
COPY 211
CREATE INDEX
CREATE INDEX


%load_ext sql


%sql --close postgresql://127.0.0.1:5432/stops_lecture

RuntimeError: Could not close connection because it was not found amongst these: []
If you need help solving this issue, send us a message: https://ploomber.io/community


%sql postgresql://127.0.0.1:5432/stops_lecture


%sqlcmd tables


%sqlcmd columns -t stops


%sqlcmd columns -t zips


%%sql
SELECT *
FROM Zips
LIMIT 10;


%%sql
SELECT *
FROM Stops
ORDER BY RANDOM()
LIMIT 10;


result


%%sql
SELECT *
FROM Stops
WHERE id = 123;


%%sql
SELECT S1.id, S1.race,
       S1.location
FROM Stops as S1,   -- the "other stops"
     Stops as S2    -- the "stop that refers to ID 123"
WHERE S1.location = S2.location -- the "other stops" must match the ID 123 location
    AND S2.id = 123;


%%sql
WITH Location123 AS (
    SELECT location
    FROM Stops
    WHERE id = 123
) SELECT * FROM Location123;


%%sql
WITH Location123 AS (
    SELECT location
    FROM Stops
    WHERE id = 123
)
SELECT S.id, S.race,
       S.location
FROM Stops as S,
    Location123
WHERE S.location = Location123.location;


%%sql
SELECT S1.id, S1.race, S1.location
FROM Stops S1
WHERE S1.location = (
         SELECT S2.location
         FROM Stops
         S2 WHERE S2.id = 123);


%%sql
SELECT DISTINCT Stops.location FROM Stops
WHERE NOT EXISTS (
  SELECT * FROM Zips
  WHERE Zips.location = 
          Stops.location);


%%sql
WITH StopZips AS (
    SELECT * FROM Stops NATURAL JOIN Zips
)
SELECT *
FROM StopZips SZ1
WHERE NOT EXISTS (
    -- prove that there is not another id
    -- for the same zipcode
    -- --> i.e., there is an id for the same zipcode
    SELECT *
    FROM StopZips SZ2
    WHERE SZ1.zipcode = SZ2.zipcode -- same zipcode as the table we're looking at
          AND SZ1.id != SZ2.id
);


%%sql
SELECT Z1.zipcode, COUNT(*) as count
FROM Stops S1
  NATURAL JOIN Zips Z1
GROUP BY Z1.zipcode
ORDER BY count ASC;


%%sql
SELECT S1.id, S1.age
FROM Stops S1 
WHERE S1.age = 
	MAX(S1.age) -- yes, we need to compare to a single value!

RuntimeError: (psycopg2.errors.GroupingError) aggregate functions are not allowed in WHERE
LINE 4:  MAX(S1.age) -- yes, we need to compare to a single value!
         ^

[SQL: SELECT S1.id, S1.age
FROM Stops S1 
WHERE S1.age = 
	MAX(S1.age) -- yes, we need to compare to a single value!]
(Background on this error at: https://sqlalche.me/e/20/f405)
If you need help solving this issue, send us a message: https://ploomber.io/community


%%sql
SELECT S1.id, S1.age
FROM Stops S1 
WHERE S1.age = 
	(SELECT MAX(S2.age)
     FROM Stops S2);


%%sql
SELECT location,
       AVG(age) AS avgage,
       MIN(age) AS minage
FROM Stops
GROUP BY location;


%%sql
SELECT race, 
AVG (CASE WHEN location = 'West Oakland'
     THEN age
     ELSE NULL
     END
    ) AS west_oakland_avg,
AVG (CASE WHEN location = 'Rockridge'
     THEN age
     ELSE NULL
     END
    ) AS rockridge_Avg 
FROM Stops
GROUP BY race;

name	type	nullable	default	autoincrement	comment
id	BIGINT	True	None	False	None
race	TEXT	True	None	False	None
sex	TEXT	True	None	False	None
age	DOUBLE PRECISION	True	None	False	None
arrest	BOOLEAN	True	None	False	None
citation	BOOLEAN	True	None	False	None
warning	BOOLEAN	True	None	False	None
search	BOOLEAN	True	None	False	None
location	TEXT	True	None	False	None

name	type	nullable	default	autoincrement	comment
zip_id	BIGINT	True	None	False	None
location	TEXT	True	None	False	None
zipcode	BIGINT	True	None	False	None

zip_id	location	zipcode
0	None	94546
1	Chabot Park	94546
2	Panoramic Hill	94563
3	None	94577
4	North Stonehurst	94577
5	Arrowhead Marsh	94601
6	Bartlett	94601
7	Coliseum Industrial	94601
8	Fairfax	94601
9	Fairfax Business-Wentworth-Holland	94601

id	race	sex	age	arrest	citation	warning	search	location
4099	hispanic	male	None	False	True	False	False	International Boulevard
70459	black	male	None	False	False	True	False	North Oakland
102529	black	female	None	False	True	False	False	East Oakland
120631	black	male	33.0	False	False	True	False	North Oakland
11945	white	male	None	False	False	False	False	North Oakland
82800	black	male	None	False	True	False	False	Downtown Oakland
104649	hispanic	male	None	False	True	False	False	East Oakland
132303	other	male	28.0	False	False	True	False	None
8846	hispanic	female	None	False	True	False	False	East Oakland
77060	black	male	None	False	True	False	False	None

id	race	location
2720	white	Bancroft Avenue
3097	black	Bancroft Avenue
4715	hispanic	Bancroft Avenue
12159	black	Bancroft Avenue
80360	hispanic	Bancroft Avenue
123	hispanic	Bancroft Avenue
603	black	Bancroft Avenue
928	black	Bancroft Avenue
2857	black	Bancroft Avenue
2914	hispanic	Bancroft Avenue

Lecture 05: Subqueries, Aggregation¶

`jupysql`¶

Exercise #1¶

`EXISTS` Subquery (and `NOT EXISTS`)¶

Exercise #2¶

Exercise #3¶

Exercise #4¶

zipcode	count
94621	21
94601	43
94607	48
94705	63
94618	126
94609	292
94608	346
94603	435
94619	523
94612	537

location	avgage	minage
None	31.541920731707318	13.0
Thornhill Drive	36.42857142857143	27.0
Pendleton Way	27.5	17.0
Thermal Street	24.0	16.0
Harrison/Oakland	32.629629629629626	18.0
B Street	29.642857142857142	17.0
Highland Hospital	37.333333333333336	31.0
Weld Street	30.571428571428573	20.0
Royal Kitchen	None	None
Montclair	42.01176470588236	16.0

race	west_oakland_avg	rockridge_avg
other	34.785714285714285	55.0
hispanic	30.408333333333335	None
black	35.700739744451916	None
asian/pacific islander	32.61290322580645	44.5
white	36.734375	45.9

Lecture 05: Subqueries, Aggregation¶

jupysql¶

Exercise #1¶

EXISTS Subquery (and NOT EXISTS)¶

Exercise #2¶

Exercise #3¶

Exercise #4¶

`jupysql`¶

`EXISTS` Subquery (and `NOT EXISTS`)¶