# Run this cell to set up imports
import numpy as np
import pandas as pd
If you haven't yet, load in the imdb_lecture
database.
!cp -v -r ../lec02/data .
'../lec02/data/imdb_lecture.sql' -> './data/imdb_lecture.sql' '../lec02/data/imdb_lecture_local.sql' -> './data/imdb_lecture_local.sql'
%load_ext sql
!psql -h localhost -c 'DROP DATABASE IF EXISTS imdb_lecture'
!psql -h localhost -c 'CREATE DATABASE imdb_lecture'
!psql -h localhost -d imdb_lecture -f data/imdb_lecture.sql
DROP DATABASE CREATE DATABASE SET SET SET SET SET set_config ------------ (1 row) SET SET SET SET SET SET CREATE TABLE ALTER TABLE CREATE TABLE ALTER TABLE CREATE TABLE ALTER TABLE CREATE TABLE ALTER TABLE CREATE TABLE ALTER TABLE CREATE TABLE ALTER TABLE COPY 8405 COPY 4043 COPY 4923 COPY 1223 COPY 820 COPY 2420
%sql postgresql://127.0.0.1:5432/imdb_lecture
%%sql
SELECT 'Hello' || 'World',
STRPOS('Hello', 'el'),
SUBSTRING('Hello', 2, 3);
?column? | strpos | substring |
---|---|---|
HelloWorld | 2 | ell |
%%sql
SELECT REGEXP_REPLACE('Hannah Montana', '(.*)', '\1') as ex1,
REGEXP_REPLACE('Hannah Montana', '(.*) (.*)', '\2, \1') as ex2,
REGEXP_REPLACE('Phone Number 510 642 3214', '[a-zA-Z ]', '') as ex3,
REGEXP_REPLACE('Phone Number 510 642 3214', '[a-zA-Z ]', '', 'g') as ex4
;
ex1 | ex2 | ex3 | ex4 |
---|---|---|---|
Hannah Montana | Montana, Hannah | hone Number 510 642 3214 | 5106423214 |
%sql postgresql://127.0.0.1:5432/imdb_lecture
%%sql
SELECT REGEXP_REPLACE(name, '(.*) (.*)', '\2') as lastname,
REGEXP_REPLACE(name, '(.*) (.*)', '\1') as firstname
FROM people;
lastname | firstname |
---|---|
Hill | Dwayne |
Hardman | Dave |
Pryce-Jones | Rich |
Sadler | William |
Laurentiis | Giada De |
Hartmann | Janine |
Taliánová | Tereza |
Heywood | Chris |
Zhou | Chengao |
Langley | Mark |
%%sql
SELECT REGEXP_REPLACE(name, '(.*) (.*)', '\1') as firstname,
COUNT(*) as countname
FROM people
GROUP BY firstname
ORDER BY countname desc;
firstname | countname |
---|---|
Chris | 79 |
David | 18 |
Michael | 16 |
Steve | 10 |
Peter | 9 |
John | 9 |
Tom | 8 |
Mark | 8 |
Dan | 8 |
Paul | 6 |
%%sql
WITH firstnames AS (
SELECT REGEXP_REPLACE(name, '(.*) (.*)', '\1') as firstname
FROM people )
SELECT MAX(LENGTH(firstname))
FROM firstnames;
max |
---|
20 |
What is this first name? Why doesn't the below query work?
%%sql
WITH firstnames AS (
SELECT REGEXP_REPLACE(name, '(.*) (.*)', '\1') as firstname
FROM people )
SELECT MAX(LENGTH(firstname)), firstname
FROM firstnames;
RuntimeError: (psycopg2.errors.GroupingError) column "firstnames.firstname" must appear in the GROUP BY clause or be used in an aggregate function LINE 4: SELECT MAX(LENGTH(firstname)), firstname ^ [SQL: WITH firstnames AS ( SELECT REGEXP_REPLACE(name, '(.*) (.*)', '\1') as firstname FROM people ) SELECT MAX(LENGTH(firstname)), firstname FROM firstnames;] (Background on this error at: https://sqlalche.me/e/20/f405) If you need help solving this issue, send us a message: https://ploomber.io/community
Solution:
%%sql
WITH firstnames AS (
SELECT REGEXP_REPLACE(name, '(.*) (.*)', '\1') as firstname
FROM people )
SELECT MAX(LENGTH(firstname)), firstname
FROM firstnames
GROUP BY firstname
ORDER BY max DESC;
max | firstname |
---|---|
20 | Juan Francisco Pérez |
19 | José María González |
19 | Margarida Mercês de |
16 | Dadinha Monteiro |
16 | Sreemoolanagaram |
16 | Michelle van der |
15 | Jonathan Taylor |
15 | Deirdre Coleman |
15 | Nam Hoi Sap-Sam |
15 | Michael Richard |
%sql --close postgresql://127.0.0.1:5432/imdb_lecture
%reload_ext sql
!psql -h localhost -c 'DROP DATABASE IF EXISTS stops_lecture'
!psql -h localhost -c 'CREATE DATABASE stops_lecture'
!psql -h localhost -d stops_lecture -f data/stops_lecture.sql
DROP DATABASE CREATE DATABASE SET SET SET SET SET set_config ------------ (1 row) SET SET SET SET SET SET CREATE TABLE ALTER TABLE CREATE TABLE ALTER TABLE COPY 84779 COPY 211 CREATE INDEX CREATE INDEX
%sql postgresql://127.0.0.1:5432/stops_lecture
%%sql
SELECT COUNT(*)
FROM stops;
count |
---|
84779 |
%%sql
SELECT DISTINCT COUNT(*)
FROM Zips;
count |
---|
211 |
%%sql
SELECT COUNT(*)
FROM Stops
INNER JOIN Zips
ON Stops.location = Zips.location;
count |
---|
15743 |
%%sql
SELECT COUNT(*)
FROM Stops
FULL OUTER JOIN Zips
ON Stops.location = Zips.location;
count |
---|
87291 |
%%sql
SELECT COUNT(*)
FROM Stops
LEFT OUTER JOIN Zips
ON Stops.location = Zips.location;
count |
---|
87115 |
%%sql
SELECT COUNT(*)
FROM Stops
RIGHT OUTER JOIN Zips
ON Stops.location = Zips.location;
count |
---|
15919 |
%sql --close postgresql://127.0.0.1:5432/stops_lecture