Lecture 13: GNIS¶

In [1]:
import numpy as np
import pandas as pd

Scalar Functions and Query Plans¶

In [16]:
## we'll use the Lahman baseball database in our examples today.
## replace the database connection with a database of your own!
%reload_ext sql
%sql postgresql://localhost:5432/baseball
%config SqlMagic.displaylimit = 20
Connecting and switching to connection postgresql://localhost:5432/baseball
In [12]:
%%sql
WITH yearnum AS
  (SELECT yearid, (yearid % 100) as year
     FROM batting
  )
SELECT yearid, CONCAT('''', LPAD(year::text, 2, '0')) as year
  FROM yearnum
 LIMIT 5;
Running query in 'postgresql://localhost:5432/baseball'
5 rows affected.
Out[12]:
yearid year
2004 '04
2007 '07
2009 '09
2010 '10
2012 '12
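
In the CONCAT call, '''' is a string literal containing a single quote (quotes inside SQL string literals are escaped by doubling them), and LPAD pads the year to two digits with a leading zero.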

Let's analyze the query below (we've flattened it for convenience):

In [13]:
%%sql
EXPLAIN (VERBOSE true)
SELECT yearid,
       CONCAT('''', LPAD((yearid % 100)::text, 2, '0')) AS year
FROM batting;
Running query in 'postgresql://localhost:5432/baseball'
2 rows affected.
Out[13]:
QUERY PLAN
Seq Scan on public.batting (cost=0.00..3922.29 rows=104324 width=36)
  Output: yearid, concat('''', lpad(((yearid % 100))::text, 2, '0'::text))
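
Notice that the CONCAT/LPAD expression appears directly in the Output list of the Seq Scan node: the scalar functions are evaluated inline as each row is scanned, not in a separate plan operator.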

What if scalar functions mention multiple tables?

The query below computes an arbitrary statistic for pitchers:

  • +1 point for every strikeout they throw as a pitcher
  • –1 point for every time they themselves strike out as a batter

If the notebook-like output is hard to read, try out the query in psql. Note that notebooks don't preserve whitespace when displaying dataframes.

In [14]:
%%sql
EXPLAIN (VERBOSE true)
SELECT p.playerid, p.so - b.so
  FROM pitching p
  INNER JOIN batting b
  ON p.playerid=b.playerid;
Running query in 'postgresql://localhost:5432/baseball'
11 rows affected.
Out[14]:
QUERY PLAN
Nested Loop (cost=0.43..13004.27 rows=339358 width=13)
  Output: p.playerid, (p.so - b.so)
  -> Seq Scan on public.pitching p (cost=0.00..1374.06 rows=45806 width=13)
        Output: p.playerid, p.yearid, p.stint, p.teamid, p.lgid, p.w, p.l, p.g, p.gs, p.cg, p.sho, p.sv, p.ipouts, p.h, p.er, p.hr, p.bb, p.so, p.baopp, p.era, p.ibb, p.wp, p.hbp, p.bk, p.bfp, p.gf, p.r, p.sh, p.sf, p.gidp
  -> Memoize (cost=0.43..0.73 rows=7 width=13)
        Output: b.so, b.playerid
        Cache Key: p.playerid
        Cache Mode: logical
        -> Index Scan using batting_pkey on public.batting b (cost=0.42..0.72 rows=7 width=13)
              Output: b.so, b.playerid
              Index Cond: ((b.playerid)::text = (p.playerid)::text)

Window Functions¶

In [12]:
%%sql
SELECT namefirst, namelast, yearid, HR,
       rank() OVER (ORDER BY HR DESC),
       avg(HR)    OVER (PARTITION BY b.playerid ORDER BY yearid ROWS 3 PRECEDING) as avg_3yr,
       lag(HR, 7) OVER (PARTITION BY b.playerid ORDER BY yearid) as previous,
       lag(HR, 2) OVER (PARTITION BY b.playerid ORDER BY yearid) as lag2
FROM batting b, master m
WHERE m.playerid = b.playerid
   AND (namelast = 'Bonds' or namelast = 'Ruth')
ORDER BY HR DESC
LIMIT 10;
Running query in 'postgresql://localhost:5432/gnis'
RuntimeError: (psycopg2.errors.UndefinedTable) relation "batting" does not exist
LINE 6: FROM batting b, master m
             ^

This cell errors because the active connection is still the gnis database, which has no batting relation.
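
To re-run the query successfully, first switch the active connection back to the baseball database:

In [ ]:
%sql postgresql://localhost:5432/baseball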

Inverse Distribution Window Functions¶

In [17]:
%%sql
SELECT MIN(HR),
       percentile_cont(0.25) WITHIN GROUP (ORDER BY HR) AS p25,
       percentile_cont(0.50) WITHIN GROUP (ORDER BY HR) AS median,
       percentile_cont(0.75) WITHIN GROUP (ORDER BY HR) AS p75,
       percentile_cont(0.99) WITHIN GROUP (ORDER BY HR) AS p99,
       MAX(HR),
       AVG(HR)
FROM batting;
Running query in 'postgresql://localhost:5432/baseball'
1 rows affected.
Out[17]:
min p25 median p75 p99 max avg
0 0.0 0.0 2.0 31.0 73 2.8315823779763046
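
percentile_cont interpolates between adjacent values, which is why the quartiles come back as floats. If you want a value that actually occurs in the data, percentile_disc is the discrete counterpart — a minimal sketch:

In [ ]:
%%sql
SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY HR) AS median_interpolated,
       percentile_disc(0.5) WITHIN GROUP (ORDER BY HR) AS median_discrete
FROM batting;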
In [21]:
%%sql
SELECT HR, COUNT(*) FROM batting GROUP BY HR ORDER BY HR;
Running query in 'postgresql://localhost:5432/baseball'
67 rows affected.
Out[21]:
hr count
0 63825
1 9953
2 5531
3 3806
4 2790
5 2212
6 1825
7 1533
8 1330
9 1147
Truncated to displaylimit of 10.

Hypothetical-Set Window Functions¶
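
These aggregates compute the value a hypothetical row would receive if it were inserted into the ordered group: for example, rank(4) WITHIN GROUP (ORDER BY HR DESC) reports the rank a season with 4 home runs would earn among all rows of batting.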

In [13]:
hrs = 4 # hypothetically, four home runs
In [18]:
%%sql
SELECT {{hrs}} as hypothetical,
       rank({{hrs}}) WITHIN GROUP (ORDER BY HR DESC),
       dense_rank({{hrs}}) WITHIN GROUP (ORDER BY HR DESC),
       percent_rank({{hrs}}) WITHIN GROUP (ORDER BY HR DESC) * 100 AS pct_rank,
       cume_dist({{hrs}}) WITHIN GROUP (ORDER BY HR)
FROM batting
LIMIT 10;
Running query in 'postgresql://localhost:5432/baseball'
1 rows affected.
Out[18]:
hypothetical rank dense_rank pct_rank cume_dist
4 18420 63 17.655573022506807 0.823445962137551

Without jupysql variable substitution:

In [ ]:
%%sql
SELECT 4 as hypothetical,
       rank(4) WITHIN GROUP (ORDER BY HR DESC),
       dense_rank(4) WITHIN GROUP (ORDER BY HR DESC),
       percent_rank(4) WITHIN GROUP (ORDER BY HR DESC) * 100 AS pct_rank,
       cume_dist(4) WITHIN GROUP (ORDER BY HR)
FROM batting
LIMIT 10;

GNIS¶

This notebook transforms the existing Geographic Names Information System (GNIS) national zip file.

We have provided a subset of the SQL database for you in data/gnis.sql.

If you'd like to make your own version of this database, see the end of this notebook. Note: Because of its size, we don't recommend building the GNIS SQL database from scratch on DataHub.

In [136]:
!psql -h localhost -d gnis -c 'SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE datname = current_database() AND pid <> pg_backend_pid();'
!psql -h localhost -c 'DROP DATABASE IF EXISTS gnis'
!psql -h localhost -c 'CREATE DATABASE gnis' 
!psql -h localhost -d gnis -f data/gnis.sql
 pg_terminate_backend 
----------------------
 t
(1 row)

DROP DATABASE
CREATE DATABASE
SET
SET
SET
SET
SET
 set_config 
------------
 
(1 row)

SET
SET
SET
SET
SET
SET
CREATE TABLE
ALTER TABLE
CREATE TABLE
ALTER TABLE
COPY 3195
COPY 11533
CREATE INDEX
In [6]:
%reload_ext sql
%sql postgresql://localhost:5432/gnis
%config SqlMagic.displaylimit = 15
  • View schema in psql
  • View some rows below
In [9]:
%sql SELECT COUNT(*) FROM national;
Running query in 'postgresql://localhost:5432/gnis'
1 rows affected.
Out[9]:
count
11533
In [8]:
%sql SELECT * FROM national WHERE county_name = 'Alameda';
Running query in 'postgresql://localhost:5432/gnis'
21 rows affected.
Out[8]:
feature_id feature_name feature_class state_alpha state_numeric county_name county_numeric primary_lat_dms prim_long_dms prim_lat_dec prim_long_dec source_lat_dms source_long_dms source_lat_dec source_long_dec elev_in_m elev_in_ft map_name date_created date_edited
218316 Apperson Creek Stream CA 6 Alameda 1.0 373349N 1215000W 37.5635453 -121.8332887 373232N 1214804W 37.5422222 -121.8011111 148.0 486.0 La Costa Valley 01/19/1981 None
225998 Irvington High School School CA 6 Alameda 1.0 373126N 1215801W 37.523814 -121.9670659 None None None None 13.0 43.0 Niles 01/19/1981 03/31/2021
226951 Laurel Elementary School School CA 6 Alameda 1.0 374734N 1221147W 37.792899 -122.1964288 None None None None 68.0 223.0 Oakland East 06/14/2000 03/14/2021
229367 Murray Elementary School School CA 6 Alameda 1.0 374318N 1215557W 37.721801 -121.9326269 None None None None 112.0 367.0 Dublin 01/19/1981 03/14/2021
235581 Strawberry Creek Stream CA 6 Alameda 1.0 375221N 1221443W 37.8724258 -122.2452464 375251N 1221354W 37.8807588 -122.2316349 154.0 505.0 Oakland East 01/19/1981 08/31/2016
1654274 Hayward Golf Course Locale CA 6 Alameda 1.0 373726N 1220250W 37.6238222 -122.0471843 None None None None 5.0 16.0 Newark 01/19/1981 None
1664964 KOFY-AM (San Mateo) Tower CA 6 Alameda 1.0 374934N 1221842W 37.8260385 -122.3116366 None None None None 2.0 7.0 Oakland West 07/01/1994 None
1670278 Lake Elizabeth Lake CA 6 Alameda 1.0 373255N 1215742W 37.5487056 -121.9617554 None None None None 16.0 52.0 Niles 11/09/1995 03/07/2019
1692819 California School for the Deaf - Fremont School CA 6 Alameda 1.0 373334N 1215747W 37.5593966 -121.9631843 None None None None 20.0 66.0 Niles 05/08/1996 09/16/2016
1692863 J A Freitas Library Building CA 6 Alameda 1.0 374335N 1220925W 37.7263185 -122.1569101 None None None None 19.0 62.0 San Leandro 05/08/1996 None
1693010 Alice Arts Center Building CA 6 Alameda 1.0 374812N 1221557W 37.8032611 -122.2658025 None None None None 11.0 36.0 Oakland West 05/08/1996 None
1961816 New Pilgrim Congregational Church Church CA 6 Alameda 1.0 374622N 1221131W 37.7727061 -122.1919115 None None None None 21.0 69.0 Oakland East 09/11/2002 None
1961871 First Southern Baptist Church Church CA 6 Alameda 1.0 374657N 1221401W 37.7824282 -122.2335796 None None None None 11.0 36.0 Oakland East 09/11/2002 None
1962002 First Church of Christ Scientist Church CA 6 Alameda 1.0 374353N 1220931W 37.7313183 -122.1585769 None None None None 19.0 62.0 San Leandro 09/11/2002 None
2087391 First Presbyterian Church Church CA 6 Alameda 1.0 373254N 1220238W 37.5483333 -122.0438889 None None None None 7.0 23.0 Newark 05/22/2006 None
2123537 Dimond District Populated Place CA 6 Alameda 1.0 374812N 1221246W 37.8033333 -122.2127778 None None None None 64.0 210.0 Oakland East 05/16/2007 None
2123538 Fruitvale Populated Place CA 6 Alameda 1.0 374647N 1221313W 37.7797222 -122.2202778 None None None None 18.0 59.0 Oakland East 05/16/2007 None
2506752 Maybeck High School School CA 6 Alameda 1.0 375140N 1221509W 37.8611144 -122.2524753 None None None None 87.0 285.0 Oakland West 04/28/2009 10/27/2018
2506784 Tilden Elementary School School CA 6 Alameda 1.0 374712N 1221117W 37.7866743 -122.1880042 None None None None 61.0 200.0 Oakland East 04/28/2009 04/09/2016
2518210 Mission Valley Regional Occupational Program Center School CA 6 Alameda 1.0 373142N 1215902W 37.5282201 -121.9839668 None None None None 13.0 43.0 Niles 04/30/2009 12/20/2014
2755938 Alameda Jail Building CA 6 Alameda 1.0 374602N 1221435W 37.7671131 -122.242983 None None None None 10.0 33.0 Oakland East 07/23/2014 06/29/2017
In [137]:
%%sql
SELECT *
FROM national TABLESAMPLE BERNOULLI(10);
Running query in 'postgresql://127.0.0.1:5432/gnis'
/srv/conda/envs/notebook/lib/python3.11/site-packages/sql/connection/connection.py:827: JupySQLRollbackPerformed: Server closed connection. JupySQL executed a ROLLBACK operation.
  warnings.warn(
1092 rows affected.
Out[137]:
feature_id feature_name feature_class state_alpha state_numeric county_name county_numeric primary_lat_dms prim_long_dms prim_lat_dec prim_long_dec source_lat_dms source_long_dms source_lat_dec source_long_dec elev_in_m elev_in_ft map_name date_created date_edited
4590 Fiftysix Tank Reservoir AZ 4 Gila 7.0 334124N 1102003W 33.6899872 -110.3340976 None None None None 1728.0 5669.0 Popcorn Canyon 02/08/1980 03/19/2019
7221 Little Water Area AZ 4 Navajo 17.0 352649N 1103415W 35.4469546 -110.5709674 None None None None 1583.0 5194.0 Montezumas Chair 02/08/1980 None
10860 San Simon Well Well AZ 4 Pima 19.0 321056N 1122210W 32.1822888 -112.3695831 None None None None 590.0 1936.0 Wahak Hotrontk 02/08/1980 03/27/2018
13040 Union High School School AZ 4 Maricopa 13.0 332706N 1120402W 33.4517105 -112.0670927 None None None None 333.0 1093.0 Phoenix 02/08/1980 None
21298 Jackass Basin Basin AZ 4 Yavapai 25.0 345414N 1131359W 34.9039033 -113.2329786 None None None None 1653.0 5423.0 Sunrise Peak 06/27/1984 None
27159 Cactus Basin Basin AZ 4 Yavapai 25.0 341856N 1120138W 34.3155852 -112.0270998 None None None None 1188.0 3898.0 Cordes Junction 02/08/1980 None
31654 McDougal Flat Tank Reservoir AZ 4 Coconino 5.0 351327N 1120304W 35.2240585 -112.0510582 None None None None 2089.0 6854.0 Davenport Hill 02/08/1980 03/20/2019
34202 Section Twelve Tank Reservoir AZ 4 Navajo 17.0 343634N 1104159W 34.6094668 -110.6998296 None None None None 1896.0 6220.0 Potato Wash South 02/08/1980 04/16/2019
34275 Seven Pines Tank Reservoir AZ 4 Coconino 5.0 352806N 1120222W 35.4684412 -112.0393681 None None None None 2022.0 6634.0 Squaw Mountain 02/08/1980 03/16/2019
39573 Liveoak Mine Mine AZ 4 Gila 7.0 332417N 1105428W 33.4047768 -110.907891 None None None None 1217.0 3993.0 Inspiration 06/27/1984 None
39998 High Tank Reservoir AZ 4 Greenlee 11.0 332113N 1090522W 33.3535132 -109.0894357 None None None None 2091.0 6860.0 Maple Peak 06/27/1984 02/27/2019
42973 Southern Basin Light Locale AZ 4 Mohave 15.0 352231N 1143620W 35.3752729 -114.6055239 None None None None 207.0 679.0 Spirit Mountain NE 02/01/1994 None
50059 Hickory Grove Cemetery Cemetery AR 5 Lonoke 85.0 343835N 0920141W 34.6431418 -92.028191 None None None None 71.0 233.0 Scott 04/30/1980 10/21/2020
53536 Pleasant Grove Church Church AR 5 Calhoun 13.0 333216N 0922147W 33.5378906 -92.3629271 None None None None 57.0 187.0 Banks 04/30/1980 None
55839 United Church Church AR 5 Nevada 99.0 334206N 0932158W 33.7017805 -93.3660044 None None None None 82.0 269.0 Laneburg 04/30/1980 None
Truncated to displaylimit of 15.
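
TABLESAMPLE BERNOULLI(10) keeps each row independently with probability 10%, so the sample size varies from run to run; SYSTEM sampling is cheaper because it samples whole pages, at the cost of a less uniform sample.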

Numerical Granularity¶

In [18]:
%sql SELECT elev_in_m FROM National LIMIT 2;
Running query in 'postgresql://localhost:5432/gnis'
2 rows affected.
Out[18]:
elev_in_m
931.0
2707.0
In [20]:
%%sql
SELECT elev_in_m, 
    (elev_in_m / 100) :: INTEGER AS quantized,
    ((elev_in_m / 100) :: INTEGER) * 100 AS round_to_100,
    SUBSTRING(elev_in_m :: TEXT, 1, 2),
    CONCAT(SUBSTRING(elev_in_m :: TEXT, 1, 2), '00') AS substring2
FROM National
LIMIT 5;
Running query in 'postgresql://localhost:5432/gnis'
5 rows affected.
Out[20]:
elev_in_m quantized round_to_100 substring substring2
931.0 9 900 93 9300
2707.0 27 2700 27 2700
275.0 3 300 27 2700
1685.0 17 1700 16 1600
1354.0 14 1400 13 1300
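
One caveat: the ::INTEGER cast rounds to the nearest integer, which is why 275.0 is quantized to 3 (i.e., 300), and the substring trick only works for four-digit elevations (931.0 becomes 9300). To truncate downward instead, a minimal sketch using FLOOR:

In [ ]:
%%sql
SELECT elev_in_m,
       FLOOR(elev_in_m / 100) * 100 AS floor_to_100  -- truncates: 275.0 -> 200
FROM National
LIMIT 5;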
In [ ]:
%%sql
/* does not run in jupysql because it mis-parses the << operator; copy into psql */
SELECT elev_in_m,
    (16::INTEGER::BIT(12)) AS bit12, 
    (16::INTEGER::BIT(12)) << 3
FROM national
LIMIT 5;
In [ ]:
%%sql
EXPLAIN (verbose)
WITH shifts AS (
SELECT elev_in_m, 
       (elev_in_m::integer::bit(12)) AS bit12, 
       (elev_in_m::integer::bit(12) >> 8) AS rightshifted, 
       ((elev_in_m::integer::bit(12) >> 8) << 8)::integer AS round_to_256,
       ((elev_in_m::integer::bit(12) >> 8) << 8)::integer % 256 AS test
  FROM national
)
SELECT COUNT(DISTINCT elev_in_m) AS elevation_meters_count,
       COUNT(DISTINCT bit12) AS bit12_count,
       COUNT(DISTINCT rightshifted) AS rightshift_count,
       COUNT(DISTINCT round_to_256) AS rounded_count
  FROM shifts;
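
As a worked example of what the shifts do (a sketch using an integer literal; PostgreSQL can cast integers directly to bit(n)): 1685 is 011010010101 as a 12-bit string; shifting right by 8 discards the low 8 bits, and shifting back left refills them with zeros, rounding down to a multiple of 256:

In [ ]:
%%sql
SELECT 1685::bit(12) AS bit12,                               -- 011010010101
       (1685::bit(12) >> 8) << 8 AS masked,                  -- 011000000000
       (((1685::bit(12) >> 8) << 8))::integer AS round_to_256;  -- 1536 = 6 * 256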

Demo 1: Roll-up / Drill-down Practice¶

Let's start with county-level data on elevations:

In [138]:
%%sql
SELECT state_numeric, county_numeric,
       avg(elev_in_m),
       stddev(elev_in_m), count(*)
FROM national TABLESAMPLE BERNOULLI(10)
GROUP BY state_numeric, county_numeric;
Running query in 'postgresql://127.0.0.1:5432/gnis'
833 rows affected.
Out[138]:
state_numeric county_numeric avg stddev count
1 33.0 167.0 11.313708498984761 2
34 39.0 7.0 None 1
53 37.0 996.0 None 1
13 37.0 66.0 None 1
37 139.0 2.0 None 1
36 119.0 99.0 None 1
49 49.0 1502.0 188.09040379562165 2
22 107.0 20.0 None 1
45 45.0 274.0 None 1
28 47.0 7.0 None 1
13 153.0 129.0 8.48528137423857 2
42 107.0 159.0 None 1
48 171.0 569.0 None 1
40 109.0 341.3333333333333 15.631165450257807 3
47 135.0 211.0 None 1
Truncated to displaylimit of 15.

Roll up to state level.

  • We save the view as state_elevations for later...
In [202]:
%%sql
DROP VIEW IF EXISTS state_elevations;

CREATE VIEW state_elevations AS
SELECT state_numeric,
       avg(elev_in_m),
       stddev(elev_in_m), count(*)
FROM national 
GROUP BY state_numeric
;
Running query in 'postgresql://127.0.0.1:5432/gnis'
Out[202]:
In [203]:
%sql SELECT * FROM state_elevations;
Running query in 'postgresql://127.0.0.1:5432/gnis'
59 rows affected.
Out[203]:
state_numeric avg stddev count
54 363.6190476190476 199.26650831834746 204
29 246.09152542372883 80.2483078596168 343
68 6.666666666666667 7.99166232186187 14
4 1315.3798076923076 672.6305522946129 208
34 40.08943089430894 59.88896941733248 123
51 254.55197132616487 260.54513270095333 283
70 18.333333333333332 31.75426480542942 3
10 22.11111111111111 28.015563440198648 27
35 1756.8467432950192 471.8002505531821 273
45 122.83240223463687 123.96059930539184 181
6 516.7286432160804 684.895132373804 599
39 262.80258899676375 57.78124212343727 328
69 90.66666666666667 82.82189726555814 6
36 217.14150943396226 197.0201865875975 424
31 628.2797202797203 293.8114851587473 148
Truncated to displaylimit of 15.

Drill down to include feature class.

In [16]:
%%sql
SELECT state_numeric, feature_class,
       avg(elev_in_m),
       stddev(elev_in_m), count(*)
FROM national TABLESAMPLE Bernoulli(10)
GROUP BY state_numeric, feature_class
ORDER BY count(*) DESC;
Running query in 'postgresql://localhost:5432/gnis'
550 rows affected.
Out[16]:
state_numeric feature_class avg stddev count
42 Populated Place 238.15384615384616 141.2915697849935 13
4 Reservoir 1747.3636363636363 504.0405286734932 11
39 Building 256.77777777777777 33.555095655420864 9
36 Building 189.66666666666666 188.56696423286874 9
36 Church 125.11111111111111 101.1810807963184 9
6 School 85.55555555555556 96.14326693938467 9
48 Populated Place 389.125 404.61143176421217 8
13 Church 231.25 114.47738391739841 8
6 Building 514.875 689.7219807388233 8
48 School 186.83333333333334 121.03787286079786 8
48 Cemetery 289.625 293.16057886421225 8
41 Stream 408.625 394.3881110566812 8
36 School 155.75 80.9898582716203 8
6 Stream 634.5 601.6745679470637 8
24 Populated Place 73.875 50.123953498160084 8
Truncated to displaylimit of 15.

Demo 2: Connections to Statistics¶

Roll up with marginal distributions¶

In [58]:
%%sql
SELECT state_numeric,
       AVG(elev_in_m),
       STDDEV(elev_in_m), COUNT(*),
       SUM(COUNT(*)) OVER () AS total,
       COUNT(*)/SUM(COUNT(*)) OVER () AS marginal
FROM national TABLESAMPLE Bernoulli(.07)
GROUP BY state_numeric;
Running query in 'postgresql://localhost:5432/gnis'
3 rows affected.
Out[58]:
state_numeric avg stddev count total marginal
5 225.5 218.4959953866432 2 5 0.40000000000000000000
18 203.0 None 1 5 0.20000000000000000000
48 159.5 47.37615433949868 2 5 0.40000000000000000000
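
Note the nesting: COUNT(*) is an ordinary per-group aggregate, and SUM(COUNT(*)) OVER () is a window function applied on top of it, summing the group counts over the entire result so each row can report its marginal share.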
In [60]:
%%sql
SELECT COUNT(DISTINCT county_numeric) FROM national;
Running query in 'postgresql://localhost:5432/gnis'
1 rows affected.
Out[60]:
count
291

Drill down with normally-distributed elevations:¶

Start with the state_elevations view from earlier:

In [208]:
%sql SELECT * FROM state_elevations;
Running query in 'postgresql://127.0.0.1:5432/gnis'
55 rows affected.
Out[208]:
state_numeric avg stddev count
54 311.6521739130435 172.46517664198242 24
29 216.8846153846154 65.13083873132722 31
68 19.0 None 1
4 1252.16 646.1850663703085 25
34 54.0 89.39188492759894 12
51 225.16129032258064 274.66732565950804 31
10 0.0 None 1
35 1720.0333333333333 460.96880613538815 31
45 121.47058823529412 95.33173504076359 18
6 403.62857142857143 493.95684759681654 71
39 256.8076923076923 61.49602863975479 27
69 110.0 None 1
36 213.66666666666666 193.76780502485684 54
31 602.2 275.311863569702 10
50 344.5 338.0424529552464 6
Truncated to displaylimit of 15.

The fips_counties relation has all counties, including those not in national:

In [207]:
%sql SELECT * FROM fips_counties LIMIT 10;
Running query in 'postgresql://127.0.0.1:5432/gnis'
10 rows affected.
Out[207]:
fips county state_numeric
1000 Alabama 1
1001 Autauga County 1
1003 Baldwin County 1
1005 Barbour County 1
1007 Bibb County 1
1009 Blount County 1
1011 Bullock County 1
1013 Butler County 1
1015 Calhoun County 1
1017 Chambers County 1

If we wanted to drill down to the FIPS counties, we'd need to simulate an elevation for those counties that don't exist in national.

Here's the first step in that process, which creates a simulated value for every county in fips_counties.

  • The value is simulated from a normal distribution using that state's elevation statistics (average, standard deviation).
  • Much as we import a Python package, we need to load the tablefunc extension in order to use the normal_rand function.
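
In the query below, the LATERAL keyword lets the normal_rand(...) call on the right-hand side of the FROM clause refer to columns (count, avg, stddev) of each row of state_cty.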
In [194]:
%sql CREATE EXTENSION IF NOT EXISTS tablefunc;
Running query in 'postgresql://127.0.0.1:5432/gnis'
Out[194]:
In [204]:
%%sql
WITH state_cty AS
(SELECT s.state_numeric, f.fips as county_numeric, s.avg, s.stddev, s.count
  FROM state_elevations s, fips_counties f
  WHERE s.state_numeric = f.state_numeric
)
SELECT s.*,
       n.n AS elev_in_m,
       true as elev_in_m_sim -- user-facing flag
  FROM state_cty s,
       LATERAL normal_rand(CAST(s.count AS INTEGER), s.avg, s.stddev) AS n
LIMIT 10;
Running query in 'postgresql://127.0.0.1:5432/gnis'
10 rows affected.
Out[204]:
state_numeric county_numeric avg stddev count elev_in_m elev_in_m_sim
1 1000 146.37888198757764 102.92185851771194 339 137.25272229118536 True
1 1000 146.37888198757764 102.92185851771194 339 203.70551142702658 True
1 1000 146.37888198757764 102.92185851771194 339 18.161649295434188 True
1 1000 146.37888198757764 102.92185851771194 339 186.63437694861264 True
1 1000 146.37888198757764 102.92185851771194 339 212.52078374095765 True
1 1000 146.37888198757764 102.92185851771194 339 2.890975715503089 True
1 1000 146.37888198757764 102.92185851771194 339 101.17735438334532 True
1 1000 146.37888198757764 102.92185851771194 339 47.27059053855152 True
1 1000 146.37888198757764 102.92185851771194 339 226.06079671851296 True
1 1000 146.37888198757764 102.92185851771194 339 182.8006984455563 True

Assembling an Explicit Hierarchy¶

In [212]:
## we'll use the Lahman baseball database in our initial examples today.
## replace the database connection with a database of your own!
%reload_ext sql
%sql postgresql://localhost:5432/baseball

Two relations have the pieces of the hierarchy we want:

In [213]:
%sql SELECT * FROM Appearances WHERE yearid > 1970 LIMIT 2;
Running query in 'postgresql://localhost:5432/baseball'
2 rows affected.
Out[213]:
yearid teamid lgid playerid g_all gs g_batting g_defense g_p g_c g_1b g_2b g_3b g_ss g_lf g_cf g_rf g_of g_dh g_ph g_pr
1971 ATL NL aaronha01 139 129 139 129 0 0 71 0 0 0 0 0 60 60 0 10 0
1971 ATL NL aaronto01 25 10 25 18 0 0 11 0 7 0 0 0 0 0 0 8 0
In [214]:
%sql SELECT * FROM Teams LIMIT 1;
Running query in 'postgresql://localhost:5432/baseball'
1 rows affected.
Out[214]:
yearid lgid teamid franchid divid rank g ghome w l divwin wcwin lgwin wswin r ab h h2b h3b hr bb so sb cs hbp sf ra er era cg sho sv ipouts ha hra bba soa e dp fp name park attendance bpf ppf teamidbr teamidlahman45 teamidretro
1871 NA BS1 BNA None 3 31 None 20 10 None None N None 401 1372 426 70 37 3 60 19 73 16 None None 303 109 3.55 22 1 3 828 367 2 42 23 243 24 0.834 Boston Red Stockings South End Grounds I None 103 98 BOS BS1 BS1

Let's join these two to make our hierarchy! Which way should we make this?

In [215]:
%%sql
SELECT a.playerid, a.teamid, t.divid, a.*
  FROM Appearances a
  NATURAL JOIN Teams t
WHERE a.yearid = 2015
LIMIT 100;
Running query in 'postgresql://localhost:5432/baseball'
100 rows affected.
Out[215]:
playerid teamid divid yearid teamid_1 lgid playerid_1 g_all gs g_batting g_defense g_p g_c g_1b g_2b g_3b g_ss g_lf g_cf g_rf g_of g_dh g_ph g_pr
alvarda02 BAL E 2015 BAL AL alvarda02 12 10 12 12 0 0 0 0 0 0 0 1 12 12 0 0 0
brachbr01 BAL E 2015 BAL AL brachbr01 62 0 5 62 62 0 0 0 0 0 0 0 0 0 0 0 0
brittza01 BAL E 2015 BAL AL brittza01 64 0 2 64 64 0 0 0 0 0 0 0 0 0 0 0 0
cabrace01 BAL E 2015 BAL AL cabrace01 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0
cabreev01 BAL E 2015 BAL AL cabreev01 29 28 29 28 0 0 0 2 0 27 0 0 0 0 0 0 1
chenwe02 BAL E 2015 BAL AL chenwe02 31 31 0 31 31 0 0 0 0 0 0 0 0 0 0 0 0
clevest01 BAL E 2015 BAL AL clevest01 30 24 30 10 0 9 1 0 0 0 0 0 0 0 18 4 0
davisch02 BAL E 2015 BAL AL davisch02 160 159 160 138 0 0 111 0 0 0 0 0 30 30 22 0 0
deazaal01 BAL E 2015 BAL AL deazaal01 30 27 30 27 0 0 0 0 0 0 19 0 13 27 0 3 0
drakeol01 BAL E 2015 BAL AL drakeol01 13 0 1 13 13 0 0 0 0 0 0 0 0 0 0 0 0
flahery01 BAL E 2015 BAL AL flahery01 91 81 91 90 0 0 11 56 8 15 1 0 5 6 0 2 3
garcija03 BAL E 2015 BAL AL garcija03 21 0 3 21 21 0 0 0 0 0 0 0 0 0 0 0 0
gausmke01 BAL E 2015 BAL AL gausmke01 25 17 1 25 25 0 0 0 0 0 0 0 0 0 0 0 0
givenmy01 BAL E 2015 BAL AL givenmy01 22 0 1 22 22 0 0 0 0 0 0 0 0 0 0 0 0
gonzami03 BAL E 2015 BAL AL gonzami03 26 26 1 26 26 0 0 0 0 0 0 0 0 0 0 0 0
Truncated to displaylimit of 15.
In [216]:
%%sql
CREATE OR REPLACE VIEW bball_tree AS
SELECT DISTINCT a.playerid, a.teamid, t.divid, a.lgid, a.yearid
  FROM Appearances a NATURAL JOIN Teams t;
Running query in 'postgresql://localhost:5432/baseball'
Out[216]:
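
Note that NATURAL JOIN matches on every column name the two relations share — here yearid, teamid, and lgid. An equivalent, more explicit spelling (a sketch, assuming those are the only shared columns):

In [ ]:
%%sql
-- equivalent to the NATURAL JOIN above, with the join keys spelled out
SELECT DISTINCT a.playerid, a.teamid, t.divid, a.lgid, a.yearid
  FROM Appearances a
  JOIN Teams t
    ON a.yearid = t.yearid
   AND a.teamid = t.teamid
   AND a.lgid = t.lgid
LIMIT 5;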

Revisiting the Home Run Query¶

Recall our old home run query:

In [217]:
%%sql
SELECT namefirst, namelast, yearid,
       MIN(hr), MAX(hr), AVG(hr), STDDEV(hr), SUM(hr)
FROM batting b, people p
WHERE b.playerid = p.playerid
GROUP BY namelast, namefirst, yearid
ORDER BY max DESC
LIMIT 10;
Running query in 'postgresql://localhost:5432/baseball'
10 rows affected.
Out[217]:
namefirst namelast yearid min max avg stddev sum
Barry Bonds 2001 73 73 73.0000000000000000 None 73
Mark McGwire 1998 70 70 70.0000000000000000 None 70
Sammy Sosa 1998 66 66 66.0000000000000000 None 66
Mark McGwire 1999 65 65 65.0000000000000000 None 65
Sammy Sosa 2001 64 64 64.0000000000000000 None 64
Sammy Sosa 1999 63 63 63.0000000000000000 None 63
Roger Maris 1961 61 61 61.0000000000000000 None 61
Babe Ruth 1927 60 60 60.0000000000000000 None 60
Babe Ruth 1921 59 59 59.0000000000000000 None 59
Giancarlo Stanton 2017 59 59 59.0000000000000000 None 59

Set up for roll-up/drill-down on the bball_tree hierarchy.

  • Join each (raw) batting row with the associated bball_tree entry by (playerid, yearid) in a CTE.
  • Use this result for roll-up and drill-down.
In [224]:
%%sql
WITH batting_tree AS
(
    SELECT b.*, t.divid
    FROM batting b, bball_tree t
    WHERE b.playerid = t.playerid
      AND b.yearid = t.yearid
)
SELECT namefirst, namelast,
       bt.teamid, bt.lgid, bt.divid, bt.yearid,
       MIN(hr), MAX(hr), AVG(hr), STDDEV(hr), SUM(hr)
FROM batting_tree bt, people p
WHERE bt.playerid = p.playerid
GROUP BY bt.playerid, bt.teamid, bt.lgid, bt.divid, bt.yearid, namelast, namefirst
ORDER BY max DESC
LIMIT 10;
Running query in 'postgresql://localhost:5432/baseball'
10 rows affected.
Out[224]:
namefirst namelast teamid lgid divid yearid min max avg stddev sum
Barry Bonds SFN NL W 2001 73 73 73.0000000000000000 None 73
Mark McGwire SLN NL C 1998 70 70 70.0000000000000000 None 70
Sammy Sosa CHN NL C 1998 66 66 66.0000000000000000 None 66
Mark McGwire SLN NL C 1999 65 65 65.0000000000000000 None 65
Sammy Sosa CHN NL C 2001 64 64 64.0000000000000000 None 64
Sammy Sosa CHN NL C 1999 63 63 63.0000000000000000 None 63
Roger Maris NYA AL None 1961 61 61 61.0000000000000000 None 61
Babe Ruth NYA AL None 1927 60 60 60.0000000000000000 None 60
Babe Ruth NYA AL None 1921 59 59 59.0000000000000000 None 59
Giancarlo Stanton MIA NL E 2017 59 59 59.0000000000000000 None 59

[Extra] Load in the database from scratch¶

We download the database, unzip it, load it into pandas, then export to a new database via jupysql cell magic.

CAUTION: This may crash your DataHub instance. The file is pretty big....

The direct zip download of this file is https://geonames.usgs.gov/docs/stategaz/NationalFile.zip.

In [ ]:
# first download and unzip the data
!mkdir -p data
!wget https://geonames.usgs.gov/docs/stategaz/NationalFile.zip -P data/
!unzip -u data/NationalFile.zip -d data/
In [1]:
import os
fname = os.path.join("data", "NationalFile_20210825.txt")
fname
Out[1]:
'data/NationalFile_20210825.txt'
In [2]:
!du -h {fname} # big file
301M	data/NationalFile_20210825.txt
In [3]:
!head -c 1024 {fname}
403|Bar X Wash|Stream|AZ|04|Graham|009|322815N|1095610W|32.4709038|-109.9361853|323048N|1095233W|32.5134024|-109.8759075|
In [4]:
# next, load it into pandas
import pandas as pd

national = pd.read_csv("data/NationalFile_20210825.txt", delimiter="|")
national.head(2)
Out[4]:
FEATURE_ID FEATURE_NAME FEATURE_CLASS STATE_ALPHA STATE_NUMERIC COUNTY_NAME COUNTY_NUMERIC PRIMARY_LAT_DMS PRIM_LONG_DMS PRIM_LAT_DEC PRIM_LONG_DEC SOURCE_LAT_DMS SOURCE_LONG_DMS SOURCE_LAT_DEC SOURCE_LONG_DEC ELEV_IN_M ELEV_IN_FT MAP_NAME DATE_CREATED DATE_EDITED
0 399 Agua Sal Creek Stream AZ 4 Apache 1.0 362740N 1092842W 36.461112 -109.478439 362053N 1090915W 36.348058 -109.154266 1645.0 5397.0 Fire Dance Mesa 02/08/1980 NaN
1 400 Agua Sal Wash Valley AZ 4 Apache 1.0 363246N 1093103W 36.546112 -109.517607 362740N 1092842W 36.461112 -109.478439 1597.0 5239.0 Little Round Rock 02/08/1980 NaN
In [5]:
national = national.rename(columns=dict([(col, col.lower().strip()) for col in national.columns]))
national.head(2)
Out[5]:
feature_id feature_name feature_class state_alpha state_numeric county_name county_numeric primary_lat_dms prim_long_dms prim_lat_dec prim_long_dec source_lat_dms source_long_dms source_lat_dec source_long_dec elev_in_m elev_in_ft map_name date_created date_edited
0 399 Agua Sal Creek Stream AZ 4 Apache 1.0 362740N 1092842W 36.461112 -109.478439 362053N 1090915W 36.348058 -109.154266 1645.0 5397.0 Fire Dance Mesa 02/08/1980 NaN
1 400 Agua Sal Wash Valley AZ 4 Apache 1.0 363246N 1093103W 36.546112 -109.517607 362740N 1092842W 36.461112 -109.478439 1597.0 5239.0 Little Round Rock 02/08/1980 NaN

Next, get a table sample in pandas.

In [6]:
import numpy as np

p = 0.005 # fraction, not percentage

np.random.seed(42)
national['keep_bool'] = np.random.random(len(national)) < p
national['keep_bool'].value_counts()
Out[6]:
keep_bool
False    2267597
True       11533
Name: count, dtype: int64
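
With p = 0.005 we expect to keep about 0.5% of the ~2.28 million rows, i.e., roughly 11,400; this seeded draw keeps 11,533.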
In [7]:
national = national[national['keep_bool']].drop(columns=['keep_bool'])
national
Out[7]:
feature_id feature_name feature_class state_alpha state_numeric county_name county_numeric primary_lat_dms prim_long_dms prim_lat_dec prim_long_dec source_lat_dms source_long_dms source_lat_dec source_long_dec elev_in_m elev_in_ft map_name date_created date_edited
821 1230 Belmont Mountains Range AZ 4 Maricopa 13.0 333832N 1125404W 33.642258 -112.901013 NaN NaN NaN NaN 931.0 3054.0 Belmont Mountain 02/08/1980 NaN
1426 1839 Bootlegger Saddle Gap AZ 4 Cochise 3.0 315335N 1091659W 31.893147 -109.283118 NaN NaN NaN NaN 2707.0 8881.0 Rustler Park 02/08/1980 NaN
1921 2336 Cabeza Prieta Game Range Park AZ 4 Yuma 27.0 321500N 1132703W 32.250056 -113.450740 NaN NaN NaN NaN 275.0 902.0 Bryan Mountains 02/08/1980 NaN
2327 2750 Chandler Springs Spring AZ 4 Navajo 17.0 352236N 1102831W 35.376679 -110.475410 NaN NaN NaN NaN 1685.0 5528.0 Shonto Butte 02/08/1980 NaN
2604 3032 Cline Well Well AZ 4 Cochise 3.0 315400N 1102034W 31.900085 -110.342853 NaN NaN NaN NaN 1354.0 4442.0 Benson 02/08/1980 11/15/2017
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2277956 2808781 David Albro Lot Cemetery Cemetery RI 44 Newport 5.0 413212N 0711547W 41.536605 -71.263026 NaN NaN NaN NaN 66.0 217.0 Prudence Island 08/28/2020 NaN
2278471 2813040 Rustic Acres Colony Census Designated Place Census SD 46 Lake 79.0 435211N 0970556W 43.869779 -97.098962 NaN NaN NaN NaN 531.0 1742.0 Buffalo Trading Post 06/18/2020 08/12/2021
2278630 2813352 Jones Valley Census Designated Place Census CA 6 Shasta 89.0 404159N 1221426W 40.699842 -122.240583 NaN NaN NaN NaN 293.0 961.0 Bella Vista 08/03/2020 08/12/2021
2278698 2813492 Cold Springs Reservoir VT 50 Windham 25.0 430751N 0722637W 43.130841 -72.443530 NaN NaN NaN NaN 75.0 246.0 Bellows Falls 09/10/2020 01/24/2021
2279100 27833590 Murrieta Carrier Annex Post Office Post Office CA 6 Riverside 65.0 333430N 1171046W 33.575019 -117.179569 NaN NaN NaN NaN 379.0 1243.0 Murrieta 02/08/2018 NaN

11533 rows × 20 columns

Now, export to SQL

In [124]:
!psql -h localhost -d gnis -c 'SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE datname = current_database() AND pid <> pg_backend_pid();'
!psql -h localhost -c 'DROP DATABASE IF EXISTS gnis'
!psql -h localhost -c 'CREATE DATABASE gnis' 
 pg_terminate_backend 
----------------------
 t
 t
(2 rows)

DROP DATABASE
CREATE DATABASE
In [9]:
%reload_ext sql
%sql postgresql://127.0.0.1:5432/gnis
In [10]:
%sql --persist-replace national
Running query in 'postgresql://127.0.0.1:5432/gnis'
Success! Persisted national to the database.
In [11]:
%sql ALTER TABLE national DROP COLUMN index;
Running query in 'postgresql://127.0.0.1:5432/gnis'
Out[11]:

Now, export to a file with pg_dump

In [3]:
!pg_dump -h localhost --encoding utf8 gnis -f data/gnis.sql 

Finally, run the beginning of this notebook again

In [13]:
!du -h data/gnis.sql
1.6M	data/gnis.sql

FIPS¶

Federal Information Processing System (FIPS) Codes for States and Counties

Manually download the file from this link (https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt) and save it in data/.

  • wget does not work here; the request connects but hangs awaiting a response, so the FCC server likely filters automated clients. Download the file in a browser instead.
In [61]:
!wget https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt -P data/
--2023-10-05 17:45:36--  https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt
Resolving transition.fcc.gov (transition.fcc.gov)... 23.33.29.137, 23.33.29.141, 2600:1407:3c00:30::17c8:9c44, ...
Connecting to transition.fcc.gov (transition.fcc.gov)|23.33.29.137|:443... connected.
HTTP request sent, awaiting response... ^C
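
If you'd rather script the download anyway, a sketch using requests with a browser-like User-Agent (an assumption — the server may filter non-browser clients; fall back to a manual download if this also hangs):

In [ ]:
import requests

# assumption: the server filters non-browser clients, so present a browser UA
resp = requests.get(
    "https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt",
    headers={"User-Agent": "Mozilla/5.0"},
    timeout=30,
)
resp.raise_for_status()
with open("data/fips.txt", "w") as f:
    f.write(resp.text)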
In [74]:
import pandas as pd
import re
In [62]:
with open('data/fips.txt', 'r') as f:
    lines = f.readlines()
In [80]:
COUNTY_STARTS = 69
OFFSET = 3 # the start of the data itself, after headers
In [77]:
re.match(r'\s+(\d+)\s+(\w+)', lines[COUNTY_STARTS + OFFSET]).groups()
Out[77]:
('01000', 'Alabama')
In [111]:
splits = [re.match(r'\s+(\d+)\s+(.*)', line).groups()
          for line in lines[COUNTY_STARTS + OFFSET:]]
splits[0]
Out[111]:
('01000', 'Alabama')

For later: There is a significant discrepancy between the number of counties created and the number of lines remaining in our dataset. We encourage you to investigate this!

In [112]:
len(lines), len(splits)
Out[112]:
(3267, 3195)
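
One quick check (a sketch) suggests where the gap comes from: we skipped COUNTY_STARTS + OFFSET = 72 header and state-section lines before parsing, and 3267 − 72 = 3195.

In [ ]:
len(lines) - (COUNTY_STARTS + OFFSET)  # 3267 - 72 == 3195 == len(splits)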

FIPS codes are numbers which uniquely identify geographic areas. The number of digits in a FIPS code varies with the level of geography: state-level FIPS codes have two digits, and county-level FIPS codes have five, of which the first two are the FIPS code of the state containing the county. When using the list below to look up county FIPS codes, it helps to first look up the FIPS code of the state; that narrows you to the right section of the list, which matters since there are over 3000 counties and county-equivalents (e.g., independent cities, parishes, boroughs) in the United States.

In [178]:
fips_counties = pd.DataFrame(data=splits, columns=['fips', 'county'])
fips_counties['state_numeric'] = fips_counties['fips'].str[:2].astype(int)
fips_counties['fips'] = fips_counties['fips'].astype(int)
fips_counties = fips_counties.set_index('fips')
fips_counties
Out[178]:
county state_numeric
fips
1000 Alabama 1
1001 Autauga County 1
1003 Baldwin County 1
1005 Barbour County 1
1007 Bibb County 1
... ... ...
56037 Sweetwater County 56
56039 Teton County 56
56041 Uinta County 56
56043 Washakie County 56
56045 Weston County 56

3195 rows × 2 columns

In [120]:
%reload_ext sql
%sql postgresql://127.0.0.1:5432/gnis
Connecting and switching to connection postgresql://127.0.0.1:5432/gnis
In [180]:
%sql --persist-replace fips_counties
Running query in 'postgresql://127.0.0.1:5432/gnis'
Success! Persisted fips_counties to the database.

Now, export to a file with pg_dump. This exports both the national and fips_counties relations to the same gnis.sql database dump.

In [181]:
!pg_dump -h localhost --encoding utf8 gnis -f data/gnis.sql 

Finally, run the beginning of this notebook again

In [123]:
!du -h data/gnis.sql
1.7M	data/gnis.sql