import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import importlib
importlib.reload(mpl); importlib.reload(plt); importlib.reload(sns)
sns.reset_orig()
sns.set(font_scale=1.5)
%matplotlib inline
%reload_ext sql
%sql postgresql://localhost:5432/postgres
%sql SELECT setseed(0.42) -- to fix randomness
setseed |
---|
Just like a Python package, we need to import tablefunc
in order to use the normal_rand
function.
%sql CREATE EXTENSION IF NOT EXISTS tablefunc;
Assume our datapoints are truly Normal. Simulate them in observations
.
%sql DROP TABLE IF EXISTS observations CASCADE;
%sql CREATE TABLE observations AS \
SELECT normal_rand AS x \
FROM normal_rand(1000, 50, 5);
results = %sql SELECT x FROM observations;
sns.displot(results.dict(), fill=True, kde=True, bins=20,
height=4, aspect=2)
<seaborn.axisgrid.FacetGrid at 0x7f2e62b58d50>
Construct a view called normal_outliers
using the 2 sigma metric.
# construct the view
%%sql
CREATE OR REPLACE VIEW normal_outliers AS
WITH bounds AS (
SELECT avg(x) - 2*stddev(x) AS lo,
avg(x) + 2*stddev(x) AS hi
FROM observations
)
SELECT x AS outlier
FROM observations o, bounds b
WHERE x NOT BETWEEN b.lo AND b.hi;
# query the view
%sql SELECT * FROM normal_outliers;
outlier |
---|
59.82230258659907 |
39.688576522693424 |
61.3054707730887 |
61.29016170171167 |
37.638986178841535 |
38.80156045940347 |
36.444988608291084 |
34.799624865301666 |
39.71593476243275 |
61.272855781811394 |
results = %sql SELECT x, 'original' AS label \
FROM observations \
UNION ALL \
SELECT x, 'cleaned' AS label\
FROM observations \
WHERE x NOT IN \
(SELECT * FROM normal_outliers)
sns.displot(results.dict(), x="x", kind='hist',
hue='label', kde=True, bins=20,
height=4, aspect=1.5)
plt.xlim(30, 70)
(30.0, 70.0)
A.
## plots outliers and original
results = %sql SELECT x, 'original' AS label \
FROM observations \
UNION ALL \
SELECT x, 'cleaned' AS label\
FROM observations \
WHERE x IN \
(SELECT * FROM normal_outliers)
sns.displot(results.dict(), x="x", kind='hist',
hue='label', kde=True, bins=20,
height=4, aspect=2)
plt.xlim(30, 70)
(30.0, 70.0)
B.
# correct
results = %sql SELECT x, 'original' AS label \
FROM observations \
UNION ALL \
SELECT x, 'cleaned' AS label\
FROM observations \
WHERE x NOT IN \
(SELECT * FROM normal_outliers)
sns.displot(results.dict(), x="x", kind='hist',
hue='label', kde=True, bins=20,
height=4, aspect=2)
plt.xlim(30, 70)
(30.0, 70.0)
C.
results = %sql SELECT x, 'original' AS label \
FROM observations \
WHERE x NOT IN \
(SELECT * FROM normal_outliers) \
UNION ALL \
SELECT x, 'cleaned' AS label \
FROM observations \
WHERE x NOT IN \
(SELECT * FROM normal_outliers)
sns.displot(results.dict(), x="x", kind='hist',
hue='label', kde=True, bins=20,
height=4, aspect=2)
plt.xlim(30, 70)
(30.0, 70.0)
D.
results = %sql SELECT x, 'cleaned' AS label \
FROM observations \
WHERE x NOT IN \
(SELECT * FROM normal_outliers)
sns.displot(results.dict(), x="x", kind='hist',
hue='label', kde=True, bins=20,
height=4, aspect=2)
plt.xlim(30, 70)
(30.0, 70.0)
%sql UPDATE observations SET x = x*10 \
WHERE x = (SELECT MAX(x) FROM observations);
%sql SELECT min(x), max(x) FROM observations
min | max |
---|---|
33.81466007123295 | 681.9343532189914 |
results = %sql SELECT x, 'original' AS label \
FROM observations \
UNION ALL \
SELECT x, 'cleaned' AS label\
FROM observations \
WHERE x NOT IN \
(SELECT * FROM normal_outliers)
sns.displot(results.dict(), x="x", kind='hist',
hue='label', kde=True, bins=np.linspace(32, 70, 20),
height=4, aspect=1.5, rug=True)
plt.xlim(30, 700)
(30.0, 700.0)
results = %sql SELECT x, 'original' AS label \
FROM observations \
UNION ALL \
SELECT x, 'cleaned' AS label\
FROM observations \
WHERE x NOT IN \
(SELECT * FROM normal_outliers)
sns.displot(results.dict(), x="x", kind='hist',
hue='label', kde=True, bins=np.linspace(32, 70, 20),
height=4, aspect=1.5, rug=True)
plt.xlim(30, 70)
(30.0, 70.0)
## reset before moving on
%sql UPDATE observations SET x = x/10 \
WHERE x = (SELECT MAX(x) FROM OBSERVATIONS);
import numpy as np
arr = np.array([92, 19, 101, 58, 1053, 91, 26, 78, 10, 13, -40, 101, 86, 85, 15, 89, 89, 28, -5, 41])
arr
array([ 92, 19, 101, 58, 1053, 91, 26, 78, 10, 13, -40, 101, 86, 85, 15, 89, 89, 28, -5, 41])
np.mean(arr), np.median(arr)
(101.5, 68.0)
np.percentile(arr, 95, method='nearest')
101
np.percentile(arr, 5, method='nearest')
-5
arr_winsorized = np.array([92, 19, 101, 58, 101, 91, 26, 78, 10, 13, -5, 101, 86, 85, 15, 89, 89, 28, -5, 41])
arr_winsorized
array([ 92, 19, 101, 58, 101, 91, 26, 78, 10, 13, -5, 101, 86, 85, 15, 89, 89, 28, -5, 41])
np.mean(arr_winsorized), np.median(arr_winsorized)
(55.65, 68.0)
%%sql
CREATE OR REPLACE VIEW p1p99 AS
SELECT percentile_cont(.01) WITHIN GROUP (ORDER BY x) AS p1,
percentile_cont(.99) WITHIN GROUP (ORDER BY x) AS p99
FROM observations;
SELECT * FROM p1p99;
p1 | p99 |
---|---|
39.11510510063385 | 62.38853671085507 |
%%sql
CREATE OR REPLACE VIEW trimmed_observations AS
SELECT o.x, 'trimmed' AS label
FROM observations o, p1p99 p
WHERE o.x BETWEEN p.p1 AND p.p99
UNION ALL
SELECT o.x, 'original' AS label
FROM observations o;
CREATE OR REPLACE VIEW trimmed_outliers AS
SELECT o.*
FROM observations o, p1p99 p
WHERE o.x NOT BETWEEN p.p1 AND p.p99;
results = %sql SELECT * from trimmed_observations
sns.displot(results.dict(), x="x", kind='hist', hue='label', kde=True, bins=20,
height=4, aspect=1.5)
<seaborn.axisgrid.FacetGrid at 0x7f2e50e25390>
# corrupt one value
%sql UPDATE observations SET x = x*10 \
WHERE x = (SELECT MAX(x) FROM observations);
Turns out, we still have as many outliers, which we trim.
Because we are trimming based on the order of the data and not on the distribution statistics (mean/SD)
results = %sql SELECT * from trimmed_observations
sns.displot(results.dict(), x="x", kind='hist',
hue='label', kde=True, bins=np.linspace(32, 70, 20),
height=4, aspect=1.5, rug=True)
plt.xlim(30, 70)
(30.0, 70.0)
## reset before moving on
# %sql UPDATE observations SET x = x*10 \
# WHERE x = (SELECT MAX(x) FROM OBSERVATIONS);
%%sql
CREATE OR REPLACE VIEW winsorized_observations AS
SELECT CASE WHEN o.x BETWEEN p.p1 AND p.p99 THEN o.x
WHEN o.x < p.p1 THEN p.p1
WHEN o.x > p.p99 THEN p.p99
END AS x,
'winsorized' AS label
FROM observations o, p1p99 p
UNION ALL
SELECT o.x, 'original' AS label
FROM observations o;
results = %sql SELECT * from winsorized_observations
sns.displot(results.dict(), x="x", kind='hist',
hue='label', kde=True, bins=np.linspace(32, 70, 20),
height=4, aspect=1.5, rug=True)
plt.xlim(30, 70)
(30.0, 70.0)
%%sql
SELECT 'orig' AS distribution, min(x),
percentile_disc(.25) WITHIN GROUP (ORDER BY x) as p25,
percentile_disc(.50) WITHIN GROUP (ORDER BY x) as median,
percentile_disc(.75) WITHIN GROUP (ORDER BY x) as p75,
max(x), avg(x), stddev(x), count(x)
FROM observations
UNION ALL
SELECT 'winsorized', min(x),
percentile_disc(.25) WITHIN GROUP (ORDER BY x) as p25,
percentile_disc(.50) WITHIN GROUP (ORDER BY x) as median,
percentile_disc(.75) WITHIN GROUP (ORDER BY x) as p75,
max(x), avg(x), stddev(x), count(x)
FROM winsorized_observations WHERE label = 'winsorized'
UNION ALL
SELECT 'trimmed', min(x),
percentile_disc(.25) WITHIN GROUP (ORDER BY x) as p25,
percentile_disc(.50) WITHIN GROUP (ORDER BY x) as median,
percentile_disc(.75) WITHIN GROUP (ORDER BY x) as p75,
max(x), avg(x), stddev(x), count(x)
FROM trimmed_observations WHERE label = 'trimmed';
distribution | min | p25 | median | p75 | max | avg | stddev | count |
---|---|---|---|---|---|---|---|---|
orig | 33.81466007123295 | 46.590796172482186 | 49.89707347076867 | 53.06512874818042 | 681.9343532189914 | 50.4731336114083 | 20.56765062228802 | 1000 |
winsorized | 39.11510510063385 | 46.590796172482186 | 49.89707347076867 | 53.06512874818042 | 62.38853671085507 | 49.85009437230814 | 4.7890523372867415 | 1000 |
trimmed | 39.11624475909565 | 46.6622689719359 | 49.89707347076867 | 52.92677414266813 | 62.38553193932125 | 49.83169178999314 | 4.540953142225145 | 980 |
%%sql
-- percentile_disc returns an actual data value near the percentile
-- percentile_cont returns an interpolated value at the percentile
CREATE OR REPLACE VIEW median AS
(SELECT percentile_disc(0.5) WITHIN GROUP (ORDER BY x) as median
FROM observations);
%%sql
CREATE OR REPLACE VIEW mad AS
WITH
absdevs AS
(SELECT abs(x - median) as d
FROM observations, median)
SELECT percentile_disc(0.5) WITHIN GROUP (ORDER BY d) as mad
FROM absdevs;
SELECT median, mad
FROM median, mad;
median | mad |
---|---|
49.89707347076867 | 3.228948745848946 |
%%sql
CREATE OR REPLACE VIEW hampelx84x2_observations AS
SELECT o.x,
'hampelx84x2' AS label
FROM observations o, median, mad
WHERE o.x BETWEEN (median-2*1.4826*mad) AND (median+2*1.4826*mad)
UNION ALL
SELECT o.x, 'orig' AS label
FROM observations o;
CREATE OR REPLACE VIEW Hampel84x2_outliers AS
SELECT x
FROM observations o, median, mad
WHERE x NOT BETWEEN (median - 2*1.4826*mad) AND (median + 2*1.4826*mad);
results = %sql SELECT * FROM hampelx84x2_observations
sns.displot(results.dict(), x="x", kind='hist',
hue='label', kde=True, bins=np.linspace(32, 70, 20),
height=4, aspect=1.5, rug=True)
plt.xlim(30, 70)
(30.0, 70.0)
This is the GNIS dataset from the previous lecture. If you didn't load in the database, run the below cell before connecting.
!psql -h localhost -d gnis -c 'SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE datname = current_database() AND pid <> pg_backend_pid();'
!psql -h localhost -c 'DROP DATABASE IF EXISTS gnis'
!psql -h localhost -c 'CREATE DATABASE gnis'
!psql -h localhost -d gnis -f ../lec13/data/gnis.sql
%reload_ext sql
%sql postgresql://localhost:5432/gnis
%sql SELECT setseed(0.12345);
setseed |
---|
%%sql
DROP TABLE IF EXISTS holey CASCADE;
CREATE TABLE holey AS
SELECT feature_id, feature_name, feature_class, state_alpha, county_name,
prim_lat_dec, prim_long_dec,
CASE WHEN random() > 0.9 THEN NULL
ELSE elev_in_m
END AS elev_in_m
FROM national;
SELECT count(elev_in_m)::float / count(*) FROM holey;
?column? |
---|
0.8755744385675887 |
%%sql
WITH elevavg AS (SELECT avg(elev_in_m) FROM holey)
SELECT h.*,
CASE WHEN h.elev_in_m IS NOT NULL THEN h.elev_in_m
ELSE e.avg
END AS imputed_elev_in_m
FROM holey h, elevavg e
LIMIT 10;
feature_id | feature_name | feature_class | state_alpha | county_name | prim_lat_dec | prim_long_dec | elev_in_m | imputed_elev_in_m |
---|---|---|---|---|---|---|---|---|
1230 | Belmont Mountains | Range | AZ | Maricopa | 33.642258 | -112.9010129 | 931.0 | 931.0 |
1839 | Bootlegger Saddle | Gap | AZ | Cochise | 31.8931474 | -109.2831176 | 2707.0 | 2707.0 |
2336 | Cabeza Prieta Game Range | Park | AZ | Yuma | 32.250056 | -113.45074 | 275.0 | 275.0 |
2750 | Chandler Springs | Spring | AZ | Navajo | 35.3766788 | -110.4754096 | 1685.0 | 1685.0 |
3032 | Cline Well | Well | AZ | Cochise | 31.9000849 | -110.3428525 | None | 483.962467815409 |
3039 | Clover Tank | Reservoir | AZ | Gila | 33.8509816 | -110.2577249 | 1563.0 | 1563.0 |
3060 | Coat Spring | Spring | AZ | Navajo | 36.12678 | -110.3330424 | 1926.0 | 1926.0 |
3143 | Comar Spring | Spring | AZ | Navajo | 35.5308428 | -110.4162424 | 1732.0 | 1732.0 |
3333 | Cottonwood Creek | Stream | AZ | Coconino | 36.050817 | -111.9865535 | 800.0 | 800.0 |
3342 | Cottonwood Creek | Stream | AZ | Mohave | 36.901931 | -112.5632547 | 1389.0 | 1389.0 |
# Training is an aggregate function
# Here we'll train the model in SQL just for fun
result = %sql SELECT regr_slope(elev_in_m, prim_long_dec), \
regr_intercept(elev_in_m, prim_long_dec) FROM holey
slope, intercept = result[0]
slope, intercept
(-10.314179001097786, -477.9603219322606)
%%sql
SELECT *,
CASE WHEN elev_in_m IS NOT NULL THEN elev_in_m
ELSE {{slope}}*prim_long_dec + {{intercept}}
END AS imputed_elev_in_m
FROM holey
LIMIT 10;
feature_id | feature_name | feature_class | state_alpha | county_name | prim_lat_dec | prim_long_dec | elev_in_m | imputed_elev_in_m |
---|---|---|---|---|---|---|---|---|
1230 | Belmont Mountains | Range | AZ | Maricopa | 33.642258 | -112.9010129 | 931.0 | 931.0 |
1839 | Bootlegger Saddle | Gap | AZ | Cochise | 31.8931474 | -109.2831176 | 2707.0 | 2707.0 |
2336 | Cabeza Prieta Game Range | Park | AZ | Yuma | 32.250056 | -113.45074 | 275.0 | 275.0 |
2750 | Chandler Springs | Spring | AZ | Navajo | 35.3766788 | -110.4754096 | 1685.0 | 1685.0 |
3032 | Cline Well | Well | AZ | Cochise | 31.9000849 | -110.3428525 | None | 660.1356102444698 |
3039 | Clover Tank | Reservoir | AZ | Gila | 33.8509816 | -110.2577249 | 1563.0 | 1563.0 |
3060 | Coat Spring | Spring | AZ | Navajo | 36.12678 | -110.3330424 | 1926.0 | 1926.0 |
3143 | Comar Spring | Spring | AZ | Navajo | 35.5308428 | -110.4162424 | 1732.0 | 1732.0 |
3333 | Cottonwood Creek | Stream | AZ | Coconino | 36.050817 | -111.9865535 | 800.0 | 800.0 |
3342 | Cottonwood Creek | Stream | AZ | Mohave | 36.901931 | -112.5632547 | 1389.0 | 1389.0 |
We won't show the demo; check slides for the general idea.
%config SqlMagic.displaylimit = 100
%%sql
-- The following doesn't work in PostgreSQL!
WITH buggy AS (
SELECT *,
CASE WHEN elev_in_m IS NOT NULL THEN elev_in_m
ELSE lag(elev_in_m, 1)
OVER (ORDER BY feature_id)
END AS imputed_elev_in_m
FROM holey
)
SELECT * FROM buggy ORDER BY elev_in_m DESC LIMIT 100;
feature_id | feature_name | feature_class | state_alpha | county_name | prim_lat_dec | prim_long_dec | elev_in_m | imputed_elev_in_m |
---|---|---|---|---|---|---|---|---|
902614 | Blue Spring | Spring | NM | San Miguel | 0.0 | 0.0 | None | 2025.0 |
901753 | Emmanuel Baptist Church | Church | NM | San Miguel | 0.0 | 0.0 | None | None |
901695 | Buffalo Head | Summit | NM | Colfax | 0.0 | 0.0 | None | None |
901689 | Cerro de los Burros | Summit | NM | Rio Arriba | 0.0 | 0.0 | None | 1089.0 |
894278 | San Jose Cemetery | Cemetery | NM | Doña Ana | 32.1148663 | -106.7050186 | None | 1421.0 |
891907 | Mesquite Lake | Reservoir | NM | Luna | 31.9873677 | -107.4663164 | None | 1876.0 |
888921 | Estancia Cemetery | Cemetery | NM | Torrance | 34.7412091 | -106.0803887 | None | 2039.0 |
900845 | Bessie Case Spring | Spring | NM | Lincoln | 33.4111999 | -105.9607228 | None | None |
896734 | Wolf Creek | Stream | NM | Mora | 35.8089296 | -104.9183328 | None | 1263.0 |
886608 | Boone Well | Locale | NM | Doña Ana | 32.4270352 | -106.5116614 | None | 1396.0 |
883621 | Windsor Park | Populated Place | NJ | Ocean | 39.9534514 | -74.1495839 | None | None |
887946 | Cottonwood Wash | Stream | NM | San Juan | 36.7780573 | -108.2509094 | None | None |
883958 | Louise Lake Dam | Dam | NJ | Monmouth | 40.1283729 | -74.1978092 | None | 276.0 |
877984 | Ludlam Creek | Stream | NJ | Cape May | 39.2839001 | -74.6830684 | None | 32.0 |
888206 | Dave Smith Windmill | Locale | NM | Grant | 32.4139724 | -108.2322663 | None | 1786.0 |
862959 | Ichabod Range | Range | NV | Elko | 41.7235174 | -115.6278542 | None | 2030.0 |
900669 | Gutierrez Spring | Spring | NM | Otero | 33.058974 | -105.9724903 | None | 2214.0 |
874693 | Berkley Church | Church | NJ | Gloucester | 39.8090023 | -75.2140704 | None | 284.0 |
766085 | Saint Nicholas Church (historical) | Church | MO | St. Louis (city) | 0.0 | 0.0 | None | 175.0 |
890667 | Kenna Draw | Valley | NM | Chaves | 33.8031449 | -103.8819067 | None | 1194.0 |
855500 | Maggie Mine | Mine | NV | Lander | 39.5379808 | -117.0528675 | None | 1605.0 |
805412 | McCrea Mine | Mine | MT | Lewis and Clark | 0.0 | 0.0 | None | 1882.0 |
758604 | Stephens and Akeman Airport (historical) | Airport | MO | Chariton | 39.4653017 | -93.1946478 | None | 141.0 |
883489 | Locust Manor | Populated Place | NJ | Ocean | 40.0501163 | -74.2323659 | None | None |
887875 | Corral | Locale | NM | Eddy | 32.0112363 | -104.0027044 | None | 1321.0 |
857783 | Mountain Spring (historical) | Locale | NV | Pershing | 0.0 | 0.0 | None | None |
846631 | Dean Spring Number Three | Spring | NV | Eureka | 40.2069375 | -116.5423132 | None | 1310.0 |
831935 | Overton Lake | Lake | NE | Holt | 42.285096 | -99.2251813 | None | 483.0 |
857977 | Edgemont School (historical) | School | NV | Elko | 41.7107339 | -116.2909322 | None | None |
847043 | Niagara Cañon | Valley | NV | Nye | 38.7693237 | -117.2054653 | None | 1962.0 |
864086 | Sunnyside-Kirch Wildlife Management Area Airport (historical) | Airport | NV | Nye | 38.4146731 | -115.03668 | None | 1289.0 |
861013 | Potts Ranch | Locale | NV | Nye | 39.0865971 | -116.6411881 | None | 1709.0 |
838724 | Big Canyon Ranch | Locale | NV | Washoe | 40.0849059 | -119.7410274 | None | 2004.0 |
856083 | Millers | Populated Place | NV | Esmeralda | 38.1365977 | -117.4575928 | None | 1969.0 |
884506 | Albert G Waters Stadium | Park | NJ | Middlesex | 40.5242719 | -74.2723682 | None | 95.0 |
886172 | Beeson Station | Locale | NM | Eddy | 32.7828953 | -103.9596741 | None | 1878.0 |
857694 | Centennial (historical) | Locale | NV | Nye | 0.0 | 0.0 | None | 555.0 |
764862 | Trautman Lake Dam | Dam | MO | Ste. Genevieve | 37.8550524 | -90.2984542 | None | 288.0 |
755525 | Valley Knob | Summit | MO | Dent | 0.0 | 0.0 | None | 348.0 |
757560 | Bete Creek | Stream | MO | St. Louis | 0.0 | 0.0 | None | None |
880021 | Saint John's Roman Catholic Church | Church | NJ | Essex | 40.738157 | -74.1659776 | None | 26.0 |
869862 | Shaker Branch | Stream | NH | Merrimack | 43.3184132 | -71.4689601 | None | 594.0 |
833535 | Solso School | School | NE | Madison | 41.7644533 | -97.6972752 | None | None |
847442 | One Thousand One Ranch | Locale | NV | Lincoln | 37.741076 | -114.4449911 | None | 1811.0 |
791922 | Taylor Mine | Mine | MT | Beaverhead | 45.233256 | -112.9894813 | None | 1774.0 |
798110 | Nomland Dam | Dam | MT | McCone | 47.7366747 | -106.1472436 | None | 763.0 |
738277 | Sugarcamp Bluff | Cliff | MO | Camden | 37.9044801 | -92.8907419 | None | 329.0 |
745559 | Bacon (historical) | Locale | MO | Cass | 0.0 | 0.0 | None | 276.0 |
854196 | Old Humboldt Mine | Mine | NV | Lander | 40.6135218 | -117.0892717 | None | 1907.0 |
830975 | Lynchburg Cemetery | Cemetery | NE | Richardson | 40.1306579 | -95.9071931 | None | 693.0 |
882713 | West Fork Minagamahone Brook | Stream | NJ | Monmouth | 40.2383834 | -74.1615129 | None | 0.0 |
883462 | Hightstown High School | School | NJ | Mercer | 40.2612126 | -74.5301502 | None | 21.0 |
837604 | Humboldt Hospital Heliport | Airport | NE | Richardson | 40.1666672 | -95.9336091 | None | 366.0 |
814008 | 04N02E18ACAC01 Well | Well | MT | Broadwater | 46.1021509 | -111.5238582 | None | 812.0 |
752036 | Sargent School | School | MO | Texas | 37.088386 | -92.0209876 | None | 269.0 |
761826 | Deimeke Lake | Reservoir | MO | Audrain | 39.1954006 | -91.9053704 | None | 236.0 |
804344 | SW SW Section 16 Mine | Mine | MT | Madison | 45.6604819 | -111.986931 | None | 1882.0 |
799092 | Widgeon Slough | Lake | MT | Sheridan | 48.9647464 | -104.245728 | None | 1371.0 |
824195 | 22N02W30B___01 Well | Well | MT | Teton | 47.6363413 | -111.9180529 | None | 771.0 |
828506 | Cub Creek | Stream | NE | Jefferson | 40.2447236 | -97.1136453 | None | 578.0 |
810480 | 03S44E03ACA_01 Well | Well | MT | Rosebud | 45.6083332 | -106.2822366 | None | 1596.0 |
712928 | Post Post Office (historical) | Post Office | MS | Lauderdale | 0.0 | 0.0 | None | None |
789082 | Pony Mine | Mine | MT | Madison | 45.6679821 | -111.9544299 | None | 1362.0 |
847886 | Foreys Toll House (historical) | Locale | NV | Mineral | 37.9665971 | -118.3381755 | None | 1877.0 |
810266 | 03S25E07C___01 Well | Well | MT | Yellowstone | 45.5843947 | -108.6884727 | None | 1045.0 |
845041 | Woods Ditch | Canal | NV | Lyon | 38.9513059 | -119.1682088 | None | 2835.0 |
676587 | Reform Chapel | Church | MS | Choctaw | 33.4304009 | -89.1458978 | None | 58.0 |
775680 | Redstone | Populated Place | MT | Sheridan | 48.8216926 | -104.9441394 | None | 1940.0 |
833397 | Shickley | Populated Place | NE | Fillmore | 40.4166737 | -97.7225446 | None | 809.0 |
804835 | D J Silver/Saint Louis Mine | Mine | MT | Missoula | 47.2413172 | -114.7120716 | None | 1126.0 |
824829 | 23N50E14CABB01 Well | Well | MT | Dawson | 47.7539035 | -105.2705401 | None | 586.0 |
757694 | Our Lady of Lourdes Church (historical) | Church | MO | St. Louis | 0.0 | 0.0 | None | None |
826167 | 27N26E17CBCB01 Well | Well | MT | Phillips | 48.0936077 | -108.4101416 | None | 1008.0 |
670011 | Flag Chapel Baptist Church | Church | MS | Hinds | 32.3509782 | -90.2650901 | None | 113.0 |
516216 | Walnut Grove Church | Church | KY | Owsley | 37.3914773 | -83.7496406 | None | 390.0 |
754345 | Ruth | Populated Place | MO | Stone | 0.0 | 0.0 | None | 426.0 |
739137 | Huff Branch | Stream | MO | Johnson | 0.0 | 0.0 | None | None |
757643 | Hillside (historical) | Building | MO | St. Louis | 0.0 | 0.0 | None | None |
579778 | Troy Center | Populated Place | ME | Waldo | 44.682568 | -69.2567136 | None | 51.0 |
706386 | Mount Carmel Cemetery | Cemetery | MS | Lauderdale | 32.5373557 | -88.7183739 | None | 125.0 |
753331 | Cureall School | Locale | MO | Howell | 36.678672 | -92.0707096 | None | 252.0 |
743734 | Grand Prairie Presbyterian Church (historical) | Church | MO | Randolph | 0.0 | 0.0 | None | None |
730185 | Raccoon Creek | Stream | MO | Grundy | 40.1286217 | -93.702999 | None | 255.0 |
857174 | Strode Ranch | Locale | NV | Elko | 0.0 | 0.0 | None | 1385.0 |
807886 | 08S49E32CB__01 Well | Well | MT | Powder River | 45.0941536 | -105.7530619 | None | 1187.0 |
817293 | 10N19W07DDDC01 Well | Well | MT | Ravalli | 46.6315896 | -114.0273283 | None | 1169.0 |
737285 | Cicero | Building | MO | Washington | 0.0 | 0.0 | None | None |
757755 | Walnut Plains Camp Grounds (historical) | Locale | MO | St. Louis | 0.0 | 0.0 | None | None |
757982 | Boyers Prairie | Area | MO | St. Francois | 0.0 | 0.0 | None | None |
802880 | Daly Gulch Placer Mine | Mine | MT | Deer Lodge | 46.1938141 | -113.2378348 | None | 2258.0 |
712334 | Fitzhugh Post Office (historical) | Post Office | MS | Sunflower | 0.0 | 0.0 | None | None |
543368 | La Rosen | Populated Place | LA | Caddo | 32.4034884 | -93.7893467 | None | 27.0 |
744733 | Wrights Mill | Locale | MO | Miller | 0.0 | 0.0 | None | 330.0 |
754721 | Three Springs Camp Ground | Locale | MO | Butler | 0.0 | 0.0 | None | 455.0 |
737097 | Wesco School | School | MO | Crawford | 0.0 | 0.0 | None | None |
736860 | Berrys Mill | Locale | MO | Madison | 0.0 | 0.0 | None | 307.0 |
743277 | Beck Cemetery | Cemetery | MO | Atchison | 40.3220754 | -95.4544788 | None | 289.0 |
736877 | Marvin Collegiate Institute | School | MO | Madison | 0.0 | 0.0 | None | None |
730991 | Claypool School | School | MO | Scott | 0.0 | 0.0 | None | None |
816507 | 09N01E35CB__02 Well | Well | MT | Broadwater | 46.492156 | -111.5752493 | None | 1675.0 |
%%sql
-- Here's a UDA fix from
-- https://stackoverflow.com/questions/18987791/how-do-i-efficiently-select-the-previous-non-null-value
CREATE OR REPLACE FUNCTION coalesce_agg_sfunc(state anyelement, value anyelement) RETURNS anyelement AS
$$
SELECT coalesce(value, state);
$$ LANGUAGE SQL;
CREATE AGGREGATE coalesce_agg(anyelement) (
SFUNC = coalesce_agg_sfunc,
STYPE = anyelement);
%%sql
-- Fixed to handle repeated NULLs
WITH fixed AS (
SELECT *,
coalesce_agg(elev_in_m) OVER (order by feature_id) AS imputed_elev_in_m
FROM holey
)
SELECT * FROM fixed ORDER BY imputed_elev_in_m DESC LIMIT 100;
feature_id | feature_name | feature_class | state_alpha | county_name | prim_lat_dec | prim_long_dec | elev_in_m | imputed_elev_in_m |
---|---|---|---|---|---|---|---|---|
1416580 | Browne Tower | Summit | AK | Denali | 63.1000583 | -150.9314441 | 4628.0 | 4628.0 |
188010 | Grizzly Peak | Summit | CO | La Plata | 37.6076296 | -107.5818648 | 4171.0 | 4171.0 |
191939 | Spread Eagle Peak | Summit | CO | Custer | 38.1252369 | -105.6435197 | 4090.0 | 4090.0 |
187393 | Emery Peak | Summit | CO | San Juan | 37.8882556 | -107.6210535 | 4054.0 | 4054.0 |
181832 | Baker Mountain | Summit | CO | Clear Creek | 39.6607654 | -105.8402538 | 3832.0 | 3832.0 |
1703800 | Lucky Jim One Mine | Mine | CO | Park | 39.1519347 | -105.742509 | None | 3728.0 |
1703775 | Park County Claims Mine | Mine | CO | Park | 39.3510997 | -105.9869614 | 3728.0 | 3728.0 |
393894 | Donaldson Peak | Summit | ID | Custer | 44.0639981 | -113.7003853 | 3661.0 | 3661.0 |
1599419 | Dundee Mountain | Summit | WY | Park | 43.8771622 | -109.2682693 | 3640.0 | 3640.0 |
939450 | Master Man Mine | Mine | NM | Colfax | 36.6297542 | -105.2088936 | 3633.0 | 3633.0 |
179902 | Ceresco Number 4 Adit | Mine | CO | Lake | 39.3644327 | -106.1714108 | 3604.0 | 3604.0 |
189898 | Antoro Shaft | Mine | CO | Saguache | 38.3297213 | -106.125297 | 3577.0 | 3577.0 |
1684419 | Maumee Mine | Mine | CO | Park | 39.269434 | -106.1461318 | 3555.0 | 3555.0 |
1598169 | Big Sheep Mountain | Summit | WY | Sublette | 43.2751868 | -109.8786551 | 3537.0 | 3537.0 |
1603836 | Twins Glacier | Glacier | WY | Sublette | 43.149673 | -109.6540386 | 3517.0 | 3517.0 |
1427800 | Mount Ellen | Range | UT | Garfield | 38.1097069 | -110.8137658 | 3501.0 | 3501.0 |
1734540 | Gold Run Placer | Mine | CO | San Miguel | 37.831384 | -107.8345093 | 3449.0 | 3449.0 |
1659681 | Slide Mountain | Summit | CA | Mono | 38.0931634 | -119.4458359 | 3379.0 | 3379.0 |
178928 | Windy Gap | Gap | CO | Montezuma | 37.4749967 | -108.0917424 | 3354.0 | 3354.0 |
197523 | Payne Gulch | Valley | CO | Gunnison | 38.6838024 | -106.4736984 | 3340.0 | 3340.0 |
780007 | Black Mountain | Summit | MT | Park | 45.4713346 | -110.4664499 | 3337.0 | 3337.0 |
203950 | Trachyte Knob | Summit | CO | Teller | 38.7731142 | -105.1065711 | 3314.0 | 3314.0 |
179985 | Buckskin Joe Mine | Mine | CO | Park | 39.2899894 | -106.0955752 | 3309.0 | 3309.0 |
189852 | Sheep Mountain Trail | Trail | CO | Saguache | 38.3297215 | -106.1889107 | 3298.0 | 3298.0 |
2572516 | Windy Lake | Lake | WY | Fremont | 42.6200138 | -108.9825615 | 3274.0 | 3274.0 |
1444260 | Pearl Lake | Lake | UT | Uintah | 40.7891427 | -109.9571503 | 3263.0 | 3263.0 |
181328 | Floral Park Campground | Locale | CO | Clear Creek | 39.7894314 | -105.7805628 | 3258.0 | 3258.0 |
1724633 | Garfield Grouse Mine | Mine | CO | Teller | 38.7330442 | -105.1363688 | 3251.0 | 3251.0 |
186674 | Copley Lake | Lake | CO | Gunnison | 38.8722009 | -107.0830674 | 3251.0 | 3251.0 |
1414051 | Mount Spurr | Summit | AK | Kenai Peninsula | 61.2997222 | -152.2513889 | 3248.0 | 3248.0 |
779022 | Anaconda Range | Range | MT | Deer Lodge | 46.0500221 | -113.1901018 | 3246.0 | 3246.0 |
1446855 | Upper Chain Lake | Reservoir | UT | Duchesne | 40.7113249 | -110.2569397 | 3240.0 | 3240.0 |
931481 | Double Eagle V Ski Trail | Trail | NM | Santa Fe | 35.7911406 | -105.7950188 | 3240.0 | 3240.0 |
262684 | Little Papoose Creek | Stream | CA | Trinity | 40.8009791 | -122.735855 | None | 3224.0 |
262186 | Kings River Trail | Trail | CA | Inyo | 37.1679871 | -118.48483 | 3224.0 | 3224.0 |
1441700 | Henderson Creek | Stream | UT | Garfield | 37.6077619 | -112.0557457 | None | 3223.0 |
1441572 | Hancock Peak | Summit | UT | Iron | 37.6349097 | -112.7619161 | 3223.0 | 3223.0 |
187877 | Raber Lohr Ditch | Canal | CO | Hinsdale | 37.6786111 | -107.3222767 | 3218.0 | 3218.0 |
181537 | Utah Hill | Summit | CO | Gilpin | 39.861742 | -105.5629767 | 3202.0 | 3202.0 |
1735447 | Little Keystone Shaft Mine | Mine | CO | Lake | 39.2477668 | -106.4028044 | 3196.0 | 3196.0 |
861027 | Prospect Peak | Summit | NV | Elko | 41.7961632 | -115.3514744 | 3176.0 | 3176.0 |
189182 | Lienhart Mine | Mine | CO | Chaffee | 38.9377726 | -106.2547437 | None | 3173.0 |
189124 | Holywater Mine | Mine | CO | Chaffee | 38.8094397 | -106.358913 | 3173.0 | 3173.0 |
1693838 | Eva Mine Complex | Mine | CO | Clear Creek | 39.6794316 | -105.7277832 | 3166.0 | 3166.0 |
1600421 | Horse Pasture Draw | Valley | WY | Sublette | 42.913281 | -110.3380592 | None | 3151.0 |
1599988 | Goosewing Guard Station | Locale | WY | Teton | 43.5629975 | -110.3032447 | None | 3151.0 |
1599888 | Galena Basin | Basin | WY | Park | 43.905233 | -109.2923677 | 3151.0 | 3151.0 |
177789 | Green Mountain | Summit | CO | Grand | 40.2961549 | -105.8221575 | 3138.0 | 3138.0 |
181517 | Nugget | Populated Place | CO | Gilpin | 39.8635984 | -105.5883336 | 3128.0 | 3128.0 |
1428713 | Highline Trail | Trail | UT | Duchesne | 40.688557 | -110.8173912 | 3120.0 | 3120.0 |
199320 | Trio Dam | Dam | CO | Delta | 39.0466474 | -107.8356185 | 3108.0 | 3108.0 |
862430 | Great Basin National Park | Park | NV | White Pine | 38.9460953 | -114.2579658 | 3102.0 | 3102.0 |
173389 | Crater Lake | Lake | CO | Rio Blanco | 40.1400804 | -107.1207371 | 3097.0 | 3097.0 |
1667345 | Little Shot Lake | Lake | CA | Fresno | 37.1252053 | -118.8260505 | 3096.0 | 3096.0 |
189116 | East Willow Creek | Stream | CO | Gunnison | 38.7538837 | -106.4819717 | 3089.0 | 3089.0 |
1684255 | Pacific-Gold Dust Mine | Mine | CO | Summit | 39.4685983 | -106.0244624 | 3089.0 | 3089.0 |
175365 | Little Giant Reservoir Number 1 | Reservoir | CO | Delta | 39.0405469 | -107.7873713 | 3084.0 | 3084.0 |
1724727 | Lexington Mine | Mine | CO | Teller | 38.7405442 | -105.1560918 | 3076.0 | 3076.0 |
176038 | Dubach Meadows | Flat | CO | Eagle | 39.5402631 | -106.6630902 | 3074.0 | 3074.0 |
917164 | Hidden Treasure Mine | Mine | NM | Colfax | 36.6114208 | -105.2411167 | 3068.0 | 3068.0 |
396870 | Mount McGuire | Summit | ID | Lemhi | 45.174046 | -114.602137 | 3059.0 | 3059.0 |
1684932 | Allied Gold Mining Company Mine | Mine | CO | Boulder | 39.9930424 | -105.576667 | 3049.0 | 3049.0 |
176195 | Guller Creek | Stream | CO | Summit | 39.5035976 | -106.1758565 | 3035.0 | 3035.0 |
176528 | McKinney Draw | Valley | CO | Dolores | 37.8663073 | -108.5713788 | None | 3035.0 |
1445191 | Roundy Spring | Spring | UT | Garfield | 38.0396633 | -111.6903788 | 3025.0 | 3025.0 |
1445316 | Sand Wash | Stream | UT | Garfield | 37.8838658 | -112.425208 | None | 3025.0 |
199786 | Kendall Dam | Dam | CO | Mesa | 39.1149804 | -107.8072845 | 3024.0 | 3024.0 |
1442905 | Long Flat | Flat | UT | Piute | 38.2683083 | -112.3552077 | 3011.0 | 3011.0 |
1444807 | Red Lake | Lake | UT | Daggett | 40.7950226 | -109.8341217 | 3004.0 | 3004.0 |
909123 | North Baldy | Summit | NM | Socorro | 34.0503013 | -107.1812222 | 3002.0 | 3002.0 |
190348 | Rudifor Lake | Lake | CO | Rio Grande | 37.5242201 | -106.6722501 | 2997.0 | 2997.0 |
176590 | Kilpacker Creek | Stream | CO | Dolores | 37.8233271 | -108.0511831 | 2995.0 | 2995.0 |
261678 | Iceberg Lake | Lake | CA | Madera | 37.6704116 | -119.1691178 | 2981.0 | 2981.0 |
1438313 | Avon | Populated Place | UT | Cache | 41.5329911 | -111.8132743 | None | 2978.0 |
1438255 | Antimony Knoll | Summit | UT | Garfield | 38.0514094 | -111.815261 | 2978.0 | 2978.0 |
1683679 | Majestic Mine | Mine | CO | Boulder | 40.1247079 | -105.5588892 | 2978.0 | 2978.0 |
806032 | Imelda Lake | Lake | MT | Park | 45.1663594 | -109.9120372 | 2977.0 | 2977.0 |
931531 | Geronimo Ski Trail | Trail | NM | Lincoln | 33.3973012 | -105.7885986 | 2976.0 | 2976.0 |
930659 | Wild Onion Ski Trail | Trail | NM | Lincoln | 33.3973012 | -105.7885986 | 2976.0 | 2976.0 |
1447716 | Manning Meadows Reservoir | Reservoir | UT | Piute | 38.4904107 | -112.0702195 | 2971.0 | 2971.0 |
173999 | Deadhorse Spring | Spring | CO | Garfield | 39.6572051 | -107.2211646 | 2962.0 | 2962.0 |
172337 | Lower Rainbow Lake | Lake | CO | Jackson | 40.6481244 | -106.6193546 | 2960.0 | 2960.0 |
268442 | Trumbull Lake | Lake | CA | Mono | 38.0534252 | -119.2578319 | 2957.0 | 2957.0 |
188507 | Phelps Cabin Creek | Stream | CO | Hinsdale | 38.1488844 | -107.1053286 | 2954.0 | 2954.0 |
1446325 | Tall Four Reservoir | Reservoir | UT | Garfield | 37.9200023 | -111.8341652 | 2950.0 | 2950.0 |
1685340 | Legion Mine | Mine | CO | Clear Creek | 39.7794318 | -105.6813922 | 2942.0 | 2942.0 |
1609296 | Sepulcher Mountain | Summit | WY | Park | 44.9903881 | -110.7665775 | 2939.0 | 2939.0 |
2664241 | Tucker Ponds | Lake | CO | Mineral | 37.49492 | -106.75954 | 2934.0 | 2934.0 |
1724854 | Rose Maud Mine | Mine | CO | Teller | 38.7324889 | -105.1574808 | 2932.0 | 2932.0 |
853139 | Puritan Mine | Mine | NV | Elko | 41.8485179 | -115.3800676 | 2910.0 | 2910.0 |
186986 | McIntosh Mountain | Summit | CO | Gunnison | 38.5750862 | -107.014652 | 2905.0 | 2905.0 |
1447736 | Farnsworth Dam | Dam | UT | Sevier | 38.7733041 | -111.6624129 | 2897.0 | 2897.0 |
269289 | Woodchuck Country | Area | CA | Fresno | 37.0174429 | -118.8887203 | 2884.0 | 2884.0 |
859080 | Buller Mountain | Summit | NV | Mineral | 38.3808139 | -118.6877054 | 2880.0 | 2880.0 |
1606084 | Thompson Reservoir | Reservoir | WY | Carbon | 41.1983044 | -106.3655775 | 2878.0 | 2878.0 |
183510 | The Castle | Pillar | CO | Jefferson | 39.3105454 | -105.3824978 | 2873.0 | 2873.0 |
39068 | Lee Valley Dam | Dam | AZ | Apache | 33.9417151 | -109.4989794 | 2870.0 | 2870.0 |
180170 | Crystal Lakes | Lake | CO | Lake | 39.1950626 | -106.3383877 | 2867.0 | 2867.0 |
2724004 | Petrie Ditch | Canal | CO | Park | 39.3091177 | -105.8102555 | 2862.0 | 2862.0 |
177295 | Hourglass Reservoir | Reservoir | CO | Larimer | 40.5793186 | -105.6319349 | 2859.0 | 2859.0 |