Lecture 11¶

In [2]:
import numpy as np
import pandas as pd

Structural Transformation: From Relations to Matrices and Back (and everything in between)¶

As we saw in the lecture:

  • Matrix $\rightarrow$ Relational works.
  • Relational $\rightarrow$ Matrix sometimes works!

Sometimes we have something in the middle.

To start, let's take the dataset in mm.txt, and load it into Pandas.

In [3]:
import pandas as pd
In [4]:
mm = pd.read_csv('data/mm.txt', header=0)
mm
Out[4]:
Year OCT NOV DEC JAN FEB MAR APR MAY JUN JUL AUG SEP
0 2002 545.92 3115.08 3996.76 1815.74 1204.14 1644.02 795.92 540.24 112.62 79.52 22.20 171.70
1 2003 55.41 1242.23 2976.94 797.72 836.01 1026.11 1571.27 468.59 24.93 98.33 267.40 99.20
2 2004 55.90 834.40 2311.72 942.75 2019.22 399.52 339.18 251.64 72.38 55.57 116.74 97.48
3 2006 347.22 908.44 2981.16 1793.97 995.27 2031.19 1602.55 287.21 102.44 90.31 18.75 33.76
4 2005 1449.23 619.77 1789.93 1777.23 1055.41 1472.91 743.11 1113.26 309.19 46.61 86.51 93.98
5 2007 178.81 942.89 1279.33 320.66 1615.47 317.77 519.85 150.15 85.32 102.60 62.74 164.02
6 2008 612.92 329.64 1189.43 2153.17 1007.57 316.55 153.53 255.80 42.45 41.73 36.83 12.59
7 2009 272.27 777.81 1102.87 533.64 1479.80 881.30 297.80 580.28 320.84 35.95 53.25 42.99
8 2010 722.61 379.36 1029.03 1780.54 1136.93 814.83 1225.52 487.75 146.90 38.05 46.77 88.72
9 2011 1059.68 1016.81 2555.51 463.27 1059.94 2173.84 465.80 552.36 350.04 84.51 21.59 76.77
10 2020 56.21 311.24 1202.38 578.53 70.34 789.14 509.50 375.76 70.86 17.71 25.99 21.65
11 2012 527.67 523.53 156.69 1066.21 421.75 1643.46 828.51 115.19 164.73 98.85 92.70 45.65
12 2014 119.18 281.26 178.54 233.12 1314.08 978.52 376.71 179.59 42.21 96.86 163.17 244.05
13 2013 376.65 1131.15 1759.84 338.40 189.12 391.51 259.37 254.56 142.22 72.63 96.91 343.69
14 2015 342.81 551.60 1654.38 149.87 860.72 243.26 343.53 338.50 109.07 201.23 49.52 84.94
15 2016 305.11 635.79 1530.09 1754.74 367.08 1395.37 427.64 245.77 110.11 37.56 28.19 52.88
16 2017 1175.34 581.67 1155.19 2628.17 2110.15 751.90 825.07 153.14 118.32 53.72 72.88 156.63
17 2018 130.64 967.23 118.69 847.21 201.54 1540.98 578.42 201.33 43.35 68.03 22.84 14.06
18 2019 199.39 754.07 585.14 1282.06 2261.94 1090.67 442.48 743.01 48.86 25.14 48.34 208.95

Technically, you could view this as a matrix or a relation.

But this table is "closer" to a matrix than to a relation (in the tidy sense), because it doesn't have one observation per row: each row packs twelve monthly observations. How do we get to tidy form? We use an unpivot, or melt in pandas-speak. This takes us from a wide table to a long table.

What does an unpivot look like (Matrix -> Relational)?¶

In [5]:
mm_melted = mm.melt(id_vars=['Year'])
mm_melted
Out[5]:
Year variable value
0 2002 OCT 545.92
1 2003 OCT 55.41
2 2004 OCT 55.90
3 2006 OCT 347.22
4 2005 OCT 1449.23
... ... ... ...
223 2015 SEP 84.94
224 2016 SEP 52.88
225 2017 SEP 156.63
226 2018 SEP 14.06
227 2019 SEP 208.95

228 rows × 3 columns

Thanks to the id_vars parameter, the Year column is kept and its value repeated for every (variable=column name, value=cell value) pair melted out of that row.
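As a tiny sketch of this behavior, using just the first two months and years from mm.txt, note how the single id column is repeated and how var_name/value_name can rename the default variable/value columns:

```python
import pandas as pd

# Two-year, two-month excerpt of mm.txt
wide = pd.DataFrame({'Year': [2002, 2003],
                     'OCT': [545.92, 55.41],
                     'NOV': [3115.08, 1242.23]})

# Year is the id column: repeated once per melted (variable, value) pair
long = wide.melt(id_vars=['Year'], var_name='month', value_name='rainfall')
print(long)
```

Each Year appears twice in the output, once per non-id column that was melted.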

Here are the melted rows for a single year.

In [6]:
mm_melted[mm_melted['Year'] == 2002]
Out[6]:
Year variable value
0 2002 OCT 545.92
19 2002 NOV 3115.08
38 2002 DEC 3996.76
57 2002 JAN 1815.74
76 2002 FEB 1204.14
95 2002 MAR 1644.02
114 2002 APR 795.92
133 2002 MAY 540.24
152 2002 JUN 112.62
171 2002 JUL 79.52
190 2002 AUG 22.20
209 2002 SEP 171.70

PIVOTs and PIVOT(UNPIVOT) = ?¶

Now, how do we undo this operation? We use a pivot. Pivot translates a long table to a wide table.
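Here's a minimal sketch of pivot on the same toy excerpt, before running it on the full melted table:

```python
import pandas as pd

# Long-form excerpt: one observation per row
long = pd.DataFrame({'Year': [2002, 2003, 2002, 2003],
                     'variable': ['OCT', 'OCT', 'NOV', 'NOV'],
                     'value': [545.92, 55.41, 3115.08, 1242.23]})

# pivot: one row per index value, one column per distinct 'variable'
wide = long.pivot(index='Year', columns='variable', values='value')
print(wide)
```

Passing values='value' gives flat month columns; omitting it (as the cell below does) leaves a ('value', month) column MultiIndex instead.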

In [7]:
#mm_melted.pivot(index='variable', columns='Year')
mm_melted.pivot(index='Year', columns='variable')
Out[7]:
value
variable APR AUG DEC FEB JAN JUL JUN MAR MAY NOV OCT SEP
Year
2002 795.92 22.20 3996.76 1204.14 1815.74 79.52 112.62 1644.02 540.24 3115.08 545.92 171.70
2003 1571.27 267.40 2976.94 836.01 797.72 98.33 24.93 1026.11 468.59 1242.23 55.41 99.20
2004 339.18 116.74 2311.72 2019.22 942.75 55.57 72.38 399.52 251.64 834.40 55.90 97.48
2005 743.11 86.51 1789.93 1055.41 1777.23 46.61 309.19 1472.91 1113.26 619.77 1449.23 93.98
2006 1602.55 18.75 2981.16 995.27 1793.97 90.31 102.44 2031.19 287.21 908.44 347.22 33.76
2007 519.85 62.74 1279.33 1615.47 320.66 102.60 85.32 317.77 150.15 942.89 178.81 164.02
2008 153.53 36.83 1189.43 1007.57 2153.17 41.73 42.45 316.55 255.80 329.64 612.92 12.59
2009 297.80 53.25 1102.87 1479.80 533.64 35.95 320.84 881.30 580.28 777.81 272.27 42.99
2010 1225.52 46.77 1029.03 1136.93 1780.54 38.05 146.90 814.83 487.75 379.36 722.61 88.72
2011 465.80 21.59 2555.51 1059.94 463.27 84.51 350.04 2173.84 552.36 1016.81 1059.68 76.77
2012 828.51 92.70 156.69 421.75 1066.21 98.85 164.73 1643.46 115.19 523.53 527.67 45.65
2013 259.37 96.91 1759.84 189.12 338.40 72.63 142.22 391.51 254.56 1131.15 376.65 343.69
2014 376.71 163.17 178.54 1314.08 233.12 96.86 42.21 978.52 179.59 281.26 119.18 244.05
2015 343.53 49.52 1654.38 860.72 149.87 201.23 109.07 243.26 338.50 551.60 342.81 84.94
2016 427.64 28.19 1530.09 367.08 1754.74 37.56 110.11 1395.37 245.77 635.79 305.11 52.88
2017 825.07 72.88 1155.19 2110.15 2628.17 53.72 118.32 751.90 153.14 581.67 1175.34 156.63
2018 578.42 22.84 118.69 201.54 847.21 68.03 43.35 1540.98 201.33 967.23 130.64 14.06
2019 442.48 48.34 585.14 2261.94 1282.06 25.14 48.86 1090.67 743.01 754.07 199.39 208.95
2020 509.50 25.99 1202.38 70.34 578.53 17.71 70.86 789.14 375.76 311.24 56.21 21.65

Had we pivoted with index and columns swapped (the commented-out line above), we would have gotten the transpose!
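We can check the transpose claim on the toy excerpt: pivoting with the index and columns arguments swapped yields exactly the transposed table.

```python
import pandas as pd

long = pd.DataFrame({'Year': [2002, 2003, 2002, 2003],
                     'variable': ['OCT', 'OCT', 'NOV', 'NOV'],
                     'value': [545.92, 55.41, 3115.08, 1242.23]})

by_year = long.pivot(index='Year', columns='variable', values='value')
by_month = long.pivot(index='variable', columns='Year', values='value')

# Swapping index and columns is the same as transposing
print((by_year.T.values == by_month.values).all())
```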

Extra Columns¶

Let's go back to mmp.txt.

  • Matrix or relation?
  • Try doing some PIVOT/UNPIVOT work on this.
In [8]:
mmp = pd.read_csv('data/mmp.txt', header=0)
mmp
Out[8]:
Year ID Location Station OCT NOV DEC JAN FEB MAR APR MAY JUN JUL AUG SEP
0 2002 4BK BROOKINGS SOUTHERN OREGON COASTAL 12.86 29.06 34.64 34.64 18.20 12.10 13.24 7.30 7.36 0.04 0.06 2.90
1 2002 ASHO3 ASHLAND SOUTHERN OREGON COASTAL 0.76 7.00 6.82 2.64 2.58 2.58 5.84 3.76 0.16 0.56 0.00 0.40
2 2002 COPO3 COPPER 4NE SOUTHERN OREGON COASTAL 0.58 13.36 13.96 6.84 3.98 3.60 0.00 0.00 0.00 0.00 0.00 0.54
3 2002 CVJO3 CAVE JUNCTION SOUTHERN OREGON COASTAL 4.92 27.20 29.62 19.52 12.92 9.26 3.88 1.78 0.00 0.00 0.00 0.66
4 2002 GOLO3 GOLD BEACH SOUTHERN OREGON COASTAL 9.26 23.44 33.18 29.16 17.78 13.24 9.46 3.00 4.18 0.04 0.00 1.24
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6203 2020 KENC1 KENTFIELD RUSSIAN...NAPA...SAN FRANCISCO BAY 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
6204 2020 MHMC1 MOUNT HAMILTON RUSSIAN...NAPA...SAN FRANCISCO BAY 0.08 1.65 6.36 3.12 0.00 3.80 2.94 1.55 0.00 0.00 0.02 0.00
6205 2020 NSHC1 NAPA STATE HOSPITAL RUSSIAN...NAPA...SAN FRANCISCO BAY 0.00 0.96 5.21 2.09 0.00 1.59 1.11 2.92 0.00 0.00 0.06 0.00
6206 2020 OAMC1 OAKLAND MUSEUM RUSSIAN...NAPA...SAN FRANCISCO BAY 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
6207 2020 PORC1 POTTER VALLEY PH RUSSIAN...NAPA...SAN FRANCISCO BAY 0.26 1.71 6.53 5.77 0.01 2.30 0.96 2.38 0.00 0.00 0.14 0.00

6208 rows × 16 columns

This is closer to a matrix than a tidy relation. How do we get there?

In [9]:
# Unpivot
mmp_melted = mmp.melt(id_vars=['Location', 'Station', 'Year', 'ID'])
mmp_melted
Out[9]:
Location Station Year ID variable value
0 BROOKINGS SOUTHERN OREGON COASTAL 2002 4BK OCT 12.86
1 ASHLAND SOUTHERN OREGON COASTAL 2002 ASHO3 OCT 0.76
2 COPPER 4NE SOUTHERN OREGON COASTAL 2002 COPO3 OCT 0.58
3 CAVE JUNCTION SOUTHERN OREGON COASTAL 2002 CVJO3 OCT 4.92
4 GOLD BEACH SOUTHERN OREGON COASTAL 2002 GOLO3 OCT 9.26
... ... ... ... ... ... ...
74491 KENTFIELD RUSSIAN...NAPA...SAN FRANCISCO BAY 2020 KENC1 SEP 0.00
74492 MOUNT HAMILTON RUSSIAN...NAPA...SAN FRANCISCO BAY 2020 MHMC1 SEP 0.00
74493 NAPA STATE HOSPITAL RUSSIAN...NAPA...SAN FRANCISCO BAY 2020 NSHC1 SEP 0.00
74494 OAKLAND MUSEUM RUSSIAN...NAPA...SAN FRANCISCO BAY 2020 OAMC1 SEP 0.00
74495 POTTER VALLEY PH RUSSIAN...NAPA...SAN FRANCISCO BAY 2020 PORC1 SEP 0.00

74496 rows × 6 columns

In [11]:
# Repivot the unpivot

# mmp_melted.pivot(index='Year', columns='variable')
mmp_tt = mmp_melted.pivot(index=['Location', 'Station', 'Year', 'ID'], columns='variable')
mmp_tt
Out[11]:
value
variable APR AUG DEC FEB JAN JUL JUN MAR MAY NOV OCT SEP
Location Station Year ID
ADIN SACRAMENTO...YUBA...FEATHER...AMERICAN 2002 ADNC1 3.68 0.00 4.74 2.44 3.24 0.00 0.20 2.80 0.92 4.92 0.42 0.00
2003 ADNC1 3.19 0.69 2.37 0.78 1.04 0.00 0.02 2.28 1.69 1.69 0.05 0.15
2004 ADNC1 0.51 0.11 4.38 2.76 1.12 0.00 0.66 0.50 2.36 1.19 0.00 0.09
2005 ADNC1 1.53 0.00 1.19 0.93 0.78 0.00 0.66 1.45 4.91 2.18 2.92 0.00
2006 ADNC1 2.75 0.04 6.02 1.35 2.41 0.12 0.58 2.57 0.75 3.23 1.16 0.01
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
YUMA LOWER COLORADO 2009 YUM 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
2010 YUM 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
2011 YUM 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
2012 YUM 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
2013 YUM 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00

6208 rows × 12 columns

In [12]:
mmp_tt.reset_index()
Out[12]:
Location Station Year ID value
variable APR AUG DEC FEB JAN JUL JUN MAR MAY NOV OCT SEP
0 ADIN SACRAMENTO...YUBA...FEATHER...AMERICAN 2002 ADNC1 3.68 0.00 4.74 2.44 3.24 0.00 0.20 2.80 0.92 4.92 0.42 0.00
1 ADIN SACRAMENTO...YUBA...FEATHER...AMERICAN 2003 ADNC1 3.19 0.69 2.37 0.78 1.04 0.00 0.02 2.28 1.69 1.69 0.05 0.15
2 ADIN SACRAMENTO...YUBA...FEATHER...AMERICAN 2004 ADNC1 0.51 0.11 4.38 2.76 1.12 0.00 0.66 0.50 2.36 1.19 0.00 0.09
3 ADIN SACRAMENTO...YUBA...FEATHER...AMERICAN 2005 ADNC1 1.53 0.00 1.19 0.93 0.78 0.00 0.66 1.45 4.91 2.18 2.92 0.00
4 ADIN SACRAMENTO...YUBA...FEATHER...AMERICAN 2006 ADNC1 2.75 0.04 6.02 1.35 2.41 0.12 0.58 2.57 0.75 3.23 1.16 0.01
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6203 YUMA LOWER COLORADO 2009 YUM 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
6204 YUMA LOWER COLORADO 2010 YUM 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
6205 YUMA LOWER COLORADO 2011 YUM 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
6206 YUMA LOWER COLORADO 2012 YUM 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
6207 YUMA LOWER COLORADO 2013 YUM 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00

6208 rows × 16 columns

[Extra] Multisets to Sets¶

Set up connections and schema¶

In [ ]:
%reload_ext sql
%sql postgresql://127.0.0.1:5432/postgres
import pandas as pd
In [ ]:
%%sql
drop table if exists blue;
drop table if exists red;
create table blue (last text, first text);
create table red (last text, first text);

insert into blue values ('Wang', 'Daisy');
insert into blue values ('Wang', 'Daisy');
insert into blue values ('Wang', 'Xin');

insert into red values ('Wang', 'Daisy');
insert into red values ('Wang', 'Xin');
insert into red values ('Wang', 'Xin');

select * from blue;
In [ ]:
%sql select * from red;

Representing multiset relations as counted-set relations¶

Use a CTAS statement with group by to convert standard tables to counted-set tables¶

In [ ]:
%%sql
drop table if exists bluem;
create table bluem as 
select *, count(*) as multiplicity
  from blue
 group by last, first;

select * from bluem;
In [ ]:
%%sql
drop table if exists redm;
create table redm as 
select *, count(*) as multiplicity
  from red
 group by last, first;

select * from redm;

How do we make selection on counted-set tables work like multisets?¶

Selection works exactly the same in both representations; there's nothing special here. Applying a WHERE filter to a counted set only removes rows, and removing rows from a set can never create duplicates, so the result is still a (counted) set.
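The same commutativity can be sketched in pandas (reconstructing blue to match the inserts above): filtering first and then counting gives the same counted set as counting first and then filtering.

```python
import pandas as pd

blue = pd.DataFrame({'last': ['Wang', 'Wang', 'Wang'],
                     'first': ['Daisy', 'Daisy', 'Xin']})
# Counted-set encoding: one row per distinct tuple plus a multiplicity
bluem = blue.groupby(['last', 'first']).size().reset_index(name='multiplicity')

# sigma then count ...
a = (blue[blue['first'] == 'Daisy']
     .groupby(['last', 'first']).size().reset_index(name='multiplicity'))
# ... equals count then sigma
b = bluem[bluem['first'] == 'Daisy'].reset_index(drop=True)
print(a.equals(b))
```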

In [ ]:
%%sql
-- sigma on multiset
select * from blue 
 where first = 'Daisy';
In [ ]:
%%sql
-- sigma on counted set
select * from bluem where first = 'Daisy';

What about projection?¶

We need to be a bit careful here. What uniquely identifies a row of a set is its key, and in this case the key is the combination (last, first). last alone, or first alone, is not enough to uniquely identify a row.

In [ ]:
%%sql
-- pi on multiset
select last from blue;

In fact, you can see that if you simply selected last from a counted set, you'd get a multiset as your output.

In [ ]:
%%sql
select last from bluem;

To convert this to a counted-set again, you need to sum up the multiplicities of the tuples that the last field came from.
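In pandas terms (a sketch over the same toy blue data), projecting a counted set and re-counting means grouping by the kept columns and summing multiplicity:

```python
import pandas as pd

# Counted-set version of blue: Daisy appears twice, Xin once
bluem = pd.DataFrame({'last': ['Wang', 'Wang'],
                      'first': ['Daisy', 'Xin'],
                      'multiplicity': [2, 1]})

# pi on a counted set: drop 'first', then sum multiplicities per remaining key
proj = bluem.groupby('last', as_index=False)['multiplicity'].sum()
print(proj)
```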

In [ ]:
%%sql
-- pi on counted set
select last, SUM(multiplicity) from bluem group by last;

What about cross-product?¶

In [ ]:
%%sql
-- x on multiset
select * from blue, red;

Next, convert the output of a multiset cross-product to a counted set as we did before. This is our desired result:

In [ ]:
%%sql
-- convert multiset x to counted set
  with cte(blast, bfirst, rlast, rfirst)
    as (select * from blue, red)
select *, count(*)
  from cte
 group by blast, bfirst, rlast, rfirst;

Now, what went on in the arithmetic here? We can think this through by pushing the arithmetic into the query!

First, what do you get with a naive cross-product of counted sets? You get the names from each table, along with the number of times each name showed up in its respective table. So, for example, ('Wang', 'Xin') showed up once in blue and twice in red.

In [ ]:
%%sql
select * from bluem, redm;

What does each row tell us individually? Each row gives the number of times the name on the left must be matched with the name on the right in the original cross product between blue and red. So if you multiply the two multiplicities together, you get the number of instances of each ordered pair of names in the final cross product.
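A pandas sketch of the same multiply-the-multiplicities rule (merge with how='cross' requires pandas >= 1.2):

```python
import pandas as pd

# Counted-set versions of blue and red from the inserts above
bluem = pd.DataFrame({'last': ['Wang', 'Wang'], 'first': ['Daisy', 'Xin'],
                      'multiplicity': [2, 1]})
redm = pd.DataFrame({'last': ['Wang', 'Wang'], 'first': ['Daisy', 'Xin'],
                     'multiplicity': [1, 2]})

cross = bluem.merge(redm, how='cross', suffixes=('_b', '_r'))
# A pair's count in the full cross product is the product of its two counts
cross['multiplicity'] = cross['multiplicity_b'] * cross['multiplicity_r']
print(cross[['first_b', 'first_r', 'multiplicity']])
```

The multiplicities sum to 9, matching the 3 × 3 rows of the raw blue × red cross product.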

In [ ]:
%%sql
-- fix multiplicity per row
select b.last, b.first, r.last, r.first,
       b.multiplicity * r.multiplicity as multiplicity
  from bluem b, redm r;

If we simply wanted to drop duplicates, instead of tracking how many there were (the whole point of a counted set), our life would have been a lot easier...

In [ ]:
%%sql
select distinct b.last, b.first, r.last, r.first
  from blue b, red r;

[Scratch] Transposing Demo Code¶

In [ ]:
%reload_ext sql
%sql postgresql://127.0.0.1:5432/postgres
import pandas as pd
In [ ]:
%%sql
drop table if exists example;
create table example(name text, age integer, gpa float);
insert into example values
       ('Patty Perfect', 22, 4.0),
       ('Sameer Soclose', 20, 3.99),
       ('Jacob Excellent', 21, 3.93);
In [ ]:
df = %sql select * from example;
df = df.DataFrame()
In [ ]:
df
In [ ]:
df.dtypes
In [ ]:
dft = df.transpose()
dft
In [ ]:
dft.dtypes
In [ ]:
df2 = df.transpose().transpose()
df2
In [ ]:
df2.dtypes
In [ ]:
df2['age'] = df2['age'].astype(int)
df2['gpa'] = df2['gpa'].astype(float)
In [ ]:
df2.dtypes
In [ ]:
mat = np.array([[1, 0, 0, 1, 20000],
          [0, 1, 0, 2, 10011],
          [0, 0, 1, 3, 50000],
          [0, 0, 1, 3, 10000]])
mat
In [ ]:
df = pd.DataFrame({'CompanyName': ['VW', 'Acura', 'Honda', 'Honda'],
              'Categorical Value': [1, 2, 3, 3],
              'Price': [20000, 10011, 50000, 10000]})
df
In [ ]:
df.dtypes
In [ ]:
# %sql --persist df