import json
import pymongo
import pprint
Nobel laureate data from Kaggle: https://www.kaggle.com/datasets/imdevskp/nobel-prize/data
client = pymongo.MongoClient('mongodb://localhost')
client.list_database_names()
['admin', 'aggquery', 'aggquerydb', 'config', 'local', 'nobel_prizes', 'tutorial']
# client.drop_database('nobel_prizes') # if already exists
Connect to an empty database called nobel_prizes
db = client.nobel_prizes
# db = client['nobel_prizes'] # also works
db
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'nobel_prizes')
db.list_collection_names()
['prizes']
Within this empty database, access an empty collection called prizes
.
collection = db.prizes
# collection = db['prizes'] # also works
collection
Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'nobel_prizes'), 'prizes')
find_one()
returns None (i.e., displays nothing in notebooks)
db.prizes.find_one({})
{'_id': ObjectId('65371d7425624134ef010320'), 'year': 2020, 'category': 'chemistry', 'laureates': [{'id': '991', 'firstname': 'Emmanuelle', 'surname': 'Charpentier', 'motivation': '"for the development of a method for genome editing"', 'share': '2'}, {'id': '992', 'firstname': 'Jennifer A.', 'surname': 'Doudna', 'motivation': '"for the development of a method for genome editing"', 'share': '2'}]}
print(db.prizes.find_one({}))
{'_id': ObjectId('65371d7425624134ef010320'), 'year': 2020, 'category': 'chemistry', 'laureates': [{'id': '991', 'firstname': 'Emmanuelle', 'surname': 'Charpentier', 'motivation': '"for the development of a method for genome editing"', 'share': '2'}, {'id': '992', 'firstname': 'Jennifer A.', 'surname': 'Doudna', 'motivation': '"for the development of a method for genome editing"', 'share': '2'}]}
# run this cell to insert into the collection prizes
with open('data/prize.json', encoding='utf-8') as f:
db.prizes.insert_many(json.loads(f.read()))
# the database and the collection are now both valid
client.list_database_names()
['admin', 'aggquery', 'aggquerydb', 'config', 'local', 'nobel_prizes', 'tutorial']
db.list_collection_names()
['prizes']
Select all field-value pairs to output.
db.prizes.find_one({})
{'_id': ObjectId('65371d7425624134ef010320'), 'year': 2020, 'category': 'chemistry', 'laureates': [{'id': '991', 'firstname': 'Emmanuelle', 'surname': 'Charpentier', 'motivation': '"for the development of a method for genome editing"', 'share': '2'}, {'id': '992', 'firstname': 'Jennifer A.', 'surname': 'Doudna', 'motivation': '"for the development of a method for genome editing"', 'share': '2'}]}
Pymongo is smart and avoids querying the full collection by instead returning an iterator (here, a Cursor):
db.prizes.find({})
<pymongo.cursor.Cursor at 0x7f1170820a50>
It's not recommended, but you could force query the full collection by casting to list (e.g., list(db.prizes.find({}))
).
Instead let's iterate over the collection a tiny bit so that we can see how to pretty print each document with pprint
:
def pretty_print(output_collection, n_to_print=3):
"""
note if n_to_print is -1, this print everything
"""
for i, doc in enumerate(output_collection):
pprint.pprint(doc) # nicely formats each document
if i+1 == n_to_print: return
output = db.prizes.find({})
pretty_print(output)
{'_id': ObjectId('65371d7425624134ef010320'), 'category': 'chemistry', 'laureates': [{'firstname': 'Emmanuelle', 'id': '991', 'motivation': '"for the development of a method for genome ' 'editing"', 'share': '2', 'surname': 'Charpentier'}, {'firstname': 'Jennifer A.', 'id': '992', 'motivation': '"for the development of a method for genome ' 'editing"', 'share': '2', 'surname': 'Doudna'}], 'year': 2020} {'_id': ObjectId('65371d7425624134ef010321'), 'category': 'economics', 'laureates': [{'firstname': 'Paul', 'id': '995', 'motivation': '"for improvements to auction theory and ' 'inventions of new auction formats"', 'share': '2', 'surname': 'Milgrom'}, {'firstname': 'Robert', 'id': '996', 'motivation': '"for improvements to auction theory and ' 'inventions of new auction formats"', 'share': '2', 'surname': 'Wilson'}], 'year': 2020} {'_id': ObjectId('65371d7425624134ef010322'), 'category': 'literature', 'laureates': [{'firstname': 'Louise', 'id': '993', 'motivation': '"for her unmistakable poetic voice that with ' 'austere beauty makes individual existence ' 'universal"', 'share': '1', 'surname': 'Glück'}], 'year': 2020}
# get a document that has the exact FV pair
db.prizes.find_one({"category": "chemistry"})
{'_id': ObjectId('65371d7425624134ef010320'), 'year': 2020, 'category': 'chemistry', 'laureates': [{'id': '991', 'firstname': 'Emmanuelle', 'surname': 'Charpentier', 'motivation': '"for the development of a method for genome editing"', 'share': '2'}, {'id': '992', 'firstname': 'Jennifer A.', 'surname': 'Doudna', 'motivation': '"for the development of a method for genome editing"', 'share': '2'}]}
# the comma functions as the "and" operator
db.prizes.find_one({"category": "chemistry", "year": 2020})
{'_id': ObjectId('65371d7425624134ef010320'), 'year': 2020, 'category': 'chemistry', 'laureates': [{'id': '991', 'firstname': 'Emmanuelle', 'surname': 'Charpentier', 'motivation': '"for the development of a method for genome editing"', 'share': '2'}, {'id': '992', 'firstname': 'Jennifer A.', 'surname': 'Doudna', 'motivation': '"for the development of a method for genome editing"', 'share': '2'}]}
$
Notation: Special MongoDB Keywords¶(we defined the pretty_print()
function earlier)
output = db.prizes.find({"$or": [{"category": "chemistry"}, {"year": 2020}]})
pretty_print(output, n_to_print=4)
{'_id': ObjectId('65371d7425624134ef010320'), 'category': 'chemistry', 'laureates': [{'firstname': 'Emmanuelle', 'id': '991', 'motivation': '"for the development of a method for genome ' 'editing"', 'share': '2', 'surname': 'Charpentier'}, {'firstname': 'Jennifer A.', 'id': '992', 'motivation': '"for the development of a method for genome ' 'editing"', 'share': '2', 'surname': 'Doudna'}], 'year': 2020} {'_id': ObjectId('65371d7425624134ef010321'), 'category': 'economics', 'laureates': [{'firstname': 'Paul', 'id': '995', 'motivation': '"for improvements to auction theory and ' 'inventions of new auction formats"', 'share': '2', 'surname': 'Milgrom'}, {'firstname': 'Robert', 'id': '996', 'motivation': '"for improvements to auction theory and ' 'inventions of new auction formats"', 'share': '2', 'surname': 'Wilson'}], 'year': 2020} {'_id': ObjectId('65371d7425624134ef010322'), 'category': 'literature', 'laureates': [{'firstname': 'Louise', 'id': '993', 'motivation': '"for her unmistakable poetic voice that with ' 'austere beauty makes individual existence ' 'universal"', 'share': '1', 'surname': 'Glück'}], 'year': 2020} {'_id': ObjectId('65371d7425624134ef010323'), 'category': 'peace', 'laureates': [{'firstname': 'World Food Programme', 'id': '994', 'motivation': '"for its efforts to combat hunger, for its ' 'contribution to bettering conditions for peace ' 'in conflict-affected areas and for acting as a ' 'driving force in efforts to prevent the use of ' 'hunger as a weapon of war and conflict"', 'share': '1'}], 'year': 2020}
output = db.prizes.find({"year": {"$gt": 2018}}, skip=6) # skip a bit down the collection just because
pretty_print(output)
{'_id': ObjectId('65371d7425624134ef010326'), 'category': 'chemistry', 'laureates': [{'firstname': 'John', 'id': '976', 'motivation': '"for the development of lithium-ion batteries"', 'share': '3', 'surname': 'Goodenough'}, {'firstname': 'M. Stanley', 'id': '977', 'motivation': '"for the development of lithium-ion batteries"', 'share': '3', 'surname': 'Whittingham'}, {'firstname': 'Akira', 'id': '978', 'motivation': '"for the development of lithium-ion batteries"', 'share': '3', 'surname': 'Yoshino'}], 'year': 2019} {'_id': ObjectId('65371d7425624134ef010327'), 'category': 'economics', 'laureates': [{'firstname': 'Abhijit', 'id': '982', 'motivation': '"for their experimental approach to alleviating ' 'global poverty"', 'share': '3', 'surname': 'Banerjee'}, {'firstname': 'Esther', 'id': '983', 'motivation': '"for their experimental approach to alleviating ' 'global poverty"', 'share': '3', 'surname': 'Duflo'}, {'firstname': 'Michael', 'id': '984', 'motivation': '"for their experimental approach to alleviating ' 'global poverty"', 'share': '3', 'surname': 'Kremer'}], 'year': 2019} {'_id': ObjectId('65371d7425624134ef010328'), 'category': 'literature', 'laureates': [{'firstname': 'Peter', 'id': '980', 'motivation': '"for an influential work that with linguistic ' 'ingenuity has explored the periphery and the ' 'specificity of human experience"', 'share': '1', 'surname': 'Handke'}], 'year': 2019}
.
Notation: Traverse Trees¶output = db.prizes.find({"laureates.0.surname": "Curie"})
pretty_print(output)
{'_id': ObjectId('65371d7425624134ef010575'), 'category': 'chemistry', 'laureates': [{'firstname': 'Marie', 'id': '6', 'motivation': '"in recognition of her services to the ' 'advancement of chemistry by the discovery of ' 'the elements radium and polonium, by the ' 'isolation of radium and the study of the nature ' 'and compounds of this remarkable element"', 'share': '1', 'surname': 'Curie'}], 'year': 1911} {'_id': ObjectId('6538178131ad5e3e548b9456'), 'category': 'chemistry', 'laureates': [{'firstname': 'Marie', 'id': '6', 'motivation': '"in recognition of her services to the ' 'advancement of chemistry by the discovery of ' 'the elements radium and polonium, by the ' 'isolation of radium and the study of the nature ' 'and compounds of this remarkable element"', 'share': '1', 'surname': 'Curie'}], 'year': 1911}
output = db.prizes.find({"laureates.1.surname": "Curie"})
pretty_print(output)
{'_id': ObjectId('65371d7425624134ef0105a0'), 'category': 'physics', 'laureates': [{'firstname': 'Henri', 'id': '4', 'motivation': '"in recognition of the extraordinary services ' 'he has rendered by his discovery of spontaneous ' 'radioactivity"', 'share': '2', 'surname': 'Becquerel'}, {'firstname': 'Pierre', 'id': '5', 'motivation': '"in recognition of the extraordinary services ' 'they have rendered by their joint researches on ' 'the radiation phenomena discovered by Professor ' 'Henri Becquerel"', 'share': '4', 'surname': 'Curie'}, {'firstname': 'Marie', 'id': '6', 'motivation': '"in recognition of the extraordinary services ' 'they have rendered by their joint researches on ' 'the radiation phenomena discovered by Professor ' 'Henri Becquerel"', 'share': '4', 'surname': 'Curie'}], 'year': 1903} {'_id': ObjectId('6538178131ad5e3e548b9481'), 'category': 'physics', 'laureates': [{'firstname': 'Henri', 'id': '4', 'motivation': '"in recognition of the extraordinary services ' 'he has rendered by his discovery of spontaneous ' 'radioactivity"', 'share': '2', 'surname': 'Becquerel'}, {'firstname': 'Pierre', 'id': '5', 'motivation': '"in recognition of the extraordinary services ' 'they have rendered by their joint researches on ' 'the radiation phenomena discovered by Professor ' 'Henri Becquerel"', 'share': '4', 'surname': 'Curie'}, {'firstname': 'Marie', 'id': '6', 'motivation': '"in recognition of the extraordinary services ' 'they have rendered by their joint researches on ' 'the radiation phenomena discovered by Professor ' 'Henri Becquerel"', 'share': '4', 'surname': 'Curie'}], 'year': 1903}
output = db.prizes.find({"laureates.surname": "Curie"})
pretty_print(output)
{'_id': ObjectId('65371d7425624134ef010575'), 'category': 'chemistry', 'laureates': [{'firstname': 'Marie', 'id': '6', 'motivation': '"in recognition of her services to the ' 'advancement of chemistry by the discovery of ' 'the elements radium and polonium, by the ' 'isolation of radium and the study of the nature ' 'and compounds of this remarkable element"', 'share': '1', 'surname': 'Curie'}], 'year': 1911} {'_id': ObjectId('65371d7425624134ef0105a0'), 'category': 'physics', 'laureates': [{'firstname': 'Henri', 'id': '4', 'motivation': '"in recognition of the extraordinary services ' 'he has rendered by his discovery of spontaneous ' 'radioactivity"', 'share': '2', 'surname': 'Becquerel'}, {'firstname': 'Pierre', 'id': '5', 'motivation': '"in recognition of the extraordinary services ' 'they have rendered by their joint researches on ' 'the radiation phenomena discovered by Professor ' 'Henri Becquerel"', 'share': '4', 'surname': 'Curie'}, {'firstname': 'Marie', 'id': '6', 'motivation': '"in recognition of the extraordinary services ' 'they have rendered by their joint researches on ' 'the radiation phenomena discovered by Professor ' 'Henri Becquerel"', 'share': '4', 'surname': 'Curie'}], 'year': 1903} {'_id': ObjectId('6538178131ad5e3e548b9456'), 'category': 'chemistry', 'laureates': [{'firstname': 'Marie', 'id': '6', 'motivation': '"in recognition of her services to the ' 'advancement of chemistry by the discovery of ' 'the elements radium and polonium, by the ' 'isolation of radium and the study of the nature ' 'and compounds of this remarkable element"', 'share': '1', 'surname': 'Curie'}], 'year': 1911}
output = db.prizes.find({}, {"year": 1, "category": 1})
pretty_print(output)
{'_id': ObjectId('65371d7425624134ef010320'), 'category': 'chemistry', 'year': 2020} {'_id': ObjectId('65371d7425624134ef010321'), 'category': 'economics', 'year': 2020} {'_id': ObjectId('65371d7425624134ef010322'), 'category': 'literature', 'year': 2020}
output = db.prizes.find({}, {"year": 1, "category": 1, "_id": 0})
pretty_print(output)
{'category': 'chemistry', 'year': 2020} {'category': 'economics', 'year': 2020} {'category': 'literature', 'year': 2020}
output = db.prizes.find({}, {"year": 0, "category": 0})
pretty_print(output)
{'_id': ObjectId('65371d7425624134ef010320'), 'laureates': [{'firstname': 'Emmanuelle', 'id': '991', 'motivation': '"for the development of a method for genome ' 'editing"', 'share': '2', 'surname': 'Charpentier'}, {'firstname': 'Jennifer A.', 'id': '992', 'motivation': '"for the development of a method for genome ' 'editing"', 'share': '2', 'surname': 'Doudna'}]} {'_id': ObjectId('65371d7425624134ef010321'), 'laureates': [{'firstname': 'Paul', 'id': '995', 'motivation': '"for improvements to auction theory and ' 'inventions of new auction formats"', 'share': '2', 'surname': 'Milgrom'}, {'firstname': 'Robert', 'id': '996', 'motivation': '"for improvements to auction theory and ' 'inventions of new auction formats"', 'share': '2', 'surname': 'Wilson'}]} {'_id': ObjectId('65371d7425624134ef010322'), 'laureates': [{'firstname': 'Louise', 'id': '993', 'motivation': '"for her unmistakable poetic voice that with ' 'austere beauty makes individual existence ' 'universal"', 'share': '1', 'surname': 'Glück'}]}
output = db.prizes.find({}, {"year": 0, "category": 1})
pretty_print(output)
--------------------------------------------------------------------------- OperationFailure Traceback (most recent call last) Cell In[26], line 3 1 output = db.prizes.find({}, {"year": 0, "category": 1}) ----> 3 pretty_print(output) Cell In[14], line 5, in pretty_print(output_collection, n_to_print) 1 def pretty_print(output_collection, n_to_print=3): 2 """ 3 note if n_to_print is -1, this print everything 4 """ ----> 5 for i, doc in enumerate(output_collection): 6 pprint.pprint(doc) # nicely formats each document 7 if i+1 == n_to_print: return File /srv/conda/envs/notebook/lib/python3.11/site-packages/pymongo/cursor.py:1251, in Cursor.next(self) 1249 if self.__empty: 1250 raise StopIteration -> 1251 if len(self.__data) or self._refresh(): 1252 return self.__data.popleft() 1253 else: File /srv/conda/envs/notebook/lib/python3.11/site-packages/pymongo/cursor.py:1168, in Cursor._refresh(self) 1146 raise InvalidOperation( 1147 "Passing a 'hint' is required when using the min/max query" 1148 " option to ensure the query utilizes the correct index" 1149 ) 1150 q = self._query_class( 1151 self.__query_flags, 1152 self.__collection.database.name, (...) 1166 self.__exhaust, 1167 ) -> 1168 self.__send_message(q) 1169 elif self.__id: # Get More 1170 if self.__limit: File /srv/conda/envs/notebook/lib/python3.11/site-packages/pymongo/cursor.py:1055, in Cursor.__send_message(self, operation) 1052 raise InvalidOperation("exhaust cursors do not support auto encryption") 1054 try: -> 1055 response = client._run_operation( 1056 operation, self._unpack_response, address=self.__address 1057 ) 1058 except OperationFailure as exc: 1059 if exc.code in _CURSOR_CLOSED_ERRORS or self.__exhaust: 1060 # Don't send killCursors because the cursor is already closed. File /srv/conda/envs/notebook/lib/python3.11/site-packages/pymongo/_csot.py:106, in apply.<locals>.csot_wrapper(self, *args, **kwargs) 104 with _TimeoutContext(timeout): 105 return func(self, *args, **kwargs) --> 106 return func(self, *args, **kwargs) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pymongo/mongo_client.py:1341, in MongoClient._run_operation(self, operation, unpack_res, address) 1336 operation.reset() # Reset op in case of retry. 1337 return server.run_operation( 1338 sock_info, operation, read_preference, self._event_listeners, unpack_res 1339 ) -> 1341 return self._retryable_read( 1342 _cmd, 1343 operation.read_preference, 1344 operation.session, 1345 address=address, 1346 retryable=isinstance(operation, message._Query), 1347 ) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pymongo/_csot.py:106, in apply.<locals>.csot_wrapper(self, *args, **kwargs) 104 with _TimeoutContext(timeout): 105 return func(self, *args, **kwargs) --> 106 return func(self, *args, **kwargs) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pymongo/mongo_client.py:1465, in MongoClient._retryable_read(self, func, read_pref, session, address, retryable) 1463 assert last_error is not None 1464 raise last_error -> 1465 return func(session, server, sock_info, read_pref) 1466 except ServerSelectionTimeoutError: 1467 if retrying: 1468 # The application may think the write was never attempted 1469 # if we raise ServerSelectionTimeoutError on the retry 1470 # attempt. Raise the original exception instead. File /srv/conda/envs/notebook/lib/python3.11/site-packages/pymongo/mongo_client.py:1337, in MongoClient._run_operation.<locals>._cmd(session, server, sock_info, read_preference) 1335 def _cmd(session, server, sock_info, read_preference): 1336 operation.reset() # Reset op in case of retry. -> 1337 return server.run_operation( 1338 sock_info, operation, read_preference, self._event_listeners, unpack_res 1339 ) File /srv/conda/envs/notebook/lib/python3.11/site-packages/pymongo/helpers.py:279, in _handle_reauth.<locals>.inner(*args, **kwargs) 276 from pymongo.pool import SocketInfo 278 try: --> 279 return func(*args, **kwargs) 280 except OperationFailure as exc: 281 if no_reauth: File /srv/conda/envs/notebook/lib/python3.11/site-packages/pymongo/server.py:135, in Server.run_operation(self, sock_info, operation, read_preference, listeners, unpack_res) 133 first = docs[0] 134 operation.client._process_response(first, operation.session) --> 135 _check_command_response(first, sock_info.max_wire_version) 136 except Exception as exc: 137 if publish: File /srv/conda/envs/notebook/lib/python3.11/site-packages/pymongo/helpers.py:194, in _check_command_response(response, max_wire_version, allowable_errors, parse_write_concern_error) 191 elif code == 43: 192 raise CursorNotFound(errmsg, code, response, max_wire_version) --> 194 raise OperationFailure(errmsg, code, response, max_wire_version) OperationFailure: Cannot do inclusion on field category in exclusion projection, full error: {'ok': 0.0, 'errmsg': 'Cannot do inclusion on field category in exclusion projection', 'code': 31253, 'codeName': 'Location31253'}
db.prizes.find_one({"overallMotivation": {"$exists": 1}})
{'_id': ObjectId('65371d7425624134ef01032a'), 'year': 2019, 'category': 'physics', 'overallMotivation': '"for contributions to our understanding of the evolution of the universe and Earth’s place in the cosmos"', 'laureates': [{'id': '973', 'firstname': 'James', 'surname': 'Peebles', 'motivation': '"for theoretical discoveries in physical cosmology"', 'share': '2'}, {'id': '974', 'firstname': 'Michel', 'surname': 'Mayor', 'motivation': '"for the discovery of an exoplanet orbiting a solar-type star"', 'share': '4'}, {'id': '975', 'firstname': 'Didier', 'surname': 'Queloz', 'motivation': '"for the discovery of an exoplanet orbiting a solar-type star"', 'share': '4'}]}
output = (db.prizes.find({"category": "peace"},
{"_id": 0, "category": 1, "year": 1,
"laureates.firstname": 1, "laureates.surname": 1})
.sort("year")
)
pretty_print(output)
{'category': 'peace', 'laureates': [{'firstname': 'Henry', 'surname': 'Dunant'}, {'firstname': 'Frédéric', 'surname': 'Passy'}], 'year': 1901} {'category': 'peace', 'laureates': [{'firstname': 'Henry', 'surname': 'Dunant'}, {'firstname': 'Frédéric', 'surname': 'Passy'}], 'year': 1901} {'category': 'peace', 'laureates': [{'firstname': 'Élie', 'surname': 'Ducommun'}, {'firstname': 'Albert', 'surname': 'Gobat'}], 'year': 1902}
output = (db.prizes.find({"category": "peace"},
{"_id": 0, "category": 1, "year": 1,
"laureates.firstname": 1, "laureates.surname": 1})
.sort("year", -1)
)
pretty_print(output)
{'category': 'peace', 'laureates': [{'firstname': 'World Food Programme'}], 'year': 2020} {'category': 'peace', 'laureates': [{'firstname': 'World Food Programme'}], 'year': 2020} {'category': 'peace', 'laureates': [{'firstname': 'Abiy', 'surname': 'Ahmed Ali'}], 'year': 2019}
output = (db.prizes.find({"category": "peace"},
{"_id": 0, "category": 1, "year": 1,
"laureates.firstname": 1, "laureates.surname": 1})
.sort([("year", 1), ("category", -1)])
)
pretty_print(output)
{'category': 'peace', 'laureates': [{'firstname': 'Henry', 'surname': 'Dunant'}, {'firstname': 'Frédéric', 'surname': 'Passy'}], 'year': 1901} {'category': 'peace', 'laureates': [{'firstname': 'Henry', 'surname': 'Dunant'}, {'firstname': 'Frédéric', 'surname': 'Passy'}], 'year': 1901} {'category': 'peace', 'laureates': [{'firstname': 'Élie', 'surname': 'Ducommun'}, {'firstname': 'Albert', 'surname': 'Gobat'}], 'year': 1902}
output = (db.prizes.find({"category": "peace"},
{"_id": 0, "category": 1, "year": 1,
"laureates.firstname": 1, "laureates.surname": 1})
.sort([("year", 1), ("category", -1)])
.limit(2)
)
pretty_print(output)
{'category': 'peace', 'laureates': [{'firstname': 'Henry', 'surname': 'Dunant'}, {'firstname': 'Frédéric', 'surname': 'Passy'}], 'year': 1901} {'category': 'peace', 'laureates': [{'firstname': 'Henry', 'surname': 'Dunant'}, {'firstname': 'Frédéric', 'surname': 'Passy'}], 'year': 1901}
Zips JSON from the MongoDB Aggregation Tutorial: https://www.mongodb.com/docs/manual/tutorial/aggregation-zip-code-data-set/
zips.json
into new local.zips
¶For the sake of simplicity, we'll make a new collection zips
in a new aggquerydb
database.
# reimport/redefine as needed
import json
import pymongo
import pprint
def pretty_print(output_collection, n_to_print=3):
"""
note if n_to_print is -1, this print everything
"""
for i, doc in enumerate(output_collection):
pprint.pprint(doc) # nicely formats each document
if i+1 == n_to_print: return
client = pymongo.MongoClient('mongodb://localhost')
client.list_database_names()
['admin', 'aggquery', 'aggquerydb', 'config', 'local', 'nobel_prizes', 'tutorial']
# run this cell to make the new collection
# and insert zipcode documents
client.drop_database('aggquerydb') # if already exists
db = client.aggquerydb
with open('data/zips.json', encoding='utf-8') as f:
for line in f.readlines():
db.zips.insert_one(json.loads(line))
db.zips.count_documents({})
29353
output = db.zips.aggregate( [
{ "$group": { "_id": "$state",
"totalPop": { "$sum": "$pop" } } },
{ "$match": { "totalPop":
{ "$gte": 15000000 } } },
{ "$sort" : { "totalPop" : -1 } }
] )
pretty_print(output)
{'_id': 'CA', 'totalPop': 29754890} {'_id': 'NY', 'totalPop': 17990402} {'_id': 'TX', 'totalPop': 16984601}
Make a new collection, inventory
.
# stay in the aggquerydb database
db = client.aggquerydb
db.inventory.insert_many( [
{ "item": "journal",
"tags": ["blank", "red"],
"dim": [ 14, 21 ],
"instock": [ { "loc": "A", "qty": 5 }, { "loc": "C", "qty": 15 } ]
},
{ "item": "notebook",
"tags": ["red", "blank"],
"dim": [ 14, 21 ],
"instock": [ { "loc": "C", "qty": 5 } ]
},
{ "item": "paper",
"tags": ["red", "blank", "plain"],
"dim": [ 14, 21 ],
"instock": [ { "loc": "A", "qty": 60 }, { "loc": "B", "qty": 15 } ]
},
{ "item": "planner",
"tags": ["blank", "red"],
"dim": [ 22.85, 30 ] ,
"instock": [ { "loc": "A", "qty": 40 }, { "loc": "B", "qty": 5 } ]
},
{ "item": "postcard",
"tags": ["blue"],
"dim": [ 10, 15.25 ],
"instock": [ { "loc": "B", "qty": 15 }, { "loc": "C", "qty": 35 } ]
}
]);
output = db.inventory.aggregate( [
{ "$unwind" : "$tags" },
{ "$project" : {"_id" : 0, "instock": 0}}
] )
pretty_print(output, n_to_print=-1)
{'dim': [14, 21], 'item': 'journal', 'tags': 'blank'} {'dim': [14, 21], 'item': 'journal', 'tags': 'red'} {'dim': [14, 21], 'item': 'notebook', 'tags': 'red'} {'dim': [14, 21], 'item': 'notebook', 'tags': 'blank'} {'dim': [14, 21], 'item': 'paper', 'tags': 'red'} {'dim': [14, 21], 'item': 'paper', 'tags': 'blank'} {'dim': [14, 21], 'item': 'paper', 'tags': 'plain'} {'dim': [22.85, 30], 'item': 'planner', 'tags': 'blank'} {'dim': [22.85, 30], 'item': 'planner', 'tags': 'red'} {'dim': [10, 15.25], 'item': 'postcard', 'tags': 'blue'}
output = db.inventory.aggregate( [
{ "$unwind" : "$instock" },
{ "$group" : { "_id" : "$item", "totalqty" : {"$sum" : "$instock.qty"}}}
] )
pretty_print(output, n_to_print=-1)
{'_id': 'notebook', 'totalqty': 5} {'_id': 'journal', 'totalqty': 20} {'_id': 'planner', 'totalqty': 45} {'_id': 'postcard', 'totalqty': 50} {'_id': 'paper', 'totalqty': 75}
output = db.inventory.aggregate( [
{ "$lookup" : {
"from" : "inventory",
"localField": "instock.loc",
"foreignField": "instock.loc",
"as":"otheritems"}
},
{ "$project" :
{"_id" : 0, "tags" : 0, "dim" : 0}
} ] )
pretty_print(output, n_to_print=1)
{'instock': [{'loc': 'A', 'qty': 5}, {'loc': 'C', 'qty': 15}], 'item': 'journal', 'otheritems': [{'_id': ObjectId('6538179131ad5e3e548b948e'), 'dim': [14, 21], 'instock': [{'loc': 'A', 'qty': 5}, {'loc': 'C', 'qty': 15}], 'item': 'journal', 'tags': ['blank', 'red']}, {'_id': ObjectId('6538179131ad5e3e548b948f'), 'dim': [14, 21], 'instock': [{'loc': 'C', 'qty': 5}], 'item': 'notebook', 'tags': ['red', 'blank']}, {'_id': ObjectId('6538179131ad5e3e548b9490'), 'dim': [14, 21], 'instock': [{'loc': 'A', 'qty': 60}, {'loc': 'B', 'qty': 15}], 'item': 'paper', 'tags': ['red', 'blank', 'plain']}, {'_id': ObjectId('6538179131ad5e3e548b9491'), 'dim': [22.85, 30], 'instock': [{'loc': 'A', 'qty': 40}, {'loc': 'B', 'qty': 5}], 'item': 'planner', 'tags': ['blank', 'red']}, {'_id': ObjectId('6538179131ad5e3e548b9492'), 'dim': [10, 15.25], 'instock': [{'loc': 'B', 'qty': 15}, {'loc': 'C', 'qty': 35}], 'item': 'postcard', 'tags': ['blue']}]}