Skip to content

Commit 8ede68c

Browse files
authored
Merge pull request #136 from AllenNeuralDynamics/release-v1.2.0
Release v1.2.0
2 parents 1c90371 + dceb0a2 commit 8ede68c

File tree

3 files changed

+134
-12
lines changed

3 files changed

+134
-12
lines changed

src/aind_data_access_api/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Init package"""
22

3-
__version__ = "1.1.0"
3+
__version__ = "1.2.0"

src/aind_data_access_api/utils.py

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def paginate_docdb(
118118
119119
Parameters
120120
----------
121-
docdb_api_client : MongoClient
121+
docdb_api_client : MetadataDbClient
122122
page_size : int
123123
Default is 500
124124
filter_query : Optional[dict]
@@ -147,6 +147,37 @@ def paginate_docdb(
147147
skip += len(page)
148148

149149

150+
def fetch_records_by_filter_list(
    docdb_api_client: MetadataDbClient,
    filter_key: str,
    filter_values: List[str],
    projection: Optional[dict] = None,
) -> List[dict]:
    """
    Queries DocDB for records where the value of a specified field is in a
    list of values. Uses an aggregation pipeline with $in filter operator.

    Parameters
    ----------
    docdb_api_client : MetadataDbClient
      Client whose ``aggregate_docdb_records`` method runs the pipeline.
    filter_key : str
      The field to filter on.
    filter_values : List[str]
      The list of values to filter on. If empty, no query is issued and an
      empty list is returned.
    projection : Optional[dict]
      Subset of fields to return. Default is None which returns all fields.

    Returns
    -------
    List[dict]
      The records returned by the aggregation, or an empty list when
      ``filter_values`` is empty.

    """
    # An empty ``$in`` list cannot match any document, so skip the round
    # trip to DocDB and return the (necessarily empty) result directly.
    if not filter_values:
        return []
    agg_pipeline = [{"$match": {filter_key: {"$in": filter_values}}}]
    # Only add a $project stage when a non-empty projection was supplied;
    # otherwise every field of the matching records is returned.
    if projection:
        agg_pipeline.append({"$project": projection})
    return docdb_api_client.aggregate_docdb_records(pipeline=agg_pipeline)
179+
180+
150181
def build_docdb_location_to_id_map(
151182
docdb_api_client: MetadataDbClient,
152183
bucket: str,
@@ -160,7 +191,7 @@ def build_docdb_location_to_id_map(
160191
161192
Parameters
162193
----------
163-
docdb_api_client : MongoClient
194+
docdb_api_client : MetadataDbClient
164195
bucket : str
165196
prefixes : List[str]
166197
@@ -170,13 +201,12 @@ def build_docdb_location_to_id_map(
170201
171202
"""
172203
locations = [get_s3_location(bucket=bucket, prefix=p) for p in prefixes]
173-
filter_query = {"location": {"$regex": f"s3://{bucket}/"}}
174204
projection = {"_id": 1, "location": 1}
175-
results = docdb_api_client.retrieve_docdb_records(
176-
filter_query=filter_query, projection=projection
205+
results = fetch_records_by_filter_list(
206+
docdb_api_client=docdb_api_client,
207+
filter_key="location",
208+
filter_values=locations,
209+
projection=projection,
177210
)
178-
# only return locations that are in the list of prefixes
179-
location_to_id_map = {
180-
r["location"]: r["_id"] for r in results if r["location"] in locations
181-
}
211+
location_to_id_map = {r["location"]: r["_id"] for r in results}
182212
return location_to_id_map

tests/test_utils.py

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from aind_data_access_api.utils import (
1010
build_docdb_location_to_id_map,
1111
does_metadata_record_exist_in_docdb,
12+
fetch_records_by_filter_list,
1213
get_record_from_docdb,
1314
get_s3_bucket_and_prefix,
1415
get_s3_location,
@@ -138,12 +139,93 @@ def test_paginate_docdb(self, mock_docdb_api_client: MagicMock):
138139
self.assertEqual(expected_results, actual_results)
139140

140141
@patch("aind_data_access_api.document_db.MetadataDbClient")
141-
def test_build_docdb_location_to_id_map(
142+
def test_fetch_records_by_filter_list(
142143
self, mock_docdb_api_client: MagicMock
144+
):
145+
"""Tests fetch_records_by_filter_list"""
146+
expected_records = [
147+
{
148+
"_id": "70bcf356-985f-4a2a-8105-de900e35e788",
149+
"name": "prefix1",
150+
"location": "s3://bucket/prefix1",
151+
},
152+
{
153+
"_id": "5ca4a951-d374-4f4b-8279-d570a35b2286",
154+
"name": "prefix2",
155+
"location": "s3://bucket/prefix2",
156+
},
157+
]
158+
mock_docdb_api_client.aggregate_docdb_records.return_value = (
159+
expected_records
160+
)
161+
162+
records = fetch_records_by_filter_list(
163+
docdb_api_client=mock_docdb_api_client,
164+
filter_key="name",
165+
filter_values=["prefix1", "prefix2", "missing_prefix"],
166+
)
167+
self.assertEqual(expected_records, records)
168+
mock_docdb_api_client.aggregate_docdb_records.assert_called_once_with(
169+
pipeline=[
170+
{
171+
"$match": {
172+
"name": {
173+
"$in": ["prefix1", "prefix2", "missing_prefix"]
174+
}
175+
}
176+
},
177+
]
178+
)
179+
180+
@patch("aind_data_access_api.document_db.MetadataDbClient")
181+
def test_fetch_records_by_filter_list_projection(
182+
self, mock_docdb_api_client: MagicMock
183+
):
184+
"""Tests fetch_records_by_filter_list with projection"""
185+
expected_records = [
186+
{
187+
"_id": "70bcf356-985f-4a2a-8105-de900e35e788",
188+
"name": "prefix1",
189+
},
190+
{
191+
"_id": "5ca4a951-d374-4f4b-8279-d570a35b2286",
192+
"name": "prefix2",
193+
},
194+
]
195+
mock_docdb_api_client.aggregate_docdb_records.return_value = (
196+
expected_records
197+
)
198+
199+
records = fetch_records_by_filter_list(
200+
docdb_api_client=mock_docdb_api_client,
201+
filter_key="name",
202+
filter_values=["prefix1", "prefix2", "missing_prefix"],
203+
projection={"_id": 1, "name": 1},
204+
)
205+
self.assertEqual(expected_records, records)
206+
mock_docdb_api_client.aggregate_docdb_records.assert_called_once_with(
207+
pipeline=[
208+
{
209+
"$match": {
210+
"name": {
211+
"$in": ["prefix1", "prefix2", "missing_prefix"]
212+
}
213+
}
214+
},
215+
{"$project": {"_id": 1, "name": 1}},
216+
]
217+
)
218+
219+
@patch("aind_data_access_api.utils.fetch_records_by_filter_list")
220+
@patch("aind_data_access_api.document_db.MetadataDbClient")
221+
def test_build_docdb_location_to_id_map(
222+
self,
223+
mock_docdb_api_client: MagicMock,
224+
mock_fetch_records_by_filter_list: MagicMock,
143225
):
144226
"""Tests build_docdb_location_to_id_map"""
145227
bucket = "aind-ephys-data-dev-u5u0i5"
146-
mock_docdb_api_client.retrieve_docdb_records.return_value = [
228+
mock_fetch_records_by_filter_list.return_value = [
147229
{
148230
"_id": "70bcf356-985f-4a2a-8105-de900e35e788",
149231
"location": (
@@ -176,6 +258,16 @@ def test_build_docdb_location_to_id_map(
176258
),
177259
}
178260
self.assertEqual(expected_map, actual_map)
261+
mock_fetch_records_by_filter_list.assert_called_once_with(
262+
docdb_api_client=mock_docdb_api_client,
263+
filter_key="location",
264+
filter_values=[
265+
f"s3://{bucket}/ecephys_655019_2000-04-04_04-00-00",
266+
f"s3://{bucket}/ecephys_567890_2000-01-01_04-00-00",
267+
f"s3://{bucket}/missing_655019_2000-01-01_01-01-02",
268+
],
269+
projection={"_id": 1, "location": 1},
270+
)
179271

180272

181273
if __name__ == "__main__":

0 commit comments

Comments
 (0)