Skip to content

Commit 3122875

Browse files
authored
Merge pull request #129 from AllenNeuralDynamics/release-v1.0.0
Release v1.0.0
2 parents 397218f + 772ffe1 commit 3122875

22 files changed

+699
-1529
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
name: Auto add issues to project board
2+
on:
3+
issues:
4+
types:
5+
- opened
6+
7+
jobs:
8+
add-to-project:
9+
name: Add issue to project
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/[email protected]
13+
with:
14+
project-url: https://github.com/orgs/AllenNeuralDynamics/projects/9
15+
github-token: ${{ secrets.SERVICE_TOKEN }}

.readthedocs.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ build:
55
tools:
66
python: "3.8"
77

8+
sphinx:
9+
configuration: docs/source/conf.py
10+
811
python:
912
install:
1013
- method: pip

README.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,31 @@ API to interact with a few AIND databases. We have two primary databases:
1010
unstructured json documents. The DocDB contains AIND metadata.
1111
2. A relational database to store structured tables.
1212

13+
## Installation
14+
15+
Basic installation:
16+
```bash
17+
pip install aind-data-access-api
18+
```
19+
20+
The package includes optional features that require additional dependencies:
21+
22+
### Document Database (DocDB)
23+
To use the `MetadataDbClient` and other DocDB features:
24+
```bash
25+
pip install "aind-data-access-api[docdb]"
26+
```
27+
Note: The quotes are required in zsh and other shells that treat square brackets as glob patterns.
28+
29+
### Relational Database
30+
For RDS functionality:
31+
```bash
32+
pip install "aind-data-access-api[rds]"
33+
```
34+
35+
### Other Install Options
36+
- AWS Secrets management: `pip install "aind-data-access-api[secrets]"`
37+
- Helpers: `pip install "aind-data-access-api[helpers]"`
38+
- All features: `pip install "aind-data-access-api[full]"`
39+
1340
More information can be found at [readthedocs](https://aind-data-access-api.readthedocs.io).

docs/source/ExamplesDocDBDirectConnection.rst

Lines changed: 13 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Count Example 1: Get # of records with a certain subject_id
2525
2626
credentials = DocumentDbSSHCredentials()
2727
with DocumentDbSSHClient(credentials=credentials) as doc_db_client:
28-
filter = {"subject.subject_id": "689418"}
28+
filter = {"subject.subject_id": "731015"}
2929
count = doc_db_client.collection.count_documents(filter)
3030
print(count)
3131
@@ -35,7 +35,7 @@ Filter Example 1: Get records with a certain subject_id
3535
.. code:: python
3636
3737
with DocumentDbSSHClient(credentials=credentials) as doc_db_client:
38-
filter = {"subject.subject_id": "689418"}
38+
filter = {"subject.subject_id": "731015"}
3939
records = list(
4040
doc_db_client.collection.find(filter=filter)
4141
)
@@ -47,7 +47,7 @@ With projection (recommended):
4747
.. code:: python
4848
4949
with DocumentDbSSHClient(credentials=credentials) as doc_db_client:
50-
filter = {"subject.subject_id": "689418"}
50+
filter = {"subject.subject_id": "731015"}
5151
projection = {
5252
"name": 1,
5353
"created": 1,
@@ -68,7 +68,7 @@ Filter Example 2: Get records with a certain breeding group
6868
6969
with DocumentDbSSHClient(credentials=credentials) as doc_db_client:
7070
filter = {
71-
"subject.breeding_info.breeding_group": "Chat-IRES-Cre_Jax006410"
71+
"subject.breeding_info.breeding_group": "Slc17a6-IRES-Cre;Ai230-hyg(ND)"
7272
}
7373
records = list(
7474
doc_db_client.collection.find(filter=filter)
@@ -82,7 +82,7 @@ With projection (recommended):
8282
8383
with DocumentDbSSHClient(credentials=credentials) as doc_db_client:
8484
filter = {
85-
"subject.breeding_info.breeding_group": "Chat-IRES-Cre_Jax006410"
85+
"subject.breeding_info.breeding_group": "Slc17a6-IRES-Cre;Ai230-hyg(ND)"
8686
}
8787
projection = {
8888
"name": 1,
@@ -125,37 +125,19 @@ https://www.mongodb.com/docs/manual/aggregation/
125125
Updating Metadata
126126
~~~~~~~~~~~~~~~~~~~~~~
127127

128-
We provide several utility functions for interacting with DocDB within the
129-
``aind_data_access_api.utils`` module. Below is an example of how to use these
130-
functions to update records in DocDB.
128+
Below is an example of how to update records in DocDB using ``DocumentDbSSHClient``.
131129

132130
.. code:: python
133131
134-
import json
135132
import logging
136-
from typing import List, Optional
137133
138134
from aind_data_access_api.document_db_ssh import (
139135
DocumentDbSSHClient,
140136
DocumentDbSSHCredentials,
141137
)
142-
from aind_data_schema.core.metadata import Metadata
143-
144-
from aind_data_access_api.utils import paginate_docdb, is_dict_corrupt
145138
146139
logging.basicConfig(level="INFO")
147140
148-
def _process_docdb_records(records: List[dict], doc_db_client: DocumentDbSSHClient, dryrun: bool) -> None:
149-
"""
150-
Process records.
151-
Parameters
152-
----------
153-
records : List[dict]
154-
155-
"""
156-
for record in records:
157-
_process_docdb_record(record=record, doc_db_client=doc_db_client, dryrun=dryrun)
158-
159141
def _process_docdb_record(record: dict, doc_db_client: DocumentDbSSHClient, dryrun: bool) -> None:
160142
"""
161143
Process record. This example updates the data_description.name field
@@ -171,58 +153,17 @@ functions to update records in DocDB.
171153
location = record.get("location")
172154
if _id:
173155
if record.get("data_description") and record["data_description"].get("name") != name:
174-
# Option 1: update specific fields(s) only
156+
# update specific field(s) only
175157
new_fields = {
176158
"data_description.name": name
177159
}
178160
update_docdb_record_partial(record_id=_id, new_fields=new_fields, doc_db_client=doc_db_client, dryrun=dryrun)
179-
# Option 2: build new record Metadata.py and replace entire document with new record
180-
# new_record = build_new_docdb_record(record=record)
181-
# if new_record is not None:
182-
# update_docdb_record_entire(record_id=_id, new_record=new_record, doc_db_client=doc_db_client, dryrun=dryrun)
183161
# else:
184162
# logging.info(f"Record for {location} does not need to be updated.")
185163
else:
186164
logging.warning(f"Record for {location} does not have an _id field! Skipping.")
187165
188166
189-
def build_new_docdb_record(record: Optional[dict]) -> Optional[dict]:
190-
"""Build new record from existing record. This example updates the
191-
data_description.name field if it does not match the record.name field.
192-
193-
Parameters
194-
----------
195-
record : Optional[dict]
196-
197-
Returns
198-
-------
199-
Optional[dict]
200-
The new record, or None if the record cannot be constructed.
201-
"""
202-
# Example: Update record.data_description.name if not matching record.name
203-
new_record = None
204-
if record.get("data_description") and record["data_description"].get("name") != name:
205-
_id = record.get("_id")
206-
name = record.get("name")
207-
location = record.get("location")
208-
created = record.get("created")
209-
if _id is None or name is None or location is None or created is None:
210-
logging.warning(f"Record does not have _id, name, location, or created! Skipping.")
211-
return None
212-
try:
213-
new_record = record.copy()
214-
new_record["data_description"]["name"] = name
215-
new_record_str = Metadata.model_construct(
216-
**new_record
217-
).model_dump_json(warnings=False, by_alias=True)
218-
new_record = json.loads(new_record_str)
219-
if is_dict_corrupt(new_record):
220-
logging.warning(f"New record for {location} is corrupt! Skipping.")
221-
new_record = None
222-
except Exception:
223-
new_record = None
224-
return new_record
225-
226167
def update_docdb_record_partial(record_id: str, new_fields: dict, doc_db_client: DocumentDbSSHClient, dryrun: bool) -> None:
227168
"""
228169
Update record in docdb by updating specific fields only.
@@ -244,54 +185,24 @@ functions to update records in DocDB.
244185
upsert=False,
245186
)
246187
logging.info(response.raw_result)
247-
248-
249-
def update_docdb_record_entire(record_id: str, new_record: dict, doc_db_client: DocumentDbSSHClient, dryrun: bool) -> None:
250-
"""
251-
Update record in docdb by replacing the entire document with new record.
252-
Parameters
253-
----------
254-
record_id : str
255-
The _id of the record to update.
256-
new_record : dict
257-
The new record to replace the existing record with.
258-
259-
"""
260-
if is_dict_corrupt(new_record) or record_id != new_record.get("_id"):
261-
logging.warning(f"Attempting to update corrupt record {record_id}! Skipping.")
262-
return
263-
if dryrun:
264-
logging.info(f"(dryrun) doc_db_client.collection.update_one: {record_id}")
265-
else:
266-
logging.info(f"doc_db_client.collection.update_one: {record_id}")
267-
response = doc_db_client.collection.update_one(
268-
{"_id": record_id},
269-
{"$set": new_record},
270-
upsert=False,
271-
)
272-
logging.info(response.raw_result)
273188
274189
275190
if __name__ == "__main__":
276191
credentials = DocumentDbSSHCredentials() # credentials in environment
277192
dryrun = True
278-
filter = {"location": {"$regex": ".*s3://codeocean-s3datasetsbucket.*"}}
193+
filter = {"location": {"$regex": ".*s3://aind-open-data.*"}}
279194
projection = None
280195
281196
with DocumentDbSSHClient(credentials=credentials) as doc_db_client:
282197
db_name = doc_db_client.database_name
283198
col_name = doc_db_client.collection_name
284199
# count = doc_db_client.collection.count_documents(filter)
285-
# logging.info(f"{db_name}.{col_name}: Found {count} records with {filter}: {count}")
200+
# logging.info(f"{db_name}.{col_name}: Found {count} records with {filter}")
286201
287202
logging.info(f"{db_name}.{col_name}: Starting to scan for {filter}.")
288-
docdb_pages = paginate_docdb(
289-
db_name=doc_db_client.database_name,
290-
collection_name=doc_db_client.collection_name,
291-
docdb_client=doc_db_client._client,
292-
page_size=500,
293-
filter_query=filter,
203+
records = doc_db_client.collection.find(
204+
filter=filter,
294205
)
295-
for page in docdb_pages:
296-
_process_docdb_records(records=page, doc_db_client=doc_db_client, dryrun=dryrun)
206+
for record in records:
207+
_process_docdb_record(record=record, doc_db_client=doc_db_client, dryrun=dryrun)
297208
logging.info(f"{db_name}.{col_name}:Finished scanning through DocDb.")

0 commit comments

Comments
 (0)