diff --git a/.github/workflows/ci_windows.yaml b/.github/workflows/ci_windows.yaml index 9880d626..55f671ed 100644 --- a/.github/workflows/ci_windows.yaml +++ b/.github/workflows/ci_windows.yaml @@ -6,6 +6,9 @@ on: pull_request: branches: ["main"] +permissions: + contents: read + jobs: build: strategy: diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml index ac17f364..479301e3 100644 --- a/.github/workflows/claude-code-review.yml +++ b/.github/workflows/claude-code-review.yml @@ -37,6 +37,8 @@ jobs: - name: Run Claude Code Review id: claude-review + if: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN != '' }} + continue-on-error: true uses: anthropics/claude-code-action@v1 with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} diff --git a/.github/workflows/spec_tests.yaml b/.github/workflows/spec_tests.yaml index 578424b7..ab92943f 100644 --- a/.github/workflows/spec_tests.yaml +++ b/.github/workflows/spec_tests.yaml @@ -6,6 +6,9 @@ on: pull_request: branches: ["*"] +permissions: + contents: read + jobs: build: strategy: diff --git a/.github/workflows/test_installer.yaml b/.github/workflows/test_installer.yaml index 85f5d648..3bd93d87 100644 --- a/.github/workflows/test_installer.yaml +++ b/.github/workflows/test_installer.yaml @@ -4,6 +4,9 @@ on: pull_request: branches: ["main"] +permissions: + contents: read + jobs: build: runs-on: ubuntu-latest diff --git a/docs/api/tools.rst b/docs/api/tools.rst index f6312c4a..09517ea6 100644 --- a/docs/api/tools.rst +++ b/docs/api/tools.rst @@ -14,6 +14,30 @@ EventManager :undoc-members: :show-inheritance: +EventChecker +~~~~~~~~~~~~ + +.. autoclass:: hed.tools.analysis.event_checker.EventChecker + :members: + :undoc-members: + :show-inheritance: + +EventsChecker +~~~~~~~~~~~~~ + +.. autoclass:: hed.tools.analysis.event_checker.EventsChecker + :members: + :undoc-members: + :show-inheritance: + +EventsSummary +~~~~~~~~~~~~~ + +.. 
autoclass:: hed.tools.analysis.events_summary.EventsSummary + :members: + :undoc-members: + :show-inheritance: + HedTagManager ~~~~~~~~~~~~~ @@ -22,6 +46,22 @@ HedTagManager :undoc-members: :show-inheritance: +HedTagCount +~~~~~~~~~~~ + +.. autoclass:: hed.tools.analysis.hed_tag_counts.HedTagCount + :members: + :undoc-members: + :show-inheritance: + +HedTagCounts +~~~~~~~~~~~~ + +.. autoclass:: hed.tools.analysis.hed_tag_counts.HedTagCounts + :members: + :undoc-members: + :show-inheritance: + HedTypeManager ~~~~~~~~~~~~~~ @@ -30,6 +70,46 @@ HedTypeManager :undoc-members: :show-inheritance: +HedType +~~~~~~~ + +.. autoclass:: hed.tools.analysis.hed_type.HedType + :members: + :undoc-members: + :show-inheritance: + +HedTypeDefs +~~~~~~~~~~~ + +.. autoclass:: hed.tools.analysis.hed_type_defs.HedTypeDefs + :members: + :undoc-members: + :show-inheritance: + +HedTypeFactors +~~~~~~~~~~~~~~ + +.. autoclass:: hed.tools.analysis.hed_type_factors.HedTypeFactors + :members: + :undoc-members: + :show-inheritance: + +HedTypeCount +~~~~~~~~~~~~ + +.. autoclass:: hed.tools.analysis.hed_type_counts.HedTypeCount + :members: + :undoc-members: + :show-inheritance: + +HedTypeCounts +~~~~~~~~~~~~~ + +.. autoclass:: hed.tools.analysis.hed_type_counts.HedTypeCounts + :members: + :undoc-members: + :show-inheritance: + TabularSummary ~~~~~~~~~~~~~~ @@ -38,10 +118,10 @@ TabularSummary :undoc-members: :show-inheritance: -HedType -~~~~~~~ +ColumnNameSummary +~~~~~~~~~~~~~~~~~ -.. autoclass:: hed.tools.analysis.hed_type.HedType +.. autoclass:: hed.tools.analysis.column_name_summary.ColumnNameSummary :members: :undoc-members: :show-inheritance: @@ -54,6 +134,29 @@ FileDictionary :undoc-members: :show-inheritance: +KeyMap +~~~~~~ + +.. autoclass:: hed.tools.analysis.key_map.KeyMap + :members: + :undoc-members: + :show-inheritance: + +TemporalEvent +~~~~~~~~~~~~~ + +.. 
autoclass:: hed.tools.analysis.temporal_event.TemporalEvent + :members: + :undoc-members: + :show-inheritance: + +Annotation utilities +~~~~~~~~~~~~~~~~~~~~ + +.. automodule:: hed.tools.analysis.annotation_util + :members: + :undoc-members: + BIDS tools ---------- @@ -101,5 +204,29 @@ BIDS utilities ~~~~~~~~~~~~~~ .. automodule:: hed.tools.bids.bids_util - :members: parse_bids_filename + :members: + :undoc-members: + +Utility functions +----------------- + +DataFrame utilities +~~~~~~~~~~~~~~~~~~~ + +.. automodule:: hed.tools.util.data_util + :members: + :undoc-members: + +File/IO utilities +~~~~~~~~~~~~~~~~~ + +.. automodule:: hed.tools.util.io_util + :members: + :undoc-members: + +Schema utilities +~~~~~~~~~~~~~~~~ + +.. automodule:: hed.tools.util.schema_util + :members: :undoc-members: diff --git a/docs/api/validator.rst b/docs/api/validator.rst index 56399d67..906990de 100644 --- a/docs/api/validator.rst +++ b/docs/api/validator.rst @@ -49,9 +49,6 @@ OnsetValidator :undoc-members: :show-inheritance: -Validation utilities --------------------- - ReservedChecker ~~~~~~~~~~~~~~~ @@ -59,3 +56,62 @@ ReservedChecker :members: :undoc-members: :show-inheritance: + +Validator utilities +------------------- + +CharValidator +~~~~~~~~~~~~~ + +.. autoclass:: hed.validator.util.char_util.CharValidator + :members: + :undoc-members: + :show-inheritance: + +CharRexValidator +~~~~~~~~~~~~~~~~ + +.. autoclass:: hed.validator.util.char_util.CharRexValidator + :members: + :undoc-members: + :show-inheritance: + +UnitValueValidator +~~~~~~~~~~~~~~~~~~ + +.. autoclass:: hed.validator.util.class_util.UnitValueValidator + :members: + :undoc-members: + :show-inheritance: + +DuplicateChecker +~~~~~~~~~~~~~~~~ + +.. autoclass:: hed.validator.util.dup_util.DuplicateChecker + :members: + :undoc-members: + :show-inheritance: + +GroupValidator +~~~~~~~~~~~~~~ + +.. 
autoclass:: hed.validator.util.group_util.GroupValidator + :members: + :undoc-members: + :show-inheritance: + +StringValidator +~~~~~~~~~~~~~~~ + +.. autoclass:: hed.validator.util.string_util.StringValidator + :members: + :undoc-members: + :show-inheritance: + +TagValidator +~~~~~~~~~~~~ + +.. autoclass:: hed.validator.util.tag_util.TagValidator + :members: + :undoc-members: + :show-inheritance: diff --git a/hed/scripts/__init__.py b/hed/scripts/__init__.py index e69de29b..35546c44 100644 --- a/hed/scripts/__init__.py +++ b/hed/scripts/__init__.py @@ -0,0 +1 @@ +"""Legacy CLI entry point scripts (deprecated — prefer the ``hedpy`` CLI).""" diff --git a/hed/scripts/add_hed_ids.py b/hed/scripts/add_hed_ids.py index 786b0f23..53a13505 100644 --- a/hed/scripts/add_hed_ids.py +++ b/hed/scripts/add_hed_ids.py @@ -1,3 +1,5 @@ +"""CLI script to add missing HED IDs to a schema in the hed-schemas repository.""" + from hed.scripts.schema_script_util import get_prerelease_path from hed.scripts.hed_convert_schema import convert_and_update import argparse @@ -6,6 +8,12 @@ # Slightly tweaked version of hed_convert_schema.py with a new main function to allow different parameters. def main(): + """Entry point: parse arguments and add HED IDs to the specified schema version. + + Returns: + int: 0 on success, non-zero on failure. 
+ + """ parser = argparse.ArgumentParser(description="Add hed ids to a specific schema.") parser.add_argument("repo_path", help="The location of the hed-schemas directory") parser.add_argument("schema_name", help='The name of the schema("standard" for standard schema) to modify') diff --git a/hed/scripts/hed_convert_schema.py b/hed/scripts/hed_convert_schema.py index 7d3880d0..e6f2bd61 100644 --- a/hed/scripts/hed_convert_schema.py +++ b/hed/scripts/hed_convert_schema.py @@ -1,3 +1,5 @@ +"""CLI script to validate and convert HED schema files across formats (XML, MediaWiki, TSV, JSON).""" + from hed.scripts.schema_script_util import sort_base_schemas, validate_all_schemas, add_extension from hed.schema.schema_io import load_dataframes, save_dataframes from hed.schema.schema_io.hed_id_util import update_dataframes_from_schema @@ -76,6 +78,15 @@ def convert_and_update(filenames, set_ids): def main(arg_list=None): + """Entry point: parse arguments and convert/validate the named schema files. + + Parameters: + arg_list (list[str] or None): Argument list for testing; uses sys.argv if None. + + Returns: + int: 0 on success, non-zero if validation fails. 
+ + """ parser = argparse.ArgumentParser(description="Update other schema formats based on the changed one.") parser.add_argument("filenames", nargs="*", help="List of files to process") parser.add_argument("--set-ids", action="store_true", help="Add missing HED ids") diff --git a/hed/scripts/schema_script_util.py b/hed/scripts/schema_script_util.py index 77df92ec..ac71cb1c 100644 --- a/hed/scripts/schema_script_util.py +++ b/hed/scripts/schema_script_util.py @@ -1,3 +1,5 @@ +"""Shared utilities for schema validation, sorting, and format conversion used by CLI scripts.""" + import os.path from collections import defaultdict from hed.schema import from_string, load_schema, from_dataframes diff --git a/hed/scripts/validate_bids.py b/hed/scripts/validate_bids.py index a640abad..5e53ccb1 100644 --- a/hed/scripts/validate_bids.py +++ b/hed/scripts/validate_bids.py @@ -196,6 +196,15 @@ def format_final_report(issue_list): def main(arg_list=None): + """Entry point: parse arguments, validate the BIDS dataset, and print a summary report. + + Parameters: + arg_list (list[str] or None): Argument list for testing; uses sys.argv if None. + + Returns: + int: 0 if no issues found, 1 if validation issues are present. + + """ # Create the argument parser parser = get_parser() @@ -231,6 +240,16 @@ def main(arg_list=None): def validate_dataset(args): + """Run HED validation on the BIDS dataset described by args and return the issue list. + + Parameters: + args (argparse.Namespace): Parsed CLI arguments including data_path, suffixes, and + check_for_warnings. + + Returns: + list[dict]: Validation issue dictionaries (empty list if no issues). 
+ + """ logger = logging.getLogger("validate_bids") logger.info(f"Data directory: {args.data_path}") logger.info(f"HEDTools version: {__version__}") diff --git a/hed/scripts/validate_schemas.py b/hed/scripts/validate_schemas.py index 422d8410..80e92854 100644 --- a/hed/scripts/validate_schemas.py +++ b/hed/scripts/validate_schemas.py @@ -1,3 +1,5 @@ +"""CLI script to validate HED schema files for compliance and format consistency.""" + import sys from hed.scripts.schema_script_util import validate_all_schemas, sort_base_schemas from hed.errors import get_printable_issue_string @@ -20,6 +22,15 @@ def get_parser(): def main(arg_list=None): + """Entry point: parse arguments and validate the specified schema files. + + Parameters: + arg_list (list[str] or None): Argument list for testing; uses sys.argv if None. + + Returns: + int: 0 if all schemas are valid, 1 if any issues are found. + + """ parser = get_parser() args = parser.parse_args(arg_list) diff --git a/hed/tools/analysis/event_checker.py b/hed/tools/analysis/event_checker.py index 0f0eec03..e2522dd6 100644 --- a/hed/tools/analysis/event_checker.py +++ b/hed/tools/analysis/event_checker.py @@ -1,9 +1,13 @@ +"""Checker that validates event-level HED annotation quality for BIDS datasets.""" + from hed.errors.error_types import TagQualityErrors from hed.errors import ErrorHandler, ErrorContext, sort_issues from hed.tools import EventManager, HedTagManager class EventChecker: + """Validates that HED-annotated events meet quality requirements such as having a top-level event tag.""" + EVENT_TAGS = { "Event", "Sensory-event", diff --git a/hed/tools/analysis/events_summary.py b/hed/tools/analysis/events_summary.py index e2f151ed..4a34219f 100644 --- a/hed/tools/analysis/events_summary.py +++ b/hed/tools/analysis/events_summary.py @@ -1,3 +1,5 @@ +"""Summarizes events in a tabular file by checking tag coverage and quality.""" + from hed import TabularInput from hed.errors import ErrorHandler from hed.errors.error_types 
import TagQualityErrors @@ -5,6 +7,8 @@ class EventsSummary: + """Summarizes HED event annotations for a tabular file, grouping tags by stimulus/response categories.""" + # Excluding tags for condition-variables and task -- these can be done separately if we want to. REMOVE_TYPES = ["Condition-variable", "Task"] # Tags organized by whether they are found with either of these @@ -175,9 +179,29 @@ def extract_tag_summary(self): @staticmethod def match_tags(all_tags, key): + """Return True if any tag in all_tags has a short_base_tag matching key. + + Parameters: + all_tags (list[HedTag]): The tags to search. + key (str): The short base tag name to look for. + + Returns: + bool: True if a match is found. + + """ return any(tag.short_base_tag == key for tag in all_tags) def update_tags(self, tag_set, all_tags): + """Add the most-specific ancestor tag names from all_tags into tag_set, respecting cutoff categories. + + Parameters: + tag_set (set): The running set of tag terms to update. + all_tags (list[HedTag]): Tags to process. + + Returns: + set: The updated tag_set. + + """ for tag in all_tags: terms = tag.tag_terms if any(item in self.EXCLUDED_PARENTS for item in terms): diff --git a/hed/tools/analysis/hed_tag_counts.py b/hed/tools/analysis/hed_tag_counts.py index 3e691da6..163c9374 100644 --- a/hed/tools/analysis/hed_tag_counts.py +++ b/hed/tools/analysis/hed_tag_counts.py @@ -63,6 +63,12 @@ def get_summary(self) -> dict: return {"tag": self.tag, "events": self.events, "files": list(self.files)} def get_empty(self): + """Return a copy of this entry with counts reset to zero. + + Returns: + HedTagCount: A new instance with the same tag name but zeroed event/file counts. 
+ + """ empty = copy.copy(self) empty.events = 0 empty.files = {} diff --git a/hed/tools/analysis/hed_type.py b/hed/tools/analysis/hed_type.py index 7cfe1a69..9db68ada 100644 --- a/hed/tools/analysis/hed_type.py +++ b/hed/tools/analysis/hed_type.py @@ -32,6 +32,12 @@ def __init__(self, event_manager, name, type_tag="condition-variable"): @property def total_events(self): + """Return the total number of events in the associated event list. + + Returns: + int: Number of events. + + """ return len(self.event_manager.event_list) def get_type_value_factors(self, type_value): @@ -60,6 +66,12 @@ def get_type_value_level_info(self, type_value): @property def type_variables(self): + """Return the set of type-value names (keys) found in this HedType. + + Returns: + set[str]: Set of lowercased type-value name strings. + + """ return set(self._type_map.keys()) def get_type_def_names(self): @@ -70,9 +82,21 @@ def get_type_def_names(self): return list(set(tag_list)) def get_type_value_names(self): + """Return the list of type-value names defined in this HedType. + + Returns: + list[str]: Lowercased type-value name strings. + + """ return list(self._type_map.keys()) def get_summary(self): + """Return a summary dict mapping each type-value name to its factor summary. + + Returns: + dict: Keys are type-value name strings; values are factor summary dicts. + + """ var_summary = self._type_map.copy() summary = {} for var_name, var_sum in var_summary.items(): diff --git a/hed/tools/bids/bids_file_group.py b/hed/tools/bids/bids_file_group.py index 41ac72b8..3f31c10c 100644 --- a/hed/tools/bids/bids_file_group.py +++ b/hed/tools/bids/bids_file_group.py @@ -309,6 +309,17 @@ def _make_sidecar_dict(self, json_files): @staticmethod def create_file_group(root_path, file_list, suffix): + """Construct a BidsFileGroup from a list of files sharing the given suffix. + + Parameters: + root_path (str): Root path of the BIDS dataset. 
+ file_list (list[str]): List of file paths belonging to this suffix group. + suffix (str): BIDS file suffix identifying this group (e.g. ``events``). + + Returns: + BidsFileGroup or None: The constructed group, or None if it contains no sidecars or data files. + + """ logger = logging.getLogger("hed.bids_file_group") logger.debug(f"Creating file group for suffix '{suffix}' from {len(file_list)} files") diff --git a/hed/tools/bids/bids_util.py b/hed/tools/bids/bids_util.py index d7329425..165d72ed 100644 --- a/hed/tools/bids/bids_util.py +++ b/hed/tools/bids/bids_util.py @@ -1,3 +1,5 @@ +"""BIDS utility functions for schema loading, sidecar merging, and inheritance chain resolution.""" + import os import json from hed.tools.util.io_util import get_full_extension @@ -5,6 +7,15 @@ def get_schema_from_description(root_path): + """Load the HED schema version declared in the BIDS dataset_description.json. + + Parameters: + root_path (str): Root path of the BIDS dataset. + + Returns: + HedSchema or None: The loaded schema, or None if loading fails. + + """ try: description_path = os.path.abspath(os.path.join(root_path, "dataset_description.json")) with open(description_path, "r") as fp: @@ -109,6 +120,16 @@ def update_entity(name_dict, entity): def get_merged_sidecar(root_path, tsv_file): + """Return a merged sidecar dict following BIDS inheritance rules for a given TSV file. + + Parameters: + root_path (str): Root path of the BIDS dataset. + tsv_file (str): Path to the TSV file whose inherited sidecars should be merged. + + Returns: + dict: Merged sidecar dictionary. Keys from closer (more specific) sidecar files take precedence. 
+ + """ sidecar_files = list(walk_back(root_path, tsv_file)) merged_sidecar = {} # Process from closest to most distant - first file wins for each key @@ -123,6 +144,19 @@ def get_merged_sidecar(root_path, tsv_file): def walk_back(root_path, file_path): + """Yield inherited sidecar file paths from the directory of file_path back toward root_path. + + Traverses parent directories from the file's location up to root_path, yielding any sidecar + JSON files that apply to the given TSV according to BIDS inheritance rules. + + Parameters: + root_path (str): Root path of the BIDS dataset. + file_path (str): Path to the data file whose applicable sidecars should be found. + + Yields: + str: Absolute paths of applicable sidecar JSON files, from nearest to farthest. + + """ file_path = os.path.abspath(file_path) source_dir = os.path.dirname(file_path) root_path = os.path.abspath(root_path) # Normalize root_path for cross-platform support @@ -154,6 +188,16 @@ def walk_back(root_path, file_path): def get_candidates(source_dir, tsv_file_dict): + """Return sidecar JSON files in source_dir that are applicable to tsv_file_dict. + + Parameters: + source_dir (str): Directory to search for candidate sidecar files. + tsv_file_dict (dict): Parsed BIDS filename dict for the target TSV file. + + Returns: + list[str]: Absolute paths to matching sidecar JSON files. + + """ candidates = [] for file in os.listdir(source_dir): this_path = os.path.realpath(os.path.join(source_dir, file)) @@ -168,6 +212,19 @@ def get_candidates(source_dir, tsv_file_dict): def matches_criteria(json_file_dict, tsv_file_dict): + """Return True if a candidate sidecar JSON file applies to the given TSV file. + + A sidecar applies when its extension is ``.json``, its suffix matches the TSV (or the TSV has no suffix), and all + BIDS entities in the JSON filename have equal values in the TSV filename. + + Parameters: + json_file_dict (dict): Parsed BIDS filename dict for the candidate JSON file. 
+ tsv_file_dict (dict): Parsed BIDS filename dict for the target TSV file. + + Returns: + bool: True if the sidecar is applicable. + + """ extension_is_valid = json_file_dict["ext"].lower() == ".json" suffix_is_valid = (json_file_dict["suffix"] == tsv_file_dict["suffix"]) or not tsv_file_dict["suffix"] json_entities = json_file_dict["entities"] diff --git a/hed/tools/util/io_util.py b/hed/tools/util/io_util.py index ae900a2c..b2ef0977 100644 --- a/hed/tools/util/io_util.py +++ b/hed/tools/util/io_util.py @@ -170,6 +170,15 @@ def clean_filename(filename): def get_basename(file_path): + """Return the base filename (without extension) for the given path. + + Parameters: + file_path (str): Path to a file. + + Returns: + str: The filename stem, e.g. ``sub-01_task-rest_events`` for ``sub-01_task-rest_events.tsv``. + + """ return get_full_extension(file_path)[0] diff --git a/hed/validator/reserved_checker.py b/hed/validator/reserved_checker.py index ea475c1d..a6c4d2a4 100644 --- a/hed/validator/reserved_checker.py +++ b/hed/validator/reserved_checker.py @@ -1,3 +1,5 @@ +"""Singleton checker that validates reserved HED tag usage rules loaded from reservedTags.json.""" + import json import os import math @@ -8,6 +10,8 @@ class ReservedChecker: + """Thread-safe singleton that loads reserved tag rules and checks groups for compliance.""" + _instance = None _lock = Lock() reserved_reqs_path = os.path.join(os.path.dirname(__file__), "data/reservedTags.json") @@ -29,6 +33,12 @@ def _initialize(self): @staticmethod def get_instance(): + """Return the singleton ReservedChecker instance, creating it on first call. + + Returns: + ReservedChecker: The shared singleton instance. 
+ + """ if ReservedChecker._instance is None: ReservedChecker._instance = ReservedChecker() return ReservedChecker._instance @@ -47,6 +57,15 @@ def _get_special_tags_by_property(self, property_name): return {value["name"] for value in self.reserved_map.values() if value.get(property_name) is True} def get_reserved(self, group): + """Return the list of reserved tags found directly within the given HED group. + + Parameters: + group (HedGroup): The group to inspect. + + Returns: + list[HedTag]: Tags in the group whose short base tag is a reserved name. + + """ reserved_tags = [tag for tag in group.tags() if tag.short_base_tag in self.special_names] return reserved_tags diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 5fdb70d5..08a035e0 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -19,6 +19,8 @@ # todo: Add/improve validation for definitions being in known columns(right now it just assumes they aren't) class SidecarValidator: + """Validates HED annotations in a BIDS JSON sidecar against a HED schema.""" + reserved_column_names = ["HED"] reserved_category_values = ["n/a"] diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index dd1eab03..6cc595bd 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -20,6 +20,8 @@ class SpreadsheetValidator: + """Validates HED annotations in a tabular (TSV/Excel) spreadsheet against a HED schema.""" + ONSET_TOLERANCE = 1e-7 TEMPORAL_ANCHORS = re.compile(r"|".join(map(re.escape, ["onset", "inset", "offset", "delay"]))) diff --git a/hed/validator/util/char_util.py b/hed/validator/util/char_util.py index a0708279..66e0f18e 100644 --- a/hed/validator/util/char_util.py +++ b/hed/validator/util/char_util.py @@ -183,7 +183,16 @@ def __init__(self, modern_allowed_char_rules=False): self._rex_dict = self._get_rex_dict() def get_problem_chars(self, in_str, cname): + 
"""Return a list of (index, char) pairs for characters in in_str not allowed by the value class cname. + Parameters: + in_str (str): The string to check. + cname (str): The value class name used to look up allowed character classes. + + Returns: + list[tuple[int, str]]: Each tuple contains the character index and the offending character. + + """ # List to store problem indices and characters bad_indices = [] @@ -209,6 +218,19 @@ def get_problem_chars(self, in_str, cname): return bad_indices def is_valid_value(self, in_string, cname): + """Check whether in_string is a valid whole-word value for class cname. + + Parameters: + in_string (str): The string to validate. + cname (str): The value class name to look up the word-level regex for. + + Returns: + True | re.Match | False: + - ``True`` if no word-level regex is defined for *cname* (class imposes no constraint). + - A ``re.Match`` object if *in_string* matches the word-level regex (valid value). + - ``False`` if *in_string* does not match the word-level regex (invalid value). + + """ # Retrieve the allowed character classes for the given class_name class_regex = self._rex_dict["class_words"].get(cname, []) if not class_regex: diff --git a/hed/validator/util/class_util.py b/hed/validator/util/class_util.py index 50f2d194..30cb7aba 100644 --- a/hed/validator/util/class_util.py +++ b/hed/validator/util/class_util.py @@ -147,6 +147,18 @@ def _check_value_class(self, original_tag, stripped_value, report_as): @staticmethod def report_value_errors(error_dict, class_valid, report_as): + """Build validation issues from per-class character error and validity dicts. + + Parameters: + error_dict (dict): Mapping of class name to list of (char, index) problem tuples. + class_valid (dict): Mapping of class name to a validity result (``True``, ``re.Match``, or ``False``) + indicating whether the full value passed word-level format validation for that class. + report_as (HedTag): The tag object used as context in error reporting. 
+ + Returns: + list[dict]: Validation issue dictionaries. + + """ validation_issues = [] for class_name, errors in error_dict.items(): if not errors and class_valid[class_name]: @@ -165,6 +177,17 @@ def report_value_errors(error_dict, class_valid, report_as): @staticmethod def report_value_char_errors(class_name, errors, report_as): + """Build validation issues for specific invalid characters within a value class string. + + Parameters: + class_name (str): The value class name that detected the errors. + errors (list[tuple[str, int]]): Character/index pairs of invalid characters. + report_as (HedTag): The tag object used as context in error reporting. + + Returns: + list[dict]: Validation issue dictionaries. + + """ validation_issues = [] for value in errors: if value[0] in "{}": @@ -225,6 +248,16 @@ def validate_value_class_type(self, unit_or_value_portion, valid_types) -> bool: def find_invalid_positions(s, pattern): + """Return a list of (index, char) pairs for characters in s that do not match pattern. + + Parameters: + s (str): The string to scan. + pattern (str): A single-character regex pattern specifying valid characters. + + Returns: + list[tuple[int, str]]: Each tuple contains the character index and the invalid character. + + """ # List to store positions of invalid characters invalid_positions = [] @@ -258,6 +291,18 @@ def is_date_time_value_class(date_time_string) -> bool: def is_name_value_class(name_str) -> bool: + """Return True if name_str is a valid HED name-value. + + Allowed characters are Unicode word characters (letters, digits, underscore), + hyphens, and Unicode code points U+0080 through U+FFFF. + + Parameters: + name_str (str): The string to validate. + + Returns: + bool: True if the string matches the allowed pattern. 
+ + """ pattern = r"^[\w\-\u0080-\uFFFF]+$" if re.fullmatch(pattern, name_str): return True diff --git a/hed/validator/util/dup_util.py b/hed/validator/util/dup_util.py index d738ab47..15cc1d5c 100644 --- a/hed/validator/util/dup_util.py +++ b/hed/validator/util/dup_util.py @@ -1,3 +1,5 @@ +"""Utility checker that detects duplicate tags and groups within a HED annotation.""" + from typing import Union from hed.errors.error_reporter import ErrorHandler @@ -6,6 +8,8 @@ class DuplicateChecker: + """Detects duplicate tags and groups within a HED annotation.""" + def __init__(self): """Checker for duplications in HED groups. diff --git a/hed/validator/util/string_util.py b/hed/validator/util/string_util.py index 5d6f965f..daebc89f 100644 --- a/hed/validator/util/string_util.py +++ b/hed/validator/util/string_util.py @@ -12,6 +12,15 @@ class StringValidator: COMMA = "," def run_string_validator(self, hed_string_obj): + """Run all string-level structural checks on a HED string object. + + Parameters: + hed_string_obj (HedString): The parsed HED string to validate. + + Returns: + list[dict]: Validation issue dictionaries. + + """ validation_issues = [] validation_issues += self.check_count_tag_group_parentheses(hed_string_obj.get_original_hed_string()) validation_issues += self.check_delimiter_issues_in_hed_string(hed_string_obj.get_original_hed_string()) diff --git a/hed/validator/util/tag_util.py b/hed/validator/util/tag_util.py index 81c181e5..c4df03f2 100644 --- a/hed/validator/util/tag_util.py +++ b/hed/validator/util/tag_util.py @@ -102,6 +102,15 @@ def check_capitalization(self, original_tag) -> list[dict]: return validation_issues def check_tag_is_deprecated(self, original_tag) -> list[dict]: + """Return a validation issue if the tag carries the DeprecatedFrom attribute. + + Parameters: + original_tag (HedTag): The tag to check. + + Returns: + list[dict]: A singleton list with a deprecation issue, or an empty list. 
+ + """ validation_issues = [] if original_tag.has_attribute(HedKey.DeprecatedFrom): validation_issues += ErrorHandler.format_error(ValidationErrors.ELEMENT_DEPRECATED, tag=original_tag) diff --git a/spec_tests/hed-schemas b/spec_tests/hed-schemas index f78fc49c..2edc0260 160000 --- a/spec_tests/hed-schemas +++ b/spec_tests/hed-schemas @@ -1 +1 @@ -Subproject commit f78fc49c94c1104fc6c02f4471d8ec561213680f +Subproject commit 2edc0260e4c4ef3d58ffcb6779f5032c2a3b43ed