Source code for datahipy.bids.participant

# Copyright (C) 2022-2023, The HIP team and Contributors, All rights reserved.
#  This software is distributed under the open-source Apache 2.0 license.

"""Utility functions to retrieve participant-level information from a BIDS dataset."""

import pandas as pd
from os import path as op
from datahipy.bids.const import (
    VALID_EXTENSIONS,
    BIDS_ENTITY_MAP,
    BIDSJSONFILE_DATATYPE_KEY_MAP,
    BIDSTSVFILE_DATATYPE_KEY_MAP,
)


def get_subject_bidsfile_info(bids_dir, **kwargs):
    """Return a list of dictionaries with BIDS file information for a given subject.

    Parameters
    ----------
    bids_dir : str
        Path to the BIDS dataset.

    kwargs : dict
        Dictionary of arguments key/value to pass to the pybids
        BIDSLayout.get() function (used as-is to select the files).

    Returns
    -------
    subject_bids_file_info : list
        List with one dictionary per selected data file. Each dictionary
        holds the file "datatype", "modality" (the BIDS suffix), "extension",
        the entities listed in ``BIDS_ENTITY_MAP`` (stored under their mapped
        names), "fileLoc" (path relative to the dataset), the JSON sidecar
        metadata when there is any and, for EEG/MEG/iEEG files, the content
        of the associated ``*_channels.tsv`` file as returned by
        ``get_channels_info()``.
    """
    # Imported at call time, as in the original code (presumably to avoid a
    # circular import between the bids sub-modules -- TODO confirm).
    from datahipy.bids.dataset import create_bids_layout
    from datahipy.bids.electrophy import get_channels_info

    # Create a pybids representation of the dataset
    layout = create_bids_layout(bids_dir)
    # Get the list of files matching the query
    # (subject, and session, task and run if provided)
    files = layout.get(**kwargs)
    # Initialize the list to be returned
    subject_bids_file_info = []
    for file in files:
        extension = file.entities["extension"]
        # Skip the json and tsv sidecar files and any file type that is not
        # listed as valid
        if extension in [".json", ".tsv"] or extension not in VALID_EXTENSIONS:
            continue
        datatype = file.entities["datatype"]
        file_info = {
            "datatype": datatype,
            # The modality is taken from the BIDS suffix
            "modality": file.entities["suffix"],
            "extension": extension,
        }
        # Extract all the "proper" BIDS entities under their mapped names
        for key, value in file.entities.items():
            if key in BIDS_ENTITY_MAP:
                file_info[BIDS_ENTITY_MAP[key]] = value
        # Extract the relative path of the file
        file_info["fileLoc"] = file.relpath
        # Extract the file metadata from the BIDS json sidecar file
        file_metadata = layout.get_metadata(file.path)
        if file_metadata:
            file_info[BIDSJSONFILE_DATATYPE_KEY_MAP[datatype]] = file_metadata
        # Extract the channel information from the channels tsv file
        # for EEG, MEG and iEEG
        if datatype in ["eeg", "meg", "ieeg"]:
            # Strip only the LAST "_<datatype>" occurrence (the BIDS suffix of
            # the file name). Splitting on the first occurrence would cut the
            # path too early whenever a parent directory name contains the
            # same token (e.g. "/data/my_eeg_study/...").
            channels_file = (
                file.path.rsplit(f"_{datatype}", 1)[0] + "_channels.tsv"
            )
            file_info[BIDSTSVFILE_DATATYPE_KEY_MAP[datatype]] = get_channels_info(
                channels_file
            )
        # Add the file information to the list
        subject_bids_file_info.append(file_info)
    return subject_bids_file_info


def get_participants_info(bids_dir):
    """Summarize the content of the `participants.tsv` file of a BIDS dataset.

    Parameters
    ----------
    bids_dir : str
        Path to the BIDS dataset. The file ``participants.tsv`` is read from
        this directory.

    Returns
    -------
    participants_info : dict
        Dictionary with the following keys:

        - ``"AgeMin"`` / ``"AgeMax"``: minimum / maximum of the ``age``
          column formatted as strings, or ``None`` if there is no such column.
        - ``"ParticipantsCount"``: number of rows in the file.
        - ``"ParticipantsGroups"``: distinct values of the ``group`` column in
          order of first appearance, or ``[None]`` if there is no such column.
        - ``"Participants"``: content of the file as a list of dictionaries
          (one per row).

    Notes
    -----
    Only an empty file is tolerated (it is treated as an empty table); any
    other read error, such as a missing file, propagates to the caller.
    """
    # Load the participants.tsv file to extract information about participants
    try:
        participants_df = pd.read_csv(
            op.join(bids_dir, "participants.tsv"),
            sep="\t",
            header=0,
            na_filter=False,
        )
    except pd.errors.EmptyDataError:
        participants_df = pd.DataFrame()

    participants_info = {}
    # Get min and max age of participants.
    # NOTE(review): because of na_filter=False, an age column containing
    # non-numeric entries (e.g. "n/a") is read as text, in which case min/max
    # compare strings rather than numbers -- confirm this is acceptable.
    if "age" in participants_df.columns:
        participants_info["AgeMin"] = f'{participants_df["age"].min()}'
        participants_info["AgeMax"] = f'{participants_df["age"].max()}'
    else:
        participants_info["AgeMin"] = None
        participants_info["AgeMax"] = None
    participants_info["ParticipantsCount"] = len(participants_df.index)
    if "group" in participants_df.columns:
        # Series.tolist() yields native Python scalars, so that numeric group
        # labels remain JSON-serializable (list(unique()) keeps NumPy scalars).
        participants_info["ParticipantsGroups"] = (
            participants_df["group"].drop_duplicates().tolist()
        )
    else:
        participants_info["ParticipantsGroups"] = [None]
    # Store content of participants.tsv as a list of row dictionaries
    participants_info["Participants"] = participants_df.to_dict(orient="records")
    return participants_info