Source code for unravel.coordinates.points_to_labeled_voxel_img

#!/usr/bin/env python3

"""
Use ``coords_points_to_labeled_voxel_img`` or ``points_to_labeled_voxel_img`` from UNRAVEL to convert point coordinates into a labeled unique-voxel image.

This is useful when many units/channels occupy the same voxel. Instead of
creating one image per unit, this script groups rows by voxel coordinate and
assigns one unique integer label per occupied voxel.

Input:
    - CSV with voxel coordinates, usually x, y, z from ``coords_physical_points_add_regions``
    - Reference image for output shape/header

Outputs:
    - labeled image where img[x, y, z] = voxel_label
    - mapping CSV linking voxel_label back to all rows/units/channels in that voxel
    - row-level mapping CSV preserving one row per original unit/channel

Notes:
    - Input coordinates must already be voxel/index coordinates.
    - If coordinates are physical coordinates, run ``coords_physical_points_add_regions``
      or ``coords_physical_points_to_img``-style conversion first.
    - Use nearest-neighbor/multiLabel interpolation when reorienting, resampling,
      or warping the output labeled image.
    - Aggregated columns in the mapping CSV (from ``-c``) are stored as
      sorted unique values and are NOT guaranteed to preserve row-wise alignment
      across columns (e.g., unit_id ↔ channel_id). Use the row-level mapping CSV
      for analyses requiring exact correspondence between columns.

Usage:
------
    points_to_labeled_voxel_img \\
        -i units_with_regions.csv \\
        -ri samples/downsampled_standard_NP09_488_Ch0.nii.gz \\
        -x x -y y -z z \\
        [-c recording_name unit_id channel_id shank_id abbreviation full_structure_name lowered_ID] \\
        -o unique_voxels \\
        -v
"""

from pathlib import Path

import numpy as np
import pandas as pd
from rich import print
from rich.traceback import install

from unravel.core.config import Configuration
from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM
from unravel.core.img_io import load_3D_img, save_3D_img
from unravel.core.utils import get_stem, log_command, verbose_start_msg, verbose_end_msg
from unravel.coordinates.physical_points_to_img import validate_columns


[docs] def parse_args(): parser = RichArgumentParser(formatter_class=SuppressMetavar, add_help=False, docstring=__doc__) reqs = parser.add_argument_group("Required arguments") reqs.add_argument("-i", "--input", help="Input CSV with voxel coordinates.", required=True, action=SM) reqs.add_argument("-ri", "--ref_img", help="Reference image for output shape/header.", required=True, action=SM) coord_args = parser.add_argument_group("Coordinate arguments") coord_args.add_argument("-x", "--x_col", help="Voxel coordinate column for axis 0. Default: x", default="x", action=SM) coord_args.add_argument("-y", "--y_col", help="Voxel coordinate column for axis 1. Default: y", default="y", action=SM) coord_args.add_argument("-z", "--z_col", help="Voxel coordinate column for axis 2. Default: z", default="z", action=SM) opts = parser.add_argument_group("Optional arguments") opts.add_argument("-c", "--cols", help="Columns to aggregate into mapping CSV (IDs + metadata). For example: recording_name unit_id channel_id shank_id abbreviation full_structure_name lowered_ID", nargs="*", default=None, action=SM) opts.add_argument("-o", "--output", help="Output directory. Default: unique_voxels", default="unique_voxels", action=SM) opts.add_argument("-f", "--filter", help="Optional pandas query string.", default=None, action=SM) opts.add_argument("-p", "--prefix", help="Output filename prefix. Default: input stem", default=None, action=SM) general = parser.add_argument_group("General arguments") general.add_argument("-v", "--verbose", help="Increase verbosity. Default: False", action="store_true", default=False) return parser.parse_args()
[docs] def aggregate_values(series): """Return sorted unique non-null values joined by semicolons.""" vals = series.dropna().astype(str).unique().tolist() vals = sorted(vals) return ";".join(vals)
[docs] def choose_dtype(max_label): """Choose a reasonable integer dtype for the labeled image.""" if max_label <= np.iinfo(np.uint16).max: return np.uint16 return np.uint32
[docs] @log_command def main(): install() args = parse_args() Configuration.verbose = args.verbose verbose_start_msg() input_path = Path(args.input) ref_img_path = Path(args.ref_img) output_dir = Path(args.output) output_dir.mkdir(parents=True, exist_ok=True) prefix = args.prefix if args.prefix else get_stem(input_path) df = pd.read_csv(input_path) if args.filter: if args.verbose: print(f"\n[blue]Filtering rows using filter:[/] {args.filter}") print(f" Original rows: {len(df)}") df = df.query(args.filter).copy() if args.verbose: print(f" Remaining rows: {len(df)}\n") validate_columns(df, [args.x_col, args.y_col, args.z_col]) ref_img = load_3D_img(ref_img_path, verbose=args.verbose) shape = ref_img.shape # Convert voxel coordinates to integer indices. df = df.copy() df["voxel_x"] = np.rint(df[args.x_col]).astype(int) df["voxel_y"] = np.rint(df[args.y_col]).astype(int) df["voxel_z"] = np.rint(df[args.z_col]).astype(int) in_bounds = ( (df["voxel_x"] >= 0) & (df["voxel_x"] < shape[0]) & (df["voxel_y"] >= 0) & (df["voxel_y"] < shape[1]) & (df["voxel_z"] >= 0) & (df["voxel_z"] < shape[2]) ) n_out_of_bounds = int((~in_bounds).sum()) df_valid = df[in_bounds].copy() if len(df_valid) == 0: raise ValueError("No valid in-bounds points were found.") # Assign one label per unique voxel. unique_voxels = ( df_valid[["voxel_x", "voxel_y", "voxel_z"]] .drop_duplicates() .sort_values(["voxel_x", "voxel_y", "voxel_z"]) .reset_index(drop=True) ) unique_voxels["voxel_label"] = np.arange(1, len(unique_voxels) + 1, dtype=np.int64) df_valid = df_valid.merge(unique_voxels, on=["voxel_x", "voxel_y", "voxel_z"], how="left") dtype = choose_dtype(len(unique_voxels)) img = np.zeros(shape, dtype=dtype) coords = unique_voxels[["voxel_x", "voxel_y", "voxel_z"]].to_numpy(dtype=int) labels = unique_voxels["voxel_label"].to_numpy(dtype=dtype) img[coords[:, 0], coords[:, 1], coords[:, 2]] = labels # Named aggregation is easier when constructed explicitly. grouped = df_valid.groupby("voxel_label", as_index=False) mapping_df = unique_voxels.copy() mapping_df["n_rows"] = mapping_df["voxel_label"].map(grouped.size().set_index("voxel_label")["size"]) # Build a mapping CSV: one row per unique voxel. if args.cols is None: if args.verbose: print("[yellow]No --cols provided → skipping aggregated mapping columns[/]") agg_cols = [] else: agg_cols = [col for col in args.cols if col in df_valid.columns] for col in agg_cols: mapping_df[col] = mapping_df["voxel_label"].map(grouped[col].agg(aggregate_values).set_index("voxel_label")[col]) # Also save a row-level mapping so every original unit/channel can be recovered. row_mapping_cols = ["voxel_label", "voxel_x", "voxel_y", "voxel_z"] + [ col for col in df_valid.columns if col not in ["voxel_label", "voxel_x", "voxel_y", "voxel_z"] ] row_mapping_df = df_valid[row_mapping_cols].copy() output_img_path = output_dir / f"{prefix}__labeled_unique_voxels.nii.gz" output_map_path = output_dir / f"{prefix}__unique_voxel_mapping.csv" output_rows_path = output_dir / f"{prefix}__row_to_unique_voxel_mapping.csv" # Convert numpy dtype to a string for nibabel. This is a bit hacky but ensures we get the correct integer type in the output NIfTI. if dtype == np.uint16: nii_dtype = "uint16" elif dtype == np.uint32: nii_dtype = "uint32" else: raise ValueError(f"Unsupported dtype: {dtype}") save_3D_img( img, output_path=output_img_path, reference_img=ref_img_path, data_type=nii_dtype, verbose=args.verbose, ) mapping_df.to_csv(output_map_path, index=False) row_mapping_df.to_csv(output_rows_path, index=False) print(f"\n Input rows: {len(df)}") print(f" In-bounds rows: {len(df_valid)}") print(f" Out-of-bounds rows skipped: {n_out_of_bounds}") print(f" Unique occupied voxels: {len(unique_voxels)}") print(f" Max voxel label: {len(unique_voxels)}") print(f" Output image: {output_img_path}") print(f" Unique voxel mapping CSV: {output_map_path}") print(f" Row-to-voxel mapping CSV: {output_rows_path}\n") verbose_end_msg()
if __name__ == "__main__": main()