Source code for unravel.cluster_stats.find_incongruent_clusters

#!/usr/bin/env python3

"""
Use ``cstats_find_incongruent`` (``cfi``) from UNRAVEL if ``cstats_fdr`` was used to convert non-directional p value maps into directional cluster indices. This helps to find clusters where the direction of the mean intensity difference between groups does not match the direction of the difference in cell/label density between groups.

Input:
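    - <matching_dir>/_valid_clusters_stats/tukey_results.csv or ttest_results.csv from ``cstats`` (matching dirs in the current working dir contain '<greater_group>_gt_<lesser_group>' or '<lesser_group>_lt_<greater_group>' in their name)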
    - Columns: 'cluster_ID', 'comparison', 'higher_mean_group', 'p-value', 'significance'

Output:
    - Cluster IDs where the mean intensity difference does not match the density difference between groups A and B.

Note: 
    - This module is useful for finding clusters where z-scoring introduces incongruencies between the mean intensity difference and the density difference.
    - For example, if group A has increased IF in region A and not B, z-scoring may decrease the relative intensity of region B. 
    - This decrease for region B for one group, may introduce a difference in the mean intensity between groups that is not reflected in the density difference.

Usage:
------
    cstats_find_incongruent -c tukey_results.csv -l groupA -g groupB [-v]
"""

from pathlib import Path
import pandas as pd
from glob import glob
from rich import print
from rich.traceback import install

from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM

from unravel.core.config import Configuration
from unravel.core.utils import log_command, verbose_start_msg, verbose_end_msg



[docs]
def parse_args():
    """Build the command-line parser for this script and parse the arguments.

    Returns:
        The namespace returned by ``parser.parse_args()``, with the attributes
        ``csv_name``, ``lesser_group``, ``greater_group`` and ``verbose``.
    """
    parser = RichArgumentParser(
        formatter_class=SuppressMetavar,
        add_help=False,
        docstring=__doc__,
    )

    # Arguments the user must always provide
    required_group = parser.add_argument_group('Required arguments')
    required_group.add_argument(
        '-c', '--csv_name',
        help='Name of the CSV file.',
        required=True,
        action=SM,
    )
    required_group.add_argument(
        '-l', '--lesser_group',
        help='Group with a lower mean for the comparison of interest.',
        required=True,
        action=SM,
    )
    required_group.add_argument(
        '-g', '--greater_group',
        help='Group with a higher mean for the comparison of interest.',
        required=True,
        action=SM,
    )

    # Optional switches
    general_group = parser.add_argument_group('General arguments')
    general_group.add_argument(
        '-v', '--verbose',
        help='Increase verbosity. Default: False',
        action='store_true',
        default=False,
    )

    return parser.parse_args()




[docs]
def find_incongruent_clusters(df, expected_lower_mean_group, expected_higher_mean_group):

    # Determine the comparison string (e.g. 'groupA vs groupB' in the 'comparison' column)
    comparison_str1 = f'{expected_lower_mean_group} vs {expected_higher_mean_group}'
    comparison_str2 = f'{expected_higher_mean_group} vs {expected_lower_mean_group}'

    # Filter data based on the comparison string
    filtered_df = df[
        (df['comparison'] == comparison_str1) |
        (df['comparison'] == comparison_str2)
    ]

    # Find clusters that are significant and incongruent with the prediction
    incongruent_clusters = filtered_df[
        (filtered_df['significance'] != 'n.s.') &
        (filtered_df['higher_mean_group'] != expected_higher_mean_group)
    ]['cluster_ID'].tolist()
    
    return incongruent_clusters



[docs]
@log_command
def main():
    """Report incongruent clusters for each matching directory in the current working directory.

    A directory matches when its name contains '<greater_group>_gt_<lesser_group>'
    or '<lesser_group>_lt_<greater_group>'. For each match, the CSV at
    '<dir>/_valid_clusters_stats/<csv_name>' is loaded and the incongruent
    cluster IDs are printed. Directories that lack the CSV are reported and
    skipped so that one missing file does not abort the whole run.
    """
    install()
    args = parse_args()
    Configuration.verbose = args.verbose
    verbose_start_msg()

    # Construct the substrings used to find matching subdirs
    dir_substrings = (
        f'{args.greater_group}_gt_{args.lesser_group}',
        f'{args.lesser_group}_lt_{args.greater_group}',
    )

    # Sorted so the report order is reproducible (iterdir() order is arbitrary)
    matching_dirs = sorted(
        d for d in Path.cwd().iterdir()
        if d.is_dir() and any(substring in d.name for substring in dir_substrings)
    )

    for subdir in matching_dirs:
        print(f"\nProcessing directory: [default bold]{subdir.name}[/]")

        csv_path = subdir / "_valid_clusters_stats" / args.csv_name
        if not csv_path.is_file():
            # Keep going: other matching directories may still have results
            print(f"    CSV not found, skipping: {csv_path}\n")
            continue

        df = pd.read_csv(csv_path)
        incongruent_clusters = find_incongruent_clusters(df, args.lesser_group, args.greater_group)

        if incongruent_clusters:
            print(f'\n    CSV: {args.csv_name}')
            print(f"    Incongruent clusters: {incongruent_clusters}\n")
        else:
            print(f'    CSV: {args.csv_name}')
            print("    No incongruent clusters found.\n")

    verbose_end_msg()



if __name__ == '__main__':
    main()