Source code for unravel.cluster_stats.find_incongruent_clusters
#!/usr/bin/env python3
"""
Use ``cstats_find_incongruent`` (``cfi``) from UNRAVEL if ``cstats_fdr`` was used to convert non-directional p value maps into directional cluster indices. This helps to find clusters where the direction of the mean intensity difference between groups does not match the direction of the difference in cell/label density between groups.
Input:
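- ./<subdir>/_valid_clusters_stats/tukey_results.csv or ttest_results.csv from ``cstats`` (read from each subdirectory of the current working directory whose name contains <greater_group>_gt_<lesser_group> or <lesser_group>_lt_<greater_group>)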
- Columns: 'cluster_ID', 'comparison', 'higher_mean_group', 'p-value', 'significance'
Output:
- Cluster IDs where the mean intensity difference does not match the density difference between groups A and B.
Note:
- This module is useful for finding clusters where z-scoring introduces incongruencies between the mean intensity difference and the density difference.
- For example, if group A has increased IF in region A and not B, z-scoring may decrease the relative intensity of region B.
- This decrease for region B in one group may introduce a difference in the mean intensity between groups that is not reflected in the density difference.
Usage:
------
cstats_find_incongruent -c tukey_results.csv -l groupA -g groupB [-v]
"""
from pathlib import Path
import pandas as pd
from glob import glob
from rich import print
from rich.traceback import install
from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM
from unravel.core.config import Configuration
from unravel.core.utils import log_command, verbose_start_msg, verbose_end_msg
[docs]
def parse_args():
    """Build the command-line parser for this script and parse the arguments.

    Returns:
        The object returned by ``parser.parse_args()``; ``main`` reads its
        ``csv_name``, ``lesser_group``, ``greater_group`` and ``verbose``
        attributes.
    """
    parser = RichArgumentParser(formatter_class=SuppressMetavar, add_help=False, docstring=__doc__)

    # All three required options share the same keyword arguments, so they are
    # declared as (short flag, long flag, help text) rows and added in a loop.
    required_options = (
        ('-c', '--csv_name', 'Name of the CSV file.'),
        ('-l', '--lesser_group', 'Group with a lower mean for the comparison of interest.'),
        ('-g', '--greater_group', 'Group with a higher mean for the comparison of interest.'),
    )
    required_group = parser.add_argument_group('Required arguments')
    for short_flag, long_flag, help_text in required_options:
        required_group.add_argument(short_flag, long_flag, help=help_text, required=True, action=SM)

    general_group = parser.add_argument_group('General arguments')
    general_group.add_argument(
        '-v', '--verbose',
        help='Increase verbosity. Default: False',
        action='store_true',
        default=False,
    )

    return parser.parse_args()
[docs]
def find_incongruent_clusters(df, expected_lower_mean_group, expected_higher_mean_group):
# Determine the comparison string (e.g. 'groupA vs groupB' in the 'comparison' column)
comparison_str1 = f'{expected_lower_mean_group} vs {expected_higher_mean_group}'
comparison_str2 = f'{expected_higher_mean_group} vs {expected_lower_mean_group}'
# Filter data based on the comparison string
filtered_df = df[
(df['comparison'] == comparison_str1) |
(df['comparison'] == comparison_str2)
]
# Find clusters that are significant and incongruent with the prediction
incongruent_clusters = filtered_df[
(filtered_df['significance'] != 'n.s.') &
(filtered_df['higher_mean_group'] != expected_higher_mean_group)
]['cluster_ID'].tolist()
return incongruent_clusters
[docs]
@log_command
def main():
    """Report incongruent clusters for every matching subdirectory of the current directory.

    A subdirectory matches when its name contains either
    ``<greater_group>_gt_<lesser_group>`` or ``<lesser_group>_lt_<greater_group>``.
    For each match, ``_valid_clusters_stats/<csv_name>`` is read and the cluster
    IDs returned by ``find_incongruent_clusters`` are printed.

    A matching subdirectory that lacks the CSV is reported and skipped so that
    one incomplete directory does not abort the scan of the others.
    """
    install()
    args = parse_args()
    Configuration.verbose = args.verbose
    verbose_start_msg()

    # Construct the substrings that identify directories for this directional comparison
    name_markers = (
        f'{args.greater_group}_gt_{args.lesser_group}',
        f'{args.lesser_group}_lt_{args.greater_group}',
    )

    # iterdir() yields entries in arbitrary order; sort so the report is reproducible
    matching_dirs = sorted(
        d for d in Path.cwd().iterdir()
        if d.is_dir() and any(marker in d.name for marker in name_markers)
    )

    if not matching_dirs:
        print(f"\n No subdirectories containing '{name_markers[0]}' or '{name_markers[1]}' were found.\n")

    for subdir in matching_dirs:
        print(f"\nProcessing directory: [default bold]{subdir.name}[/]")

        csv_path = subdir / "_valid_clusters_stats" / args.csv_name
        try:
            df = pd.read_csv(csv_path)
        except FileNotFoundError:
            # Keep scanning the remaining directories instead of crashing here
            print(f" [yellow]Skipping: {csv_path} not found.[/]\n")
            continue

        incongruent_clusters = find_incongruent_clusters(df, args.lesser_group, args.greater_group)

        if incongruent_clusters:
            print(f'\n CSV: {args.csv_name}')
            print(f" Incongruent clusters: {incongruent_clusters}\n")
        else:
            print(f' CSV: {args.csv_name}')
            print(" No incongruent clusters found.\n")

    verbose_end_msg()
# Run the command-line entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()