Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| # CLI | |
| # | |
| # @ Fabian Hörst, fabian.hoerst@uk-essen.de | |
| # Institute for Artifical Intelligence in Medicine, | |
| # University Medicine Essen | |
| import argparse | |
| import json | |
| import logging | |
| from copy import copy | |
| from pathlib import Path | |
| from typing import Any, List, Optional, Tuple | |
| import yaml | |
| from pydantic import BaseModel, validator | |
| from base_ml.base_cli import ABCParser | |
| from configs.python.config import ANNOTATION_EXT, LOGGING_EXT, WSI_EXT | |
| from utils.logger import Logger | |
| class PreProcessingYamlConfig(BaseModel): | |
| """For explanation, see PreProcessingParser""" | |
| # Set all to optional to allow selecting from yaml and argparse cli | |
| # dataset paths | |
| wsi_paths: Optional[str] | |
| output_path: Optional[str] | |
| wsi_extension: Optional[str] | |
| wsi_filelist: Optional[str] | |
| # basic setups | |
| patch_size: Optional[int] | |
| patch_overlap: Optional[float] | |
| target_mpp: Optional[float] | |
| target_mag: Optional[float] | |
| downsample: Optional[int] | |
| level: Optional[int] | |
| context_scales: Optional[List[int]] | |
| check_resolution: Optional[float] | |
| processes: Optional[int] | |
| overwrite: Optional[bool] | |
| # annotation specific settings | |
| annotation_paths: Optional[str] | |
| annotation_extension: Optional[str] | |
| incomplete_annotations: Optional[bool] | |
| label_map_file: Optional[str] | |
| save_only_annotated_patches: Optional[bool] | |
| exclude_classes: Optional[List[str]] | |
| store_masks: Optional[bool] | |
| generate_thumbnails: Optional[bool] | |
| overlapping_labels: Optional[bool] | |
| # macenko stain normalization | |
| normalize_stains: Optional[bool] | |
| normalization_vector_json: Optional[str] | |
| adjust_brightness: Optional[bool] | |
| # finding patches | |
| min_intersection_ratio: Optional[float] | |
| tissue_annotation: Optional[str] | |
| tissue_annotation_intersection_ratio: Optional[float] | |
| masked_otsu: Optional[bool] | |
| otsu_annotation: Optional[str] | |
| filter_patches: Optional[bool] | |
| apply_prefilter: Optional[bool] | |
| # other | |
| log_path: Optional[str] | |
| log_level: Optional[str] | |
| hardware_selection: Optional[str] | |
| wsi_properties: Optional[dict] | |
| class PreProcessingConfig(BaseModel): | |
| """Storing the preprocessing configuration | |
| All string that describe paths are converted to pathlib.Path objects. | |
| Args: | |
| wsi_paths (str): Path to the folder where all WSI are stored or path to a single WSI-file. | |
| output_path (str): Path to the folder where the resulting dataset should be stored. | |
| wsi_extension (str, optional): The extension of the WSI-files. Defaults to "svs. | |
| wsi_filelist (str, optional): Path to a csv-filelist with WSI files (separator: `,`), if provided just these files are used. Must include full paths to WSIs, including suffixes. | |
| Can be used as an replacement for the wsi_paths option. If both are provided, yields an error. Defaults to None. | |
| patch_size (int, optional): The size of the patches in pixel that will be retrieved from the WSI, e.g. 256 for 256px. Defaults to 256. | |
| patch_overlap (float, optional): The percentage amount pixels that should overlap between two different patches. | |
| Please Provide as integer between 0 and 100, indicating overlap in percentage. | |
| Defaults to 0. | |
| target_mpp (float, optional): If this parameter is provided, the output level of the WSI | |
| corresponds to the level that is at the target microns per pixel of the WSI. | |
| Alternative to target_mag, downsaple and level. Highest priority, overwrites all other setups for magnifcation, downsample, or level. | |
| target_mag (float, optional): If this parameter is provided, the output level of the WSI | |
| corresponds to the level that is at the target magnification of the WSI. | |
| Alternative to target_mpp, downsaple and level. High priority, just target_mpp has a higher priority, overwrites downsample and level if provided. Defaults to None. | |
| downsample (int, optional): Each WSI level is downsampled by a factor of 2, downsample | |
| expresses which kind of downsampling should be used with | |
| respect to the highest possible resolution. Defaults to 0. | |
| level (int, optional): The tile level for sampling, alternative to downsample. Defaults to None. | |
| context_scales ([List[int], optional): Define context scales for context patches. Context patches are centered around a central patch. | |
| The context-patch size is equal to the patch-size, but downsampling is different. | |
| Defaults to None. | |
| check_resolution (float, optional): If a float value is supplies, the program checks whether | |
| the resolution of all images corresponds to the given value. | |
| Defaults to None. | |
| processes (int, optional): The number of processes to use. Defaults to 24 | |
| overwrite (bool, optional): Overwrite the patches that have already been created in | |
| case they already exist. Removes dataset. Handle with care! If false, skips already processed files from "processed.json". Defaults to False. | |
| annotation_paths (str, optional): Path to the subfolder where the annotations are | |
| stored or path to a file. Defaults to None. | |
| annotation_extension (str, optional): The extension types used for the annotation files. Defaults to None. | |
| incomplete_annotations (bool, optional): Set to allow WSI without annotation file. Defaults to False. | |
| label_map_file (str, optional): The path to a json file that contains the mapping between | |
| he annotation labels and some integers; an example can be found in examples. Defaults to None. | |
| label_map (dict, optional): Field to store the label mapping defined in the label map file. Gets overwriten by creation - to a dictionary with str: int. Do not pass values. | |
| Defaults to None. | |
| save_only_annotated_patches (bool, optional): If true only patches containing annotations will be stored. Defaults to False. | |
| exclude_classes (List[str], optional): Can be used to exclude annotation classes. Defaults to []. | |
| store_masks (bool, optional): Set to store masks per patch. Defaults to false. | |
| overlapping_labels (bool, optional): Per default, labels (annotations) are mutually exclusive. | |
| If labels overlap, they are overwritten according to the label_map.json ordering (highest number = highest priority). | |
| True means that the mask array is 3D with shape [patch_size, patch_size, len(label_map)], otherwise just [patch_size, patch_size]. | |
| Defaults to False. | |
| normalize_stains (bool, optional): Uses Macenko normalization on a portion of the whole slide images. Defaults to False. | |
| normalization_vector_json (str, optional): The path to a JSON file where the normalization vectors are stored. Defaults to None. | |
| adjust_brightness (bool, optional): Normalize brightness in a batch by clipping to 90 percent. Not recommended, but kept for legacy reasons. Defaults to False. | |
| min_intersection_ratio (float, optional): The minimum intersection between the tissue mask and the patch. | |
| Must be between 0 and 1. 0 means that all patches are extracted. Defaults to 0.01. | |
| tissue_annotation (str, optional): Can be used to name a polygon annotation to determine the tissue area | |
| If a tissue annotation is provided, no Otsu-thresholding is performed. Defaults to None. | |
| tissue_annotation_intersection_ratio (float, optional): Intersection ratio with tissue annotation. Helpful, if ROI annotation is passed, which should not interfere with background ratio. | |
| If not provided, the default min_intersection_ratio with the background is used. Defaults to None. | |
| masked_otsu (bool, optional): Use annotation to mask the thumbnail before otsu-thresholding is used. Defaults to False. | |
| otsu_annotation (bool, optional): Can be used to name a polygon annotation to determine the area | |
| for masked otsu thresholding. Seperate multiple labels with ' ' (whitespace). Defaults to None. | |
| filter_patches (bool, optional): Post-extraction patch filtering to sort out artefacts, marker and other non-tissue patches with a DL model. Time consuming. | |
| Defaults to False. | |
| apply_prefilter (bool, optional): Pre-extraction mask filtering to remove marker from mask before applying otsu. Defaults to False. | |
| log_path (str, optional): Path where log files should be stored. Otherwise, log files are stored in the output folder. Defaults to None. | |
| log_level (str, optional): Set the logging level. Defaults to "info". | |
| hardware_selection (str, optional): Select hardware device (just if available, otherwise always cucim). Defaults to "cucim". | |
| wsi_properties (dict, optional): Dictionary with manual WSI metadata, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). Supported keys are slide_mpp and magnification | |
| Raises: | |
| ValueError: Patch-size must be positive | |
| ValueError: At least 1 process is needed | |
| ValueError: Batch must contain at least 1 patch, recommended are 100-500. | |
| ValueError: Background ratio must be between 0 and 1. | |
| ValueError: Matching annotation type | |
| ValueError: Matching logging level | |
| ValueError: Matching WSI extension | |
| """ | |
| # dataset paths | |
| output_path: str | |
| wsi_paths: Optional[str] | |
| wsi_filelist: Optional[str] | |
| wsi_extension: Optional[str] = "svs" | |
| # basic setups | |
| patch_size: Optional[int] = 256 | |
| patch_overlap: Optional[float] = 0 | |
| downsample: Optional[int] = 1 | |
| target_mpp: Optional[float] | |
| target_mag: Optional[float] | |
| level: Optional[int] | |
| context_scales: Optional[List[int]] | |
| check_resolution: Optional[float] | |
| processes: Optional[int] = 24 | |
| overwrite: Optional[bool] = False | |
| # annotation specific settings | |
| annotation_paths: Optional[str] | |
| annotation_extension: Optional[str] | |
| incomplete_annotations: Optional[bool] = False | |
| label_map_file: Optional[str] | |
| label_map: Optional[dict] | |
| save_only_annotated_patches: Optional[bool] = False | |
| exclude_classes: Optional[List[str]] = [] | |
| store_masks: Optional[bool] = False | |
| overlapping_labels: Optional[bool] = False | |
| # macenko stain normalization | |
| normalize_stains: Optional[bool] = False | |
| normalization_vector_json: Optional[str] | |
| adjust_brightness: Optional[bool] = False | |
| # finding patches | |
| min_intersection_ratio: Optional[float] = 0.01 | |
| tissue_annotation: Optional[str] | |
| tissue_annotation_intersection_ratio: Optional[float] | |
| masked_otsu: Optional[bool] = False | |
| otsu_annotation: Optional[str] | |
| filter_patches: Optional[bool] = False | |
| apply_prefilter: Optional[bool] = False | |
| # other | |
| log_path: Optional[str] | |
| log_level: Optional[str] = "info" | |
| hardware_selection: Optional[str] = "cucim" | |
| wsi_properties: Optional[dict] | |
| def __init__(__pydantic_self__, **data: Any) -> None: | |
| super().__init__(**data) | |
| __pydantic_self__.__post_init_post_parse__() | |
| # validators | |
| def patch_size_must_be_positive(cls, v): | |
| if v <= 0: | |
| raise ValueError("Patch-Size in pixels must be positive") | |
| return v | |
| def overlap_percentage(cls, v): | |
| if v < 0 and v >= 100: | |
| raise ValueError( | |
| "Patch-Overlap in percentage must be between 0 and 100 (100 not included)" | |
| ) | |
| return v | |
| def processes_must_be_positive(cls, v): | |
| if v <= 0: | |
| raise ValueError("At least 1 process is needed") | |
| return v | |
| def min_intersection_ratio_range_check(cls, v): | |
| if v < 0 and v > 1: | |
| raise ValueError("Background ratio must be between 0 and 1") | |
| return v | |
| def annotation_extension_selector(cls, v): | |
| if v not in ANNOTATION_EXT: | |
| raise ValueError( | |
| f"The extension types used for the annotation files is wrong, the options are: {ANNOTATION_EXT}" | |
| ) | |
| return v | |
| def log_level_check(cls, v): | |
| if v not in LOGGING_EXT: | |
| raise ValueError(f"Wrong logging level. Options are {LOGGING_EXT}") | |
| return v.upper() | |
| def wsi_extension_selector(cls, v): | |
| if v not in WSI_EXT: | |
| raise ValueError( | |
| f"The extension types used for the WSI files is wrong, the options are: {WSI_EXT}" | |
| ) | |
| return v | |
| def __post_init_post_parse__(self): | |
| """Post processing after parsing. | |
| Converting paths to `Pathlib` object, convert strings and stored dict. | |
| Raises: | |
| RuntimeError: Please provide either wsi_paths or wsi_filelist argument | |
| ValueError: A label map file must be used if annotations are passed | |
| ValueError: Checking for right label_map format (.json) file. | |
| """ | |
| if (self.wsi_paths is None and self.wsi_filelist is None) or ( | |
| self.wsi_paths is not None and self.wsi_filelist is not None | |
| ): | |
| raise RuntimeError( | |
| "Please provide either wsi_paths or wsi_filelist argument!" | |
| ) | |
| self.output_path = Path(self.output_path).resolve() | |
| if self.wsi_paths is not None: | |
| self.wsi_paths = Path(self.wsi_paths).resolve() | |
| if self.wsi_filelist is not None: | |
| self.wsi_filelist = Path(self.wsi_filelist).resolve() | |
| if self.annotation_paths is not None: | |
| self.annotation_paths = Path(self.annotation_paths).resolve() | |
| if self.label_map_file is None: | |
| raise ValueError( | |
| "Please provide label_map_file if annoations should be used" | |
| ) | |
| else: | |
| self.label_map_file = Path(self.label_map_file).resolve() | |
| if self.label_map_file.suffix != ".json": | |
| raise ValueError("Please provide label_map_file as json file") | |
| with open(str(self.label_map_file)) as json_file: | |
| label_map = json.load(json_file) | |
| self.label_map = {k.lower(): v for k, v in label_map.items()} | |
| if self.label_map_file is None or self.label_map is None: | |
| self.label_map = {"background": 0} | |
| if self.log_path is None: | |
| self.log_path = self.output_path | |
| if self.otsu_annotation is not None: | |
| self.otsu_annotation = self.otsu_annotation.lower() | |
| if self.tissue_annotation is not None: | |
| self.tissue_annotation = self.tissue_annotation.lower() | |
| if len(self.exclude_classes) > 0: | |
| self.exclude_classes = [f.lower() for f in self.exclude_classes] | |
| if self.tissue_annotation_intersection_ratio is None: | |
| self.tissue_annotation_intersection_ratio = self.min_intersection_ratio | |
| else: | |
| if self.tissue_annotation_intersection_ratio < 0 and self.tissue_annotation_intersection_ratio > 1: | |
| raise RuntimeError("Tissue_annotation_intersection_ratio must be between 0 and 1") | |
| class PreProcessingParser(ABCParser): | |
| """Configuration Parser for Preprocessing""" | |
| def __init__(self) -> None: | |
| parser = argparse.ArgumentParser( | |
| formatter_class=argparse.ArgumentDefaultsHelpFormatter | |
| ) | |
| # dataset paths | |
| parser.add_argument( | |
| "--wsi_paths", | |
| type=str, | |
| help="Path to the folder where all WSI are stored or path to a single WSI-file.", | |
| ) | |
| parser.add_argument( | |
| "--wsi_filelist", | |
| type=str, | |
| help="Path to a csv-filelist with WSI files (separator: `,`), if provided just these files are used." | |
| "Must include full paths to WSIs, including suffixes." | |
| "Can be used as an replacement for the wsi_paths option." | |
| "If both are provided, yields an error.", | |
| ) | |
| parser.add_argument( | |
| "--output_path", | |
| type=str, | |
| help="Path to the folder where the resulting dataset should be stored.", | |
| ) | |
| parser.add_argument( | |
| "--wsi_extension", | |
| type=str, | |
| choices=WSI_EXT, | |
| help="The extension types used for the WSI files, the " | |
| "options are: " + str(WSI_EXT), | |
| ) | |
| parser.add_argument( | |
| "--config", | |
| type=str, | |
| help="Path to a config file. The config file can hold the same parameters as the CLI. " | |
| "Parameters provided with the CLI are always having precedence over the parameters in the config file.", | |
| ) | |
| # basic setup | |
| parser.add_argument( | |
| "--patch_size", | |
| type=int, | |
| help="The size of the patches in pixel that will be retrieved from the WSI, e.g. 256 for 256px", | |
| ) | |
| parser.add_argument( | |
| "--patch_overlap", | |
| type=float, | |
| help="The percentage amount pixels that should overlap between two different patches. " | |
| "Please Provide as integer between 0 and 100, indicating overlap in percentage.", | |
| ) | |
| parser.add_argument( | |
| "--target_mpp", | |
| type=float, | |
| help="If this parameter is provided, the output level of the WSI " | |
| "corresponds to the level that is at the target microns per pixel of the WSI. " | |
| "Alternative to target_mag, downsaple and level. Highest priority, overwrites all other setups for magnifcation, downsample, or level.", | |
| ) | |
| parser.add_argument( | |
| "--target_mag", | |
| type=float, | |
| help="If this parameter is provided, the output level of the WSI " | |
| "corresponds to the level that is at the target magnification of the WSI. " | |
| "Alternative to target_mpp, downsaple and level. High priority, just target_mpp has a higher priority, overwrites downsample and level if provided.", | |
| ) | |
| parser.add_argument( | |
| "--downsample", | |
| type=int, | |
| help="Each WSI level is downsampled by a factor of 2, downsample " | |
| "expresses which kind of downsampling should be used with " | |
| "respect to the highest possible resolution. Medium priority, gets overwritten by target_mag and target_mpp if provided, " | |
| "but overwrites level.", | |
| ) | |
| parser.add_argument( | |
| "--level", | |
| type=int, | |
| help="The tile level for sampling, alternative to downsample. " | |
| "Lowest priority, gets overwritten by target_mag and downsample if they are provided. ", | |
| ) | |
| parser.add_argument( | |
| "--context_scales", | |
| nargs="*", | |
| type=int, | |
| help="Define context scales for context patches. Context patches are centered around a central patch. " | |
| "The context-patch size is equal to the patch-size, but downsampling is different", | |
| ) | |
| parser.add_argument( | |
| "--check_resolution", | |
| type=float, | |
| help="If a float value is supplies, the program checks whether " | |
| "the resolution of all images corresponds to the given " | |
| "value", | |
| ) | |
| parser.add_argument( | |
| "--processes", | |
| type=int, | |
| help="The number of processes to use.", | |
| ) | |
| parser.add_argument( | |
| "--overwrite", | |
| action="store_true", | |
| default=None, | |
| help="Overwrite the patches that have already been created in " | |
| "case they already exist. Removes dataset. Handle with care!", | |
| ) | |
| # annotation specific settings | |
| parser.add_argument( | |
| "--annotation_paths", | |
| type=str, | |
| help="Path to the subfolder where the XML/JSON annotations are " | |
| "stored or path to a file", | |
| ) | |
| parser.add_argument( | |
| "--annotation_extension", | |
| type=str, | |
| choices=ANNOTATION_EXT, | |
| help="The extension types used for the annotation files, the " | |
| "options are: " + str(ANNOTATION_EXT), | |
| ) | |
| parser.add_argument( | |
| "--incomplete_annotations", | |
| action="store_true", | |
| default=None, | |
| help="Set to allow WSI without annotation file", | |
| ) | |
| parser.add_argument( | |
| "--label_map_file", | |
| type=str, | |
| help="The path to a json file that contains the mapping between" | |
| " the annotation labels and some integers; an example can " | |
| "be found in examples", | |
| ) | |
| parser.add_argument( | |
| "--save_only_annotated_patches", | |
| action="store_true", | |
| default=None, | |
| help="If true only patches containing annotations will be stored", | |
| ) | |
| parser.add_argument( | |
| "--exclude_classes", | |
| action="append", | |
| default=None, | |
| help="Can be used to exclude annotation classes", | |
| ) | |
| parser.add_argument( | |
| "--store_masks", | |
| action="store_true", | |
| default=None, | |
| help="Set to store masks per patch. Defaults to false", | |
| ) | |
| parser.add_argument( | |
| "--overlapping_labels", | |
| action="store_true", | |
| default=None, | |
| help="Per default, labels (annotations) are mutually exclusive. " | |
| "If labels overlap, they are overwritten according to the label_map.json ordering" | |
| " (highest number = highest priority)", | |
| ) | |
| # macenko stain normalization | |
| parser.add_argument( | |
| "--normalize_stains", | |
| action="store_true", | |
| default=None, | |
| help="Uses Macenko normalization on a portion of the whole " "slide image", | |
| ) | |
| parser.add_argument( | |
| "--normalization_vector_json", | |
| type=str, | |
| help="The path to a JSON file where the normalization vectors are stored", | |
| ) | |
| parser.add_argument( | |
| "--adjust_brightness", | |
| action="store_true", | |
| default=None, | |
| help="Normalize brightness in a batch by clipping to 90 percent. Not recommended, but kept for legacy reasons", | |
| ) | |
| # finding patches | |
| parser.add_argument( | |
| "--min_intersection_ratio", | |
| type=float, | |
| help="The minimum intersection between the tissue mask and the patch. " | |
| "Must be between 0 and 1. 0 means that all patches are extracted.", | |
| ) | |
| parser.add_argument( | |
| "--tissue_annotation", | |
| type=str, | |
| help="Can be used to name a polygon annotation to determine the tissue area. " | |
| "If a tissue annotation is provided, no Otsu-thresholding is performed", | |
| ) | |
| parser.add_argument( | |
| "--tissue_annotation_intersection_ratio", | |
| type=float, | |
| help="Intersection ratio with tissue annotation. Helpful, if ROI annotation is passed, " | |
| "which should not interfere with background ratio. If not provided, the default min_intersection_ratio with the background is used." | |
| ) | |
| parser.add_argument( | |
| "--masked_otsu", | |
| action="store_true", | |
| default=None, | |
| help="Use annotation to mask the thumbnail before otsu-thresholding is used", | |
| ) | |
| parser.add_argument( | |
| "--otsu_annotation", | |
| type=str, | |
| help="Can be used to name a polygon annotation to determine the area " | |
| "for masked otsu thresholding. Seperate multiple labels with ' ' (whitespace)", | |
| ) | |
| parser.add_argument( | |
| "--filter_patches", | |
| action="store_true", | |
| default=None, | |
| help="Post-extraction patch filtering to sort out artefacts, marker and other non-tissue patches with a DL model. Time consuming. Defaults to False.", | |
| ) | |
| parser.add_argument( | |
| "--apply_prefilter", | |
| action="store_true", | |
| default=None, | |
| help="Pre-extraction mask filtering to remove marker from mask before applying otsu. Defaults to False.", | |
| ) | |
| # other | |
| parser.add_argument( | |
| "--log_path", | |
| type=str, | |
| help="Path where log files should be stored. Otherwise, log files are stored in the output folder", | |
| ) | |
| parser.add_argument( | |
| "--log_level", | |
| type=str, | |
| choices=LOGGING_EXT, | |
| help=f"Set the logging level. Options are {LOGGING_EXT}", | |
| ) | |
| parser.add_argument( | |
| "--hardware_selection", | |
| type=str, | |
| choices=["cucim", "openslide"], | |
| help="Select hardware device (just if available, otherwise always cucim). Defaults to cucim.", | |
| ) | |
| parser.add_argument( | |
| "--wsi_properties", | |
| type=dict, | |
| help="Dictionary with manual WSI metadata, but just applies if metadata cannot be derived from OpenSlide (e.g., for .tiff files). Supported keys are slide_mpp and magnification", | |
| ) | |
| self.parser = parser | |
| def get_config(self) -> Tuple[PreProcessingConfig, logging.Logger]: | |
| """Setup function for the CLI-configuration. | |
| At first, all CLI arguments are loaded. Then the provided configuration file | |
| (needs to be a `.yaml` file) is loaded. CLI arguments are having a higher priority than | |
| arguments stored in the configuration file. | |
| The configuration is stored as an :obj:`~preprocessing.src.cli.PreProcessingConfig` object. | |
| A logger object is instantiated and returned. | |
| Raises: | |
| ValueError: The provided configuration file must be a yaml file. | |
| Returns: | |
| - PreProcessingConfig: Preprocessing configuration | |
| - logging.Logger: Logging object | |
| """ | |
| opt = self.parser.parse_args() | |
| if opt.config is not None: | |
| opt_dict = vars(opt) | |
| if Path(opt.config).suffix != ".yaml": | |
| raise ValueError("Please provide config file as `.yaml` file") | |
| with open(opt.config, "r") as config_file: | |
| yaml_config = yaml.safe_load(config_file) | |
| yaml_config = PreProcessingYamlConfig(**yaml_config) | |
| # convert to dict and override missing values | |
| yaml_config_dict = dict(yaml_config) | |
| for k, v in opt_dict.items(): | |
| if v is None: | |
| if yaml_config_dict[k] is not None: | |
| opt_dict[k] = yaml_config_dict[k] | |
| opt_dict = {k: v for k, v in opt_dict.items() if v is not None} | |
| else: | |
| opt_dict = vars(opt) | |
| opt_dict = {k: v for k, v in opt_dict.items() if v is not None} | |
| # generate final setup | |
| self.preprocessconfig = PreProcessingConfig(**opt_dict) | |
| # create logger | |
| preprocess_logger = Logger( | |
| level=self.preprocessconfig.log_level.upper(), | |
| log_dir=self.preprocessconfig.log_path, | |
| comment="preprocessing", | |
| use_timestamp=True, | |
| ) | |
| self.logger = preprocess_logger.create_logger() | |
| self.logger.debug("Parsed CLI without errors. Logger instantiated.") | |
| return self.preprocessconfig, self.logger | |
| def store_config(self) -> None: | |
| """Store the config file in the logging directory to keep track of the configuration.""" | |
| # get dict and convert paths to str | |
| config_repr = self.preprocessconfig.dict() | |
| config_repr_str = { | |
| k: str(v) for k, v in config_repr.items() if isinstance(v, Path) | |
| } | |
| for k, v in config_repr_str.items(): | |
| config_repr[k] = v | |
| # store in log directory | |
| with open(self.preprocessconfig.log_path / "config.yaml", "w") as yaml_file: | |
| yaml.dump(config_repr, yaml_file, sort_keys=False) | |
| self.logger.debug( | |
| f"Stored config under: {str(self.preprocessconfig.log_path / 'config.yaml')}" | |
| ) | |
| class MacenkoYamlConfig(PreProcessingYamlConfig): | |
| wsi_path: Optional[str] | |
| save_json_path: Optional[str] | |
| class MacenkoConfig(PreProcessingConfig): | |
| save_json_path: str | |
| class MacenkoParser(ABCParser): | |
| """Macenko Vector Calculation CLI""" | |
| def __init__(self) -> None: | |
| parser = argparse.ArgumentParser( | |
| formatter_class=argparse.ArgumentDefaultsHelpFormatter | |
| ) | |
| # dataset paths | |
| parser.add_argument( | |
| "--wsi_path", | |
| type=str, | |
| help="Path to a single WSI-file.", | |
| ) | |
| parser.add_argument( | |
| "--wsi_extension", | |
| type=str, | |
| choices=WSI_EXT, | |
| help="The extension types used for the WSI file, the " | |
| "options are: " + str(WSI_EXT), | |
| ) | |
| parser.add_argument( | |
| "--save_json_path", | |
| type=str, | |
| help="The path to a JSON file where the normalization vectors are going to be stored", | |
| ) | |
| parser.add_argument( | |
| "--config", | |
| type=str, | |
| help="Path to a config file. The config file can hold the same parameters as the CLI. " | |
| "Parameters provided with the CLI are always having precedence over the parameters in the config file.", | |
| ) | |
| # basic setup | |
| parser.add_argument( | |
| "--patch_size", | |
| type=int, | |
| help="The size of the patches in pixel that will be retrieved from the WSI, e.g. 256 for 256px", | |
| ) | |
| parser.add_argument( | |
| "--patch_overlap", | |
| type=float, | |
| help="The percentage amount pixels that should overlap between two different patches. " | |
| "Please Provide as integer between 0 and 100, indicating overlap in percentage.", | |
| ) | |
| parser.add_argument( | |
| "--downsample", | |
| type=int, | |
| help="Each WSI level is downsampled by a factor of 2, downsample " | |
| "expresses which kind of downsampling should be used with " | |
| "respect to the highest possible resolution. Medium priority, gets overwritten by target_mag if provided, " | |
| "but overwrites level.", | |
| ) | |
| parser.add_argument( | |
| "--target_mag", | |
| type=float, | |
| help="If this parameter is provided, the output level of the WSI " | |
| "corresponds to the level that is at the target magnification of the WSI. " | |
| "Alternative to downsaple and level. Highest priority, overwrites downsample and level if provided.", | |
| ) | |
| parser.add_argument( | |
| "--level", | |
| type=int, | |
| help="The tile level for sampling, alternative to downsample. " | |
| "Lowest priority, gets overwritten by target_mag and downsample if they are provided. ", | |
| ) | |
| # annotations | |
| parser.add_argument( | |
| "--annotation_paths", | |
| type=str, | |
| help="Path to the subfolder where the XML/JSON annotations are " | |
| "stored or path to a file", | |
| ) | |
| parser.add_argument( | |
| "--annotation_extension", | |
| type=str, | |
| choices=ANNOTATION_EXT, | |
| help="The extension types used for the annotation files, the " | |
| "options are: " + str(ANNOTATION_EXT), | |
| ) | |
| parser.add_argument( | |
| "--label_map_file", | |
| type=str, | |
| help="The path to a json file that contains the mapping between" | |
| " the annotation labels and some integers; an example can " | |
| "be found in examples", | |
| ) | |
| parser.add_argument( | |
| "--save_only_annotated_patches", | |
| action="store_true", | |
| default=None, | |
| help="If true only patches containing annotations will be stored", | |
| ) | |
| parser.add_argument( | |
| "--exclude_classes", | |
| action="append", | |
| default=None, | |
| help="Can be used to exclude annotation classes", | |
| ) | |
| # appearance | |
| parser.add_argument( | |
| "--adjust_brightness", | |
| action="store_true", | |
| default=None, | |
| help="Normalize brightness in a batch by clipping to 90 percen0. Not recommended, but kept for legacy reasonst", | |
| ) | |
| # finding patches | |
| parser.add_argument( | |
| "--min_intersection_ratio", | |
| type=float, | |
| help="The minimum intersection between the tissue mask and the patch. " | |
| "Must be between 0 and 1. 0 means that all patches are extracted.", | |
| ) | |
| parser.add_argument( | |
| "--tissue_annotation", | |
| type=str, | |
| help="Can be used to name a polygon annotation to determine the tissue area. " | |
| "If a tissue annotation is provided, no Otsu-thresholding is performed", | |
| ) | |
| parser.add_argument( | |
| "--masked_otsu", | |
| action="store_true", | |
| default=None, | |
| help="Use annotation to mask the thumbnail before otsu-thresholding is used", | |
| ) | |
| parser.add_argument( | |
| "--otsu_annotation", | |
| type=str, | |
| help="Can be used to name a polygon annotation to determine the area " | |
| "for masked otsu thresholding. Seperate multiple labels with ' ' (whitespace)", | |
| ) | |
| # other | |
| parser.add_argument( | |
| "--log_path", | |
| type=str, | |
| help="Path where log files should be stored. Otherwise, log files are stored in the output folder", | |
| ) | |
| parser.add_argument( | |
| "--log_level", | |
| type=str, | |
| choices=LOGGING_EXT, | |
| help=f"Set the logging level. Options are {LOGGING_EXT}", | |
| ) | |
| parser | |
| self.parser = parser | |
| self.default_dict = { | |
| "check_resolution": False, | |
| "processes": 1, | |
| "overwrite": False, | |
| "store_masks": False, | |
| "overlapping_labels": False, | |
| "normalization_vector_json": None, | |
| "normalize_stains": False, | |
| } | |
| def get_config(self) -> Tuple[MacenkoConfig, logging.Logger]: | |
| opt = self.parser.parse_args() | |
| if opt.config is not None: | |
| if Path(opt.config).suffix != ".yaml": | |
| raise ValueError("Please provide config file as `.yaml` file") | |
| with open(opt.config, "r") as config_file: | |
| yaml_config = yaml.safe_load(config_file) | |
| yaml_config = MacenkoYamlConfig(**yaml_config) | |
| # convert to dict and override missing values | |
| opt_dict = vars(opt) | |
| yaml_config_dict = dict(yaml_config) | |
| for k, v in opt_dict.items(): | |
| if v is None: | |
| if yaml_config_dict[k] is not None: | |
| opt_dict[k] = yaml_config_dict[k] | |
| opt_dict = {k: v for k, v in opt_dict.items() if v is not None} | |
| opt_dict["wsi_paths"] = copy(opt_dict["wsi_path"]) | |
| opt_dict.pop("wsi_path") | |
| # overwrite hard coded options | |
| for k, v in self.default_dict.items(): | |
| opt_dict[k] = v | |
| assert ( | |
| Path(opt_dict["save_json_path"]).suffix == ".json" | |
| ), "Output path must be a .json file" | |
| opt_dict["output_path"] = str(Path(opt_dict["save_json_path"]).parent) | |
| self.preprocessconfig = MacenkoConfig(**opt_dict) | |
| # create logger | |
| preprocess_logger = Logger( | |
| level=self.preprocessconfig.log_level.upper(), | |
| log_dir=self.preprocessconfig.log_path, | |
| comment="preprocessing", | |
| use_timestamp=True, | |
| ) | |
| self.logger = preprocess_logger.create_logger() | |
| self.logger.debug("Parsed CLI without errors. Logger instantiated.") | |
| return self.preprocessconfig, self.logger | |
| def store_config(self) -> None: | |
| """Store the config file in the logging directory to keep track of the configuration.""" | |
| pass | |