Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| # Prepare the MoNuSeg dataset by converting and re-sorting files | |
| # | |
| # @ Fabian Hörst, fabian.hoerst@uk-essen.de | |
| # Institute for Artificial Intelligence in Medicine, | |
| # University Medicine Essen | |
| from PIL import Image | |
| import xml.etree.ElementTree as ET | |
| from skimage import draw | |
| import numpy as np | |
| from pathlib import Path | |
| from typing import Union | |
| import argparse | |
def _convert_image(img_path: Path, target_dir: Path, size: int) -> None:
    """Resize one image to ``size`` x ``size`` and store it as PNG in ``target_dir``."""
    # The context manager closes the source file handle as soon as the resized
    # copy exists; resize() returns a new, independent image.
    with Image.open(img_path) as source_image:
        resized = source_image.resize((size, size), resample=Image.Resampling.LANCZOS)
    resized.save(target_dir / f"{img_path.stem}.png")


def _rasterize_annotation(annot_path: Path, size: int) -> np.ndarray:
    """Draw every ``Region`` polygon of one XML annotation into a ``size`` x ``size`` instance map."""
    # np.zeros keeps its default float dtype so the saved masks stay in the
    # same format as before.
    instance_map = np.zeros((size, size))
    # Only the first child of the XML root is evaluated.
    annotation = ET.parse(annot_path).getroot()[0]
    for regions in annotation.findall("Regions"):
        # Instance ids start at 1 (0 is background); later polygons overwrite
        # earlier ones where they overlap.
        for instance_id, region in enumerate(regions.findall("Region"), start=1):
            # Look the vertex list up by tag instead of by child position.
            vertices = region.find("Vertices")
            if vertices is None:
                raise ValueError(
                    f"Region without a 'Vertices' element in {annot_path}"
                )
            x_coords = np.array([float(vertex.attrib["X"]) for vertex in vertices])
            y_coords = np.array([float(vertex.attrib["Y"]) for vertex in vertices])
            # draw.polygon expects (row, column) order, i.e. (Y, X); the shape
            # argument clips polygons that reach outside the mask.
            fill_rows, fill_cols = draw.polygon(
                y_coords, x_coords, instance_map.shape
            )
            instance_map[fill_rows, fill_cols] = instance_id
    return instance_map


def convert_monuseg(
    input_path: Union[Path, str], output_path: Union[Path, str]
) -> None:
    """Convert the MoNuSeg dataset to a new format (1000 -> 1024, tiff to png and xml to npy)

    For each of the parts ``testing`` and ``training`` the function reads
    ``<input_path>/<part>/images/*.tif`` and ``<input_path>/<part>/labels/*.xml``
    and writes ``<output_path>/<part>/images/<stem>.png`` (LANCZOS-resized) and
    ``<output_path>/<part>/labels/<stem>.npy`` (instance map, NEAREST-resized so
    that instance ids are not interpolated). Output folders are created if missing.

    Args:
        input_path (Union[Path, str]): Input dataset
        output_path (Union[Path, str]): Output path

    Raises:
        ValueError: If a ``Region`` in an annotation file has no ``Vertices`` element.
    """
    source_size = 1000  # edge length used for the rasterized annotation masks
    target_size = 1024  # edge length of all written images and masks

    input_path = Path(input_path)
    output_path = Path(output_path)
    output_path.mkdir(exist_ok=True, parents=True)

    # testing and training
    for part in ("testing", "training"):
        print(f"Prepare: {part}")
        input_path_part = input_path / part
        images_out = output_path / part / "images"
        labels_out = output_path / part / "labels"
        images_out.mkdir(exist_ok=True, parents=True)
        labels_out.mkdir(exist_ok=True, parents=True)

        # images
        for img_path in sorted((input_path_part / "images").glob("*.tif")):
            _convert_image(img_path, images_out, target_size)

        # masks
        for annot_path in sorted((input_path_part / "labels").glob("*.xml")):
            instance_map = _rasterize_annotation(annot_path, source_size)
            inst_image = Image.fromarray(instance_map)
            resized_mask = np.array(
                inst_image.resize(
                    (target_size, target_size), resample=Image.Resampling.NEAREST
                )
            )
            np.save(labels_out / f"{annot_path.stem}.npy", resized_mask)
    print("Finished")
# Command-line interface of the conversion script.
parser = argparse.ArgumentParser(
    description="Convert the MoNuSeg dataset",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
# Both options are mandatory string paths; they are registered in this order.
for _flag, _help_text in (
    ("--input_path", "Input path of the original MoNuSeg dataset"),
    ("--output_path", "Output path to store the processed MoNuSeg dataset"),
):
    parser.add_argument(_flag, type=str, help=_help_text, required=True)
del _flag, _help_text  # keep the module namespace free of loop leftovers
if __name__ == "__main__":
    # Read both paths from the command line and run the conversion.
    cli_args = parser.parse_args()
    convert_monuseg(
        input_path=Path(cli_args.input_path),
        output_path=Path(cli_args.output_path),
    )