Spaces:

xiazhi
/

LKCell

Sleeping

App Files Files Community

LKCell / cell_segmentation /datasets /prepare_pannuke_origin.py

qingke1

initial commit

aea73e2 over 1 year ago

raw

history blame contribute delete

3.21 kB

	# -- coding: utf-8 --
	# Prepare PanNuke Dataset by converting and re-sorting files
	#
	# @ Fabian Hörst, fabian.hoerst@uk-essen.de
	# Institute for Artificial Intelligence in Medicine,
	# University Medicine Essen

	import inspect
	import os
	import sys

	# Locate the directory holding this script, then prepend its parent and its
	# grandparent to ``sys.path`` (grandparent ends up first). The import of
	# ``cell_segmentation`` further down presumably relies on one of these
	# directories being the repository root -- confirm against the repo layout.
	_this_file = os.path.abspath(inspect.getfile(inspect.currentframe()))
	currentdir = os.path.dirname(_this_file)
	for parentdir in (
	    os.path.dirname(currentdir),
	    os.path.dirname(os.path.dirname(currentdir)),
	):
	    sys.path.insert(0, parentdir)

	import numpy as np
	from pathlib import Path
	from PIL import Image
	from tqdm import tqdm
	import argparse
	from cell_segmentation.utils.metrics import remap_label


	def process_fold(fold, input_path, output_path) -> None:
	    """Convert one PanNuke fold from bulk ``.npy`` arrays into per-sample files.

	    Reads ``images.npy`` and ``masks.npy`` from ``<input_path>/fold<fold>`` and
	    writes into ``<output_path>/fold<fold>`` (directories are created if
	    missing):

	    * ``images/<fold>_<i>.png`` -- sample ``i`` of the image array, cast to
	      ``uint8``.
	    * ``labels/<fold>_<i>.npy`` -- a dict with the keys ``"inst_map"`` and
	      ``"type_map"`` built from sample ``i`` of the mask array.

	    Args:
	        fold: Fold identifier; used in the ``fold<fold>`` directory name and
	            as prefix of every output file name.
	        input_path: Directory containing the original ``fold<fold>`` folders.
	        output_path: Directory below which the converted fold is stored.

	    Note:
	        Each mask is indexed as ``mask[:, :, j]`` for ``j`` in ``0..4``, so it
	        must provide at least five layers on its last axis. Layer ``j`` is
	        written into the type map as class ``j + 1``; where layers overlap,
	        the layer with the higher index wins in both maps.
	    """
	    fold_path = Path(input_path) / f"fold{fold}"
	    output_fold_path = Path(output_path) / f"fold{fold}"
	    image_dir = output_fold_path / "images"
	    label_dir = output_fold_path / "labels"
	    output_fold_path.mkdir(exist_ok=True, parents=True)
	    image_dir.mkdir(exist_ok=True, parents=True)
	    label_dir.mkdir(exist_ok=True, parents=True)

	    print(f"Fold: {fold}")
	    print("Loading large numpy files, this may take a while")
	    images = np.load(fold_path / "images.npy")
	    masks = np.load(fold_path / "masks.npy")

	    print("Process images")
	    for i, image in enumerate(tqdm(images, total=len(images))):
	        outname = f"{fold}_{i}.png"
	        Image.fromarray(image.astype(np.uint8)).save(image_dir / outname)

	    # Number of mask layers that are merged into the instance and type maps.
	    num_type_layers = 5

	    print("Process masks")
	    # Iterate over the masks themselves (not over ``len(images)``) so that the
	    # loop bound always matches the array that is being indexed.
	    for i, mask in enumerate(tqdm(masks, total=len(masks))):
	        outname = f"{fold}_{i}.npy"

	        # Size both maps from the mask instead of assuming 256x256 patches.
	        map_shape = mask.shape[:2]
	        inst_map = np.zeros(map_shape)
	        type_map = np.zeros(map_shape).astype(np.int32)
	        num_nuc = 0
	        for j in range(num_type_layers):
	            layer = mask[:, :, j]

	            # Instance map: shift the ids of this layer by the running offset
	            # so they do not collide with ids taken from earlier layers.
	            # NOTE(review): this relies on remap_label returning ids whose
	            # maximum can serve as offset (presumably contiguous 1..N) --
	            # confirm against cell_segmentation.utils.metrics.
	            layer_res = remap_label(layer)
	            inst_map = np.where(layer_res != 0, layer_res + num_nuc, inst_map)
	            num_nuc = num_nuc + np.max(layer_res)

	            # Type map: every non-zero pixel of layer j becomes class j + 1.
	            type_layer = ((j + 1) * np.clip(layer, 0, 1)).astype(np.int32)
	            type_map = np.where(type_layer != 0, type_layer, type_map)
	        inst_map = remap_label(inst_map)

	        outdict = {"inst_map": inst_map, "type_map": type_map}
	        # np.save pickles the dict into an object array, so consumers have to
	        # read it back with np.load(..., allow_pickle=True).
	        np.save(label_dir / outname, outdict)


	# Command-line interface of the conversion script. Both paths are mandatory:
	# the input must contain the original fold folders, the output receives the
	# converted per-sample files.
	parser = argparse.ArgumentParser(
	    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
	    description=(
	        "Prepare the PanNuke dataset by converting the fold-wise numpy arrays "
	        "into per-sample image and label files"
	    ),
	)
	parser.add_argument(
	    "--input_path",
	    type=str,
	    help="Input path of the original PanNuke dataset",
	    required=True,
	)
	parser.add_argument(
	    "--output_path",
	    type=str,
	    help="Output path to store the processed PanNuke dataset",
	    required=True,
	)

	if __name__ == "__main__":
	    # Script entry point: parse the two required paths and convert the
	    # folds 0, 1 and 2 one after another.
	    opt = parser.parse_args()
	    input_path = Path(opt.input_path)
	    output_path = Path(opt.output_path)

	    for fold in range(3):
	        process_fold(fold, input_path, output_path)