import tensorflow as tf
import numpy as np
import cv2
import os

# Run this TF1-style graph code under TensorFlow 2.x via the compat.v1 shim.
if int(tf.__version__.split('.')[0]) >= 2:
    print("tf version >= 2.0")
    tf = tf.compat.v1
    tf.disable_eager_execution()
|
|
class human_segmenter(object):
    def __init__(self, model_path, is_encrypted_model=False):
        super(human_segmenter, self).__init__()
        # Load the frozen inference graph into the default graph.
        with tf.gfile.FastGFile(model_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')

        # Cap GPU memory so several sessions can share one device.
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.3
        self.sess = tf.InteractiveSession(config=config)
        print("human_segmenter init done")

    def image_preprocess(self, img):
        # Grayscale -> 3 channels, BGRA -> BGR, then BGR -> RGB float32.
        if len(img.shape) == 2:
            img = np.dstack((img, img, img))
        elif img.shape[2] == 4:
            img = img[:, :, :3]
        img = img[:, :, ::-1]
        img = img.astype(np.float32)
        return img

    def run(self, img):
        image_feed = self.image_preprocess(img)
        # The if_person flag is fetched alongside the matte but not returned.
        output_img_value, logits_value = self.sess.run(
            [self.sess.graph.get_tensor_by_name("output_png:0"),
             self.sess.graph.get_tensor_by_name("if_person:0")],
            feed_dict={self.sess.graph.get_tensor_by_name("input_image:0"): image_feed})
        output_img_value = cv2.cvtColor(output_img_value, cv2.COLOR_RGBA2BGRA)
        return output_img_value

    def run_head(self, img):
        image_feed = self.image_preprocess(img)
        output_alpha = self.sess.run(self.sess.graph.get_tensor_by_name('output_alpha:0'),
                                     feed_dict={'input_image:0': image_feed})
        return output_alpha

    def get_human_bbox(self, mask):
        '''
        :param mask: single-channel uint8 mask, foreground > 127
        :return: [x, y, w, h] of the largest foreground region, or None
        '''
        print('dtype:{}, max:{}, shape:{}'.format(mask.dtype, np.max(mask), mask.shape))
        ret, thresh = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
        # OpenCV >= 4 returns (contours, hierarchy).
        contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours) == 0:
            return None
        contoursArea = [cv2.contourArea(c) for c in contours]
        max_area_index = contoursArea.index(max(contoursArea))
        bbox = cv2.boundingRect(contours[max_area_index])
        return bbox

    def release(self):
        self.sess.close()
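
# A minimal compositing sketch (illustrative, not part of the original
# pipeline): human_segmenter.run returns a 4-channel BGRA matte, so the
# alpha plane can blend the person onto a plain white background.
def composite_on_white(bgra):
    alpha = bgra[:, :, 3:4].astype(np.float32) / 255.0
    fg = bgra[:, :, :3].astype(np.float32)
    return (fg * alpha + 255.0 * (1.0 - alpha)).astype(np.uint8)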
|
|
class head_segmenter(object):
    def __init__(self, model_path, is_encrypted_model=False):
        super(head_segmenter, self).__init__()
        # Load the frozen inference graph into the default graph.
        with tf.gfile.FastGFile(model_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')

        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.3
        self.sess = tf.InteractiveSession(config=config)
        print("head_segmenter init done")

    def image_preprocess(self, img):
        # Grayscale -> 3 channels, BGRA -> BGR, then BGR -> RGB float32.
        if len(img.shape) == 2:
            img = np.dstack((img, img, img))
        elif img.shape[2] == 4:
            img = img[:, :, :3]
        img = img[:, :, ::-1]
        img = img.astype(np.float32)
        return img

    def run_head(self, img):
        image_feed = self.image_preprocess(img)
        output_alpha = self.sess.run(self.sess.graph.get_tensor_by_name('output_alpha:0'),
                                     feed_dict={'input_image:0': image_feed})
        return output_alpha

    def get_human_bbox(self, mask):
        '''
        :param mask: single-channel uint8 mask, foreground > 127
        :return: [x, y, w, h] of the largest foreground region, or None
        '''
        print('dtype:{}, max:{}, shape:{}'.format(mask.dtype, np.max(mask), mask.shape))
        ret, thresh = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
        # OpenCV >= 4 returns (contours, hierarchy).
        contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours) == 0:
            return None
        contoursArea = [cv2.contourArea(c) for c in contours]
        max_area_index = contoursArea.index(max(contoursArea))
        bbox = cv2.boundingRect(contours[max_area_index])
        return bbox

    def release(self):
        self.sess.close()
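
# Hedged helper (an addition, not original code): binarize an 8-bit alpha
# matte from run_head so it can feed get_human_bbox; the 127 threshold
# mirrors the one used inside that method.
def alpha_to_mask(alpha, thr=127):
    return (alpha > thr).astype(np.uint8) * 255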
|
|
class hair_segmenter(object):
    def __init__(self, model_dir, is_encrypted_model=False):
        head_path = os.path.join(model_dir, 'Matting_headparser_6_18.pb')
        face_path = os.path.join(model_dir, 'segment_face.pb')
        detect_path = os.path.join(model_dir, 'face_detect.pb')

        # Load each frozen graph into its own session and push one dummy
        # 512x512 image through it as a warm-up.
        self.sess = self.load_sess(head_path)
        image = np.ones((512, 512, 3))
        output_png = self.sess.run(self.sess.graph.get_tensor_by_name('output_alpha:0'),
                                   feed_dict={'input_image:0': image})

        self.sess_detect = self.load_sess(detect_path)
        oboxes, scores, num_detections = self.sess_detect.run(
            [self.sess_detect.graph.get_tensor_by_name('tower_0/boxes:0'),
             self.sess_detect.graph.get_tensor_by_name('tower_0/scores:0'),
             self.sess_detect.graph.get_tensor_by_name('tower_0/num_detections:0')],
            feed_dict={'tower_0/images:0': image[np.newaxis], 'training_flag:0': False})

        self.sess_face = self.load_sess(face_path)
        output_alpha = self.sess_face.run(self.sess_face.graph.get_tensor_by_name('output_alpha_face:0'),
                                          feed_dict={'input_image_face:0': image})

    def load_sess(self, model_path):
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        with tf.gfile.FastGFile(model_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        with sess.graph.as_default():
            tf.import_graph_def(graph_def, name='')
        sess.run(tf.global_variables_initializer())
        return sess

    def image_preprocess(self, img):
        # Grayscale -> 3 channels, BGRA -> BGR, then BGR -> RGB float32.
        if len(img.shape) == 2:
            img = np.dstack((img, img, img))
        elif img.shape[2] == 4:
            img = img[:, :, :3]
        img = img[:, :, ::-1]
        img = img.astype(np.float32)
        return img

    def run_head(self, image):
        image_feed = self.image_preprocess(image)
        output_img_value = self.sess.run(self.sess.graph.get_tensor_by_name('output_alpha:0'),
                                         feed_dict={'input_image:0': image_feed})
        output_img_value = cv2.cvtColor(output_img_value, cv2.COLOR_RGBA2BGRA)
        return output_img_value
|
    def run(self, image):
        h, w, c = image.shape
        faceRects = self.detect_face(image)
        face_num = len(faceRects)
        print('face_num:{}'.format(face_num))
        all_head_alpha = []
        all_face_mask = []
        # Pass 1: a face mask per detected face, from a lightly padded crop.
        for i in range(face_num):
            y1 = faceRects[i][0]
            y2 = faceRects[i][1]
            x1 = faceRects[i][2]
            x2 = faceRects[i][3]
            pad_y1, pad_y2, pad_x1, pad_x2 = self.pad_box(y1, y2, x1, x2, 0.15, 0.15, 0.15, 0.15, h, w)
            temp_img = image.copy()
            roi_img = temp_img[pad_y1:pad_y2, pad_x1:pad_x2]
            output_alpha = self.sess_face.run(self.sess_face.graph.get_tensor_by_name('output_alpha_face:0'),
                                              feed_dict={'input_image_face:0': roi_img[:, :, ::-1]})
            face_mask = np.zeros((h, w, 3))
            face_mask[pad_y1:pad_y2, pad_x1:pad_x2] = output_alpha
            all_face_mask.append(face_mask)

        # Pass 2: a head matte per face from a generously padded crop, with
        # every other detected face suppressed so the parser sees one subject.
        for i in range(face_num):
            y1 = faceRects[i][0]
            y2 = faceRects[i][1]
            x1 = faceRects[i][2]
            x2 = faceRects[i][3]
            pad_y1, pad_y2, pad_x1, pad_x2 = self.pad_box(y1, y2, x1, x2, 1.47, 1.47, 1.3, 2.0, h, w)
            temp_img = image.copy()
            for j in range(face_num):
                y1 = faceRects[j][0]
                y2 = faceRects[j][1]
                x1 = faceRects[j][2]
                x2 = faceRects[j][3]
                small_y1, small_y2, small_x1, small_x2 = self.pad_box(y1, y2, x1, x2, -0.1, -0.1, -0.1, -0.1, h, w)
                small_width = small_x2 - small_x1
                small_height = small_y2 - small_y1
                if small_x1 < 0 or small_y1 < 0 or small_width < 3 or small_height < 3 or small_x2 > w or small_y2 > h:
                    continue
                if i != j:
                    temp_img = temp_img * (1.0 - all_face_mask[j] / 255.0)

            roi_img = temp_img[pad_y1:pad_y2, pad_x1:pad_x2]
            output_alpha = self.sess.run(self.sess.graph.get_tensor_by_name('output_alpha:0'),
                                         feed_dict={'input_image:0': roi_img[:, :, ::-1]})
            head_alpha = np.zeros((h, w))
            # Channel 2 of the parser output is used as the head alpha.
            head_alpha[pad_y1:pad_y2, pad_x1:pad_x2] = output_alpha[:, :, 2]
            all_head_alpha.append(head_alpha)

        print('all_head_alpha', all_head_alpha)
        return all_head_alpha
|
    def detect_face(self, img):
        h, w, c = img.shape
        input_img = cv2.resize(img[:, :, ::-1], (512, 512))
        boxes, scores, num_detections = self.sess_detect.run(
            [self.sess_detect.graph.get_tensor_by_name('tower_0/boxes:0'),
             self.sess_detect.graph.get_tensor_by_name('tower_0/scores:0'),
             self.sess_detect.graph.get_tensor_by_name('tower_0/num_detections:0')],
            feed_dict={'tower_0/images:0': input_img[np.newaxis], 'training_flag:0': False})
        faceRects = []
        for i in range(int(num_detections[0])):
            if scores[0, i] < 0.5:
                continue
            # Boxes are normalized [y1, x1, y2, x2]; map them back to pixels.
            y1 = int(boxes[0, i, 0] * h)
            x1 = int(boxes[0, i, 1] * w)
            y2 = int(boxes[0, i, 2] * h)
            x2 = int(boxes[0, i, 3] * w)
            if x2 <= x1 + 3 or y2 <= y1 + 3:
                continue
            faceRects.append((y1, y2, x1, x2, y2 - y1, x2 - x1))
        # Sort in place so the largest face comes first.
        faceRects.sort(key=lambda x: x[4] * x[5], reverse=True)
        return faceRects
|
    def pad_box(self, y1, y2, x1, x2, left_ratio, right_ratio, top_ratio, bottom_ratio, h, w):
        # Expand (or shrink, for negative ratios) a box by per-side fractions
        # of its size, clamped to the image bounds.
        box_w = x2 - x1
        box_h = y2 - y1
        pad_y1 = max(int(y1 - top_ratio * box_h), 0)
        pad_y2 = min(int(y2 + bottom_ratio * box_h), h - 1)
        pad_x1 = max(int(x1 - left_ratio * box_w), 0)
        pad_x2 = min(int(x2 + right_ratio * box_w), w - 1)
        return pad_y1, pad_y2, pad_x1, pad_x2
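
# Hedged usage sketch for the face/head pipeline; the model_dir and image
# path are illustrative, and model_dir must contain the three .pb files
# named in hair_segmenter.__init__.
# seg = hair_segmenter(model_dir='assets')
# head_alphas = seg.run(cv2.imread('portrait.jpg'))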
|
|
if __name__ == "__main__": |
|
|
img = cv2.imread('12345/images/0001.jpg') |
|
|
print(img.shape) |
|
|
fp = human_segmenter(model_path='assets/matting_human.pb') |
|
|
|
|
|
rgba = fp.run(img) |
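    # Hedged demo extension (filenames illustrative): the matte is BGRA, so
    # its alpha plane can drive the bbox query and a white composite.
    mask = alpha_to_mask(rgba[:, :, 3])
    print('human bbox (x, y, w, h):', fp.get_human_bbox(mask))
    cv2.imwrite('output_rgba.png', rgba)
    cv2.imwrite('composited_white.png', composite_on_white(rgba))
    fp.release()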
print("test done") |