win10 tensorflow MTCNN Demo
生活随笔
收集整理的這篇文章主要介紹了
win10 tensorflow MTCNN Demo
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
doc mark here
win10安裝的tensorflow是python3.5版本的,因此還要安裝對應版本的opencv,python opencv下載地址:
http://www.lfd.uci.edu/~gohlke/pythonlibs/
選擇32位還是64位的文件,比如:opencv_python-3.1.0-cp35-cp35m-win_amd64.whl
在 python 安裝目錄下的 \Scripts 中有 pip.exe,運行:
pip.exe install opencv_python-3.1.0-cp35-cp35m-win32.whl
MTCNN 入口代碼
#coding = gbk
# NOTE: the line above is not a valid PEP 263 declaration (there is a space
# before '='), so Python ignores it and uses its default source encoding.
"""Webcam demo: detect faces with MTCNN and draw a box around each one.

Press 'q' in the preview window to quit.
"""
import tensorflow as tf
import numpy as np
import cv2
import detect_face
import time

# face detection parameters
minsize = 20  # minimum size of face
threshold = [0.6, 0.7, 0.7]  # three steps's threshold
factor = 0.709  # scale factor


def to_rgb(img):
    """Replicate a single-channel image into three identical channels.

    img: 2-D array (unpacking ``img.shape`` into two values fails otherwise).
    Returns a uint8 array of shape ``img.shape + (3,)``.
    """
    w, h = img.shape
    ret = np.empty((w, h, 3), dtype=np.uint8)
    ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
    return ret


# restore mtcnn model
print('Creating networks and loading parameters')
gpu_memory_fraction = 1.0
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, './model_check_point/')

video_capture = cv2.VideoCapture(0)
try:
    while True:
        # Capture frame-by-frame
        ret, frame = video_capture.read()
        if not ret:
            # No frame was delivered (camera missing, unplugged or stream
            # ended); frame is unusable, so stop instead of crashing in cvtColor.
            print('Failed to read a frame from the camera, stopping.')
            break

        # The detector is fed a 3-channel image built from the grayscale frame.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        img = to_rgb(gray)

        start = time.time()
        bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
        end = time.time()
        print("current frame processing time : %.2fms" % ((end - start) * 1000))

        nrof_faces = bounding_boxes.shape[0]  # number of faces
        # print('number of faces found: {}'.format(nrof_faces))

        for face_position in bounding_boxes:
            face_position = face_position.astype(int)
            cv2.rectangle(frame,
                          (face_position[0], face_position[1]),
                          (face_position[2], face_position[3]),
                          (0, 255, 0), 2)
        # print(faces)

        cv2.imshow('MTCNN Demo', frame)
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
finally:
    # Always give the camera and the window back, even if detection raised.
    video_capture.release()
    cv2.destroyAllWindows()

# Core implementation (detect_face.py). A few changes were made to the code,
# mainly because of Python version differences, so that the code below runs
# under Python 3.5.
""" Tensorflow implementation of the face detection / alignment algorithm found at
https://github.com/kpzhang93/MTCNN_face_detection_alignment
"""
# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the
"Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE.from __future__ import absolute_import from __future__ import division from __future__ import print_functionimport numpy as np import tensorflow as tf #from math import floor import cv2 import osdef layer(op):'''Decorator for composable network layers.'''def layer_decorated(self, *args, **kwargs):# Automatically set a name if not provided.name = kwargs.setdefault('name', self.get_unique_name(op.__name__))# Figure out the layer inputs.if len(self.terminals) == 0:raise RuntimeError('No input variables found for layer %s.' 
% name)elif len(self.terminals) == 1:layer_input = self.terminals[0]else:layer_input = list(self.terminals)# Perform the operation and get the output.layer_output = op(self, layer_input, *args, **kwargs)# Add to layer LUT.self.layers[name] = layer_output# This output is now the input for the next layer.self.feed(layer_output)# Return self for chained calls.return selfreturn layer_decoratedclass Network(object):def __init__(self, inputs, trainable=True):# The input nodes for this networkself.inputs = inputs# The current list of terminal nodesself.terminals = []# Mapping from layer names to layersself.layers = dict(inputs)# If true, the resulting variables are set as trainableself.trainable = trainableself.setup()def setup(self):'''Construct the network. '''raise NotImplementedError('Must be implemented by the subclass.')def load(self, data_path, session, ignore_missing=False):'''Load network weights.data_path: The path to the numpy-serialized network weightssession: The current TensorFlow sessionignore_missing: If true, serialized weights for missing layers are ignored.'''#data_dict = np.load(data_path).item() #pylint: disable=no-member #change 2017-12-13data_dict = np.load(data_path, encoding="latin1").item() #pylint: disable=no-memberfor op_name in data_dict:with tf.variable_scope(op_name, reuse=True):#for param_name, data in data_dict[op_name].iteritems(): #python 2 coding stylefor param_name, data in data_dict[op_name].items():try:var = tf.get_variable(param_name)session.run(var.assign(data))except ValueError:if not ignore_missing:raisedef feed(self, *args):'''Set the input(s) for the next operation by replacing the terminal nodes.The arguments can be either layer names or the actual layers.'''assert len(args) != 0self.terminals = []for fed_layer in args:#if isinstance(fed_layer, basestring):if isinstance(fed_layer, str):try:fed_layer = self.layers[fed_layer]except KeyError:raise KeyError('Unknown layer name fed: %s' % 
fed_layer)self.terminals.append(fed_layer)return selfdef get_output(self):'''Returns the current network output.'''return self.terminals[-1]def get_unique_name(self, prefix):'''Returns an index-suffixed unique name for the given prefix.This is used for auto-generating layer names based on the type-prefix.'''ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1return '%s_%d' % (prefix, ident)def make_var(self, name, shape):'''Creates a new TensorFlow variable.'''return tf.get_variable(name, shape, trainable=self.trainable)def validate_padding(self, padding):'''Verifies that the padding is one of the supported ones.'''assert padding in ('SAME', 'VALID')@layerdef conv(self,inp,k_h,k_w,c_o,s_h,s_w,name,relu=True,padding='SAME',group=1,biased=True):# Verify that the padding is acceptableself.validate_padding(padding)# Get the number of channels in the inputc_i = inp.get_shape()[-1]# Verify that the grouping parameter is validassert c_i % group == 0assert c_o % group == 0# Convolution for a given input and kernelconvolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)with tf.variable_scope(name) as scope:kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])# This is the common-case. 
Convolve the input without any further complications.output = convolve(inp, kernel)# Add the biasesif biased:biases = self.make_var('biases', [c_o])output = tf.nn.bias_add(output, biases)if relu:# ReLU non-linearityoutput = tf.nn.relu(output, name=scope.name)return output@layerdef prelu(self, inp, name):with tf.variable_scope(name):i = inp.get_shape().as_list()alpha = self.make_var('alpha', shape=(i[-1]))#output = tf.nn.relu(inp) + tf.mul(alpha, -tf.nn.relu(-inp)) //yafei change 2017-12-13output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp))return output@layerdef max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'):self.validate_padding(padding)return tf.nn.max_pool(inp,ksize=[1, k_h, k_w, 1],strides=[1, s_h, s_w, 1],padding=padding,name=name)@layerdef fc(self, inp, num_out, name, relu=True):with tf.variable_scope(name):input_shape = inp.get_shape()if input_shape.ndims == 4:# The input is spatial. Vectorize it first.dim = 1for d in input_shape[1:].as_list():dim *= dfeed_in = tf.reshape(inp, [-1, dim])else:feed_in, dim = (inp, input_shape[-1].value)weights = self.make_var('weights', shape=[dim, num_out])biases = self.make_var('biases', [num_out])op = tf.nn.relu_layer if relu else tf.nn.xw_plus_bfc = op(feed_in, weights, biases, name=name)return fc"""Multi dimensional softmax,refer to https://github.com/tensorflow/tensorflow/issues/210compute softmax along the dimension of targetthe native softmax only supports batch_size x dimension"""@layerdef softmax(self, target, axis, name=None):max_axis = tf.reduce_max(target, axis, keep_dims=True)target_exp = tf.exp(target-max_axis)normalize = tf.reduce_sum(target_exp, axis, keep_dims=True)softmax = tf.div(target_exp, normalize, name)return softmaxclass PNet(Network):def setup(self):(self.feed('data') #pylint: disable=no-value-for-parameter, no-member.conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1').prelu(name='PReLU1').max_pool(2, 2, 2, 2, name='pool1').conv(3, 3, 16, 1, 1, padding='VALID', 
relu=False, name='conv2').prelu(name='PReLU2').conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3').prelu(name='PReLU3').conv(1, 1, 2, 1, 1, relu=False, name='conv4-1').softmax(3,name='prob1'))(self.feed('PReLU3') #pylint: disable=no-value-for-parameter.conv(1, 1, 4, 1, 1, relu=False, name='conv4-2'))class RNet(Network):def setup(self):(self.feed('data') #pylint: disable=no-value-for-parameter, no-member.conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1').prelu(name='prelu1').max_pool(3, 3, 2, 2, name='pool1').conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2').prelu(name='prelu2').max_pool(3, 3, 2, 2, padding='VALID', name='pool2').conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3').prelu(name='prelu3').fc(128, relu=False, name='conv4').prelu(name='prelu4').fc(2, relu=False, name='conv5-1').softmax(1,name='prob1'))(self.feed('prelu4') #pylint: disable=no-value-for-parameter.fc(4, relu=False, name='conv5-2'))class ONet(Network):def setup(self):(self.feed('data') #pylint: disable=no-value-for-parameter, no-member.conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1').prelu(name='prelu1').max_pool(3, 3, 2, 2, name='pool1').conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2').prelu(name='prelu2').max_pool(3, 3, 2, 2, padding='VALID', name='pool2').conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3').prelu(name='prelu3').max_pool(2, 2, 2, 2, name='pool3').conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4').prelu(name='prelu4').fc(256, relu=False, name='conv5').prelu(name='prelu5').fc(2, relu=False, name='conv6-1').softmax(1, name='prob1'))(self.feed('prelu5') #pylint: disable=no-value-for-parameter.fc(4, relu=False, name='conv6-2'))(self.feed('prelu5') #pylint: disable=no-value-for-parameter.fc(10, relu=False, name='conv6-3'))def create_mtcnn(sess, model_path):with tf.variable_scope('pnet'):data = tf.placeholder(tf.float32, (None,None,None,3), 'input')pnet = 
PNet({'data':data})pnet.load(os.path.join(model_path, 'det1.npy'), sess)with tf.variable_scope('rnet'):data = tf.placeholder(tf.float32, (None,24,24,3), 'input')rnet = RNet({'data':data})rnet.load(os.path.join(model_path, 'det2.npy'), sess)with tf.variable_scope('onet'):data = tf.placeholder(tf.float32, (None,48,48,3), 'input')onet = ONet({'data':data})onet.load(os.path.join(model_path, 'det3.npy'), sess)pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img})rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img})onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img})return pnet_fun, rnet_fun, onet_fundef detect_face(img, minsize, pnet, rnet, onet, threshold, factor):# im: input image# minsize: minimum of faces' size# pnet, rnet, onet: caffemodel# threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold# fastresize: resize img from last scale (using in high-resolution images) if fastresize==truefactor_count=0total_boxes=np.empty((0,9))points=[]h=img.shape[0]w=img.shape[1]minl=np.amin([h, w])m=12.0/minsizeminl=minl*m# creat scale pyramidscales=[]while minl>=12:scales += [m*np.power(factor, factor_count)]minl = minl*factorfactor_count += 1# first stagefor j in range(len(scales)):scale=scales[j]hs=int(np.ceil(h*scale))ws=int(np.ceil(w*scale))im_data = imresample(img, (hs, ws))im_data = (im_data-127.5)*0.0078125img_x = np.expand_dims(im_data, 0)img_y = np.transpose(img_x, (0,2,1,3))out = pnet(img_y)out0 = np.transpose(out[0], (0,2,1,3))out1 = np.transpose(out[1], (0,2,1,3))boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])# inter-scale nmspick = nms(boxes.copy(), 0.5, 'Union')if boxes.size>0 and pick.size>0:boxes = boxes[pick,:]total_boxes = np.append(total_boxes, boxes, axis=0)numbox = total_boxes.shape[0]if numbox>0:pick = 
nms(total_boxes.copy(), 0.7, 'Union')total_boxes = total_boxes[pick,:]regw = total_boxes[:,2]-total_boxes[:,0]regh = total_boxes[:,3]-total_boxes[:,1]qq1 = total_boxes[:,0]+total_boxes[:,5]*regwqq2 = total_boxes[:,1]+total_boxes[:,6]*reghqq3 = total_boxes[:,2]+total_boxes[:,7]*regwqq4 = total_boxes[:,3]+total_boxes[:,8]*reghtotal_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))total_boxes = rerec(total_boxes.copy())total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)numbox = total_boxes.shape[0]if numbox>0:# second stagetempimg = np.zeros((24,24,3,numbox))for k in range(0,numbox):tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:tempimg[:,:,:,k] = imresample(tmp, (24, 24))else:return np.empty()tempimg = (tempimg-127.5)*0.0078125tempimg1 = np.transpose(tempimg, (3,1,0,2))out = rnet(tempimg1)out0 = np.transpose(out[0])out1 = np.transpose(out[1])score = out1[1,:]ipass = np.where(score>threshold[1])total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])mv = out0[:,ipass[0]]if total_boxes.shape[0]>0:pick = nms(total_boxes, 0.7, 'Union')total_boxes = total_boxes[pick,:]total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))total_boxes = rerec(total_boxes.copy())numbox = total_boxes.shape[0]if numbox>0:# third stagetotal_boxes = np.fix(total_boxes).astype(np.int32)dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)tempimg = np.zeros((48,48,3,numbox))for k in range(0,numbox):tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:tempimg[:,:,:,k] = imresample(tmp, (48, 48))else:return np.empty()tempimg = 
(tempimg-127.5)*0.0078125tempimg1 = np.transpose(tempimg, (3,1,0,2))out = onet(tempimg1)out0 = np.transpose(out[0])out1 = np.transpose(out[1])out2 = np.transpose(out[2])score = out2[1,:]points = out1ipass = np.where(score>threshold[2])points = points[:,ipass[0]]total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])mv = out0[:,ipass[0]]w = total_boxes[:,2]-total_boxes[:,0]+1h = total_boxes[:,3]-total_boxes[:,1]+1points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1if total_boxes.shape[0]>0:total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))pick = nms(total_boxes.copy(), 0.7, 'Min')total_boxes = total_boxes[pick,:]points = points[:,pick]return total_boxes, points# function [boundingbox] = bbreg(boundingbox,reg) def bbreg(boundingbox,reg):# calibrate bounding boxesif reg.shape[1]==1:reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))w = boundingbox[:,2]-boundingbox[:,0]+1h = boundingbox[:,3]-boundingbox[:,1]+1b1 = boundingbox[:,0]+reg[:,0]*wb2 = boundingbox[:,1]+reg[:,1]*hb3 = boundingbox[:,2]+reg[:,2]*wb4 = boundingbox[:,3]+reg[:,3]*hboundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ]))return boundingboxdef generateBoundingBox(imap, reg, scale, t):# use heatmap to generate bounding boxesstride=2cellsize=12imap = np.transpose(imap)dx1 = np.transpose(reg[:,:,0])dy1 = np.transpose(reg[:,:,1])dx2 = np.transpose(reg[:,:,2])dy2 = np.transpose(reg[:,:,3])y, x = np.where(imap >= t)if y.shape[0]==1:dx1 = np.flipud(dx1)dy1 = np.flipud(dy1)dx2 = np.flipud(dx2)dy2 = np.flipud(dy2)score = imap[(y,x)]reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))if reg.size==0:reg = np.empty((0,3))bb = np.transpose(np.vstack([y,x]))q1 = np.fix((stride*bb+1)/scale)q2 = np.fix((stride*bb+cellsize-1+1)/scale)boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])return boundingbox, 
reg# function pick = nms(boxes,threshold,type) def nms(boxes, threshold, method):if boxes.size==0:return np.empty((0,3))x1 = boxes[:,0]y1 = boxes[:,1]x2 = boxes[:,2]y2 = boxes[:,3]s = boxes[:,4]area = (x2-x1+1) * (y2-y1+1)I = np.argsort(s)pick = np.zeros_like(s, dtype=np.int16)counter = 0while I.size>0:i = I[-1]pick[counter] = icounter += 1idx = I[0:-1]xx1 = np.maximum(x1[i], x1[idx])yy1 = np.maximum(y1[i], y1[idx])xx2 = np.minimum(x2[i], x2[idx])yy2 = np.minimum(y2[i], y2[idx])w = np.maximum(0.0, xx2-xx1+1)h = np.maximum(0.0, yy2-yy1+1)inter = w * hif method is 'Min':o = inter / np.minimum(area[i], area[idx])else:o = inter / (area[i] + area[idx] - inter)I = I[np.where(o<=threshold)]pick = pick[0:counter]return pick# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) def pad(total_boxes, w, h):# compute the padding coordinates (pad the bounding boxes to square)tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)numbox = total_boxes.shape[0]dx = np.ones((numbox), dtype=np.int32)dy = np.ones((numbox), dtype=np.int32)edx = tmpw.copy().astype(np.int32)edy = tmph.copy().astype(np.int32)x = total_boxes[:,0].copy().astype(np.int32)y = total_boxes[:,1].copy().astype(np.int32)ex = total_boxes[:,2].copy().astype(np.int32)ey = total_boxes[:,3].copy().astype(np.int32)tmp = np.where(ex>w)#edx[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)edx[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],0)ex[tmp] = wtmp = np.where(ey>h)#edy[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)edy[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],0)ey[tmp] = htmp = np.where(x<1)#dx[tmp] = np.expand_dims(2-x[tmp],1)dx[tmp] = np.expand_dims(2-x[tmp],0)x[tmp] = 1tmp = np.where(y<1)#dy[tmp] = np.expand_dims(2-y[tmp],1)dy[tmp] = np.expand_dims(2-y[tmp],0)y[tmp] = 1return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph# function [bboxA] = rerec(bboxA) def rerec(bboxA):# convert bboxA to squareh = bboxA[:,3]-bboxA[:,1]w = 
bboxA[:,2]-bboxA[:,0]l = np.maximum(w, h)bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))return bboxAdef imresample(img, sz):im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #pylint: disable=no-memberreturn im_data# This method is kept for debugging purpose # h=img.shape[0] # w=img.shape[1] # hs, ws = sz # dx = float(w) / ws # dy = float(h) / hs # im_data = np.zeros((hs,ws,3)) # for a1 in range(0,hs): # for a2 in range(0,ws): # for a3 in range(0,3): # im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3] # return im_data
總結
以上是生活随笔為你收集整理的win10 tensorflow MTCNN Demo的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Dataset之MNIST:MNIST(
- 下一篇: gawk程序