Keras 构建DNN 对用户名检测判断是否为非法用户名(从数据预处理到模型在线预测)
生活随笔
收集整理的這篇文章主要介紹了
Keras 构建DNN 对用户名检测判断是否为非法用户名(从数据预处理到模型在线预测)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
一. 數據集的準備與預處理
1. 收集dataset
(大量用戶名--包含正常用戶名與非法用戶名)
包含兩個txt文件:legal_name.txt 和 ilegal_name.txt,如下圖所示
2. 對文件進行預處理
# Data sets
import os
import pandas as pd

DATAPATH = "../dataset"

POS = os.path.join(DATAPATH, "legal_name.txt")
POS_OUTPUT = os.path.join(DATAPATH, "legal_name.csv")

NEG = os.path.join(DATAPATH, "ilegal_name.txt")
NEG_OUTPUT = os.path.join(DATAPATH, "ilegal_name.csv")


def process_org_data(input_data, output_data, lable):
    """Convert a raw username file into a labelled feature CSV.

    The input is read with ``pandas.read_csv`` in chunks of 10000 rows and
    must provide a ``username`` column. For every chunk the usernames are
    cast to ``str`` and stripped, a ``length`` column is derived, and a
    constant ``label`` column is added. Each chunk is appended to
    ``output_data`` without header or index.

    Args:
        input_data: Path of the raw username file.
        output_data: Path of the CSV to append to. The file is opened in
            append mode, so re-running the script adds the rows again.
        lable: Label written for every row (the ``__main__`` section passes
            1 for legal names and 0 for illegal names). The misspelt
            parameter name is kept for backward compatibility.
    """
    # Iterating the chunked reader replaces the manual
    # get_chunk()/StopIteration loop and ends cleanly at end of file.
    for train in pd.read_csv(input_data, chunksize=10000):
        # Use vectorised string ops rather than map(): on Python 3 map()
        # returns a lazy iterator, which is not a valid column value.
        train['username'] = train['username'].astype(str).str.strip()
        train['length'] = train['username'].apply(len)
        # ... ... (the article elides the remaining feature columns here)
        train['label'] = lable  # a scalar is broadcast to every row
        train.to_csv(output_data, encoding='utf-8', mode='a', index=False, header=False)
    print("Iteration is stopped.")


if __name__ == '__main__':
    process_org_data(POS, POS_OUTPUT, 1)
    process_org_data(NEG, NEG_OUTPUT, 0)

# Extract whatever features the task requires and write them out as CSV
# (feature columns plus a label column).
# Then merge the legal and illegal datasets, shuffle them, and split the
# result into train.csv and test.csv.
二. Keras 構建DNN模型進行訓練與模型保存
import pandas as pd
import os
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import numpy as np

tf.logging.set_verbosity(tf.logging.INFO)

DataPath = "../dataset"

TRAIN = os.path.join(DataPath, "train.csv")
TEST = os.path.join(DataPath, "test.csv")

# NOTE: the "..." is the article's placeholder for the elided feature column
# names. The slicing below expects 20 columns in total: username, 18 feature
# columns, then label.
COLUMNS = ["username", ... , "label"]

train_dataset = pd.read_csv(TRAIN, skipinitialspace=True, skiprows=1, names=COLUMNS)
test_dataset = pd.read_csv(TEST, skipinitialspace=True, skiprows=1, names=COLUMNS)

# Every column except the username must be numeric; unparsable cells become NaN.
for col in train_dataset.columns[1:]:
    train_dataset[col] = pd.to_numeric(train_dataset[col], errors='coerce')

for col in test_dataset.columns[1:]:
    test_dataset[col] = pd.to_numeric(test_dataset[col], errors='coerce')

# Columns 1..18 are the features, column 19 is the label.
X_train = train_dataset.iloc[:, 1:19].values
y_train = train_dataset.iloc[:, 19].values

X_test = test_dataset.iloc[:, 1:19].values
y_test = test_dataset.iloc[:, 19].values


def build_model():
    """Build the uncompiled binary classifier.

    The network is a stack of Dense layers (64 -> 32 -> 16, each followed by
    a ReLU activation) on an 18-dimensional input, ending in a single
    sigmoid unit.

    Returns:
        The ``tf.keras.models.Sequential`` model. The original snippet ended
        with a bare ``return``, which handed ``None`` to the caller.
    """
    ############
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(64, input_dim=18))
    # model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Activation('relu'))
    model.add(tf.keras.layers.Dense(32))
    # model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Activation('relu'))
    model.add(tf.keras.layers.Dense(16))
    # model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Activation('relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    return model


if __name__ == '__main__':
    model_file = './my_model.h5'
    if os.path.isfile(model_file):
        print('model file detected. Loading.')
        # load_model() also restores the compiled loss/optimizer state.
        model = tf.keras.models.load_model(model_file)
    else:
        print('No model file detected. Starting from scratch.')
        model = build_model()
        model.compile(loss='binary_crossentropy', optimizer="adam", metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=100, epochs=1, validation_data=(X_test, y_test))
    # Persist the trained model so that the next run can resume from it.
    model.save(model_file)

# The trained model is written to my_model.h5; the article reports an
# accuracy of about 90%.
三. 導出tensorflow-serving 模型, 運行在線預測服務
def save_model_for_production(model, version, path='prod_models'):
    """Export a Keras model as a SavedModel for TensorFlow Serving.

    The model is written to ``<path>/<version>`` with a single signature
    named ``'predict'`` whose input key is ``'inputs'`` and whose output key
    is ``'output'``.

    Args:
        model: Keras model whose ``input`` and ``output`` tensors are exported.
        version: Version sub-directory name, as a string (e.g. ``"7"``).
        path: Base export directory; created if it does not exist.
    """
    # Export in inference mode: learning phase 0 is "test", 1 is "train".
    tf.keras.backend.set_learning_phase(0)
    if not os.path.isdir(path):
        # makedirs also creates missing parent directories.
        os.makedirs(path)
    export_path = os.path.join(
        tf.compat.as_bytes(path),
        tf.compat.as_bytes(version))
    builder = tf.saved_model.builder.SavedModelBuilder(export_path)

    model_input = tf.saved_model.utils.build_tensor_info(model.input)
    model_output = tf.saved_model.utils.build_tensor_info(model.output)

    prediction_signature = (
        tf.saved_model.signature_def_utils.build_signature_def(
            inputs={'inputs': model_input},
            outputs={'output': model_output},
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))

    # Use the Keras session directly instead of as a context manager:
    # leaving a ``with`` block would close the session Keras still owns.
    sess = tf.keras.backend.get_session()
    builder.add_meta_graph_and_variables(
        sess=sess,
        tags=[tf.saved_model.tag_constants.SERVING],
        signature_def_map={
            'predict': prediction_signature,
        })
    builder.save()
導出為 tensorflow serving 模型
export_path = "tf-model"
save_model_for_production(model, "7", export_path)
運行在線預測服務(tensorflow 官方方法)
/serving/bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server --port=9000 --model_name=username --model_base_path=/data/model/tf-model
四. client通過grpc 調用預測服務
#!/usr/bin/env python
# encoding: utf-8
"""
@version: v1.0
@author: zwqjoy
@contact: zwqjoy@163.com
@site: https://blog.csdn.net/zwqjoy
@file: client
@time: 2018/6/29 15:02
"""
from grpc.beta import implementations
import tensorflow as tf

from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2
import numpy as np

tf.app.flags.DEFINE_string('server', '172.xxx.xxx.xxx:9000',
                           'PredictionService host:port')
tf.app.flags.DEFINE_string('username', 'demo_user',
                           '傳入一個username')
FLAGS = tf.app.flags.FLAGS


def nametovec(username):
    """Turn a username into the feature vector sent to the model.

    Args:
        username: The username to score; it is coerced to ``str`` first.

    Returns:
        A NumPy array of features. ``main`` sends it with shape (1, 18), so
        a complete implementation must produce 18 values.
    """
    # The flag value is a plain string, which has no .astype(); use str().
    username = str(username)
    length = len(username)
    # ... ... (the article elides the remaining feature extraction here)
    # NOTE: the "..." below is the article's placeholder for those features.
    return np.array([length, ...])


def main(_):
    """Send one Predict request for ``FLAGS.username`` and print the reply."""
    host, port = FLAGS.server.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Send request
    # See prediction_service.proto for gRPC request/response details.
    data = nametovec(FLAGS.username)
    data = data.astype(np.float32)
    request = predict_pb2.PredictRequest()
    # Must match the --model_name given to tensorflow_model_server.
    request.model_spec.name = 'username'
    # Must match the key used in signature_def_map at export time.
    request.model_spec.signature_name = 'predict'
    request.inputs['inputs'].CopyFrom(
        tf.contrib.util.make_tensor_proto(data, shape=(1, 18)))
    result = stub.Predict(request, 10.0)  # 10 secs timeout
    print(result)


if __name__ == '__main__':
    tf.app.run()
總結
以上是生活随笔為你收集整理的Keras 构建DNN 对用户名检测判断是否为非法用户名(从数据预处理到模型在线预测)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: windows hello怎么开启 wi
- 下一篇: [机器学习] --- 参数优化与模型选择