123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- import tensorflow as tf
- import numpy as np
- from log_ware import LogWare
- logger = LogWare().get_logger()
class CNN(object):
    """Convolutional network definition for fixed-length captcha recognition.

    Holds the image/label geometry, creates the TensorFlow (1.x, graph-mode)
    input placeholders, and provides helpers to convert images to grayscale,
    encode label text as a one-hot vector and build the network graph.
    """

    def __init__(self, image_height, image_width, max_captcha, char_set, model_save_dir):
        """Store the configuration and create the input placeholders.

        :param image_height: height of an input image in pixels
        :param image_width: width of an input image in pixels
        :param max_captcha: maximum number of characters in one captcha
        :param char_set: sequence (str or list) of characters a captcha may contain
        :param model_save_dir: path where the model is saved
        """
        # Basic geometry and label configuration.
        self.image_height = image_height
        self.image_width = image_width
        self.max_captcha = max_captcha
        self.char_set = char_set
        self.char_set_len = len(char_set)
        self.model_save_dir = model_save_dir  # model path

        with tf.name_scope('parameters'):
            # w_alpha is not referenced elsewhere in this class; b_alpha
            # scales the random-normal bias initial values in the model.
            self.w_alpha = 0.01
            self.b_alpha = 0.1

        # TensorFlow placeholders.
        with tf.name_scope('data'):
            # Flattened image: one row of height * width values per sample.
            self.X = tf.placeholder(tf.float32, [None, self.image_height * self.image_width])
            # Label: max_captcha one-hot groups of char_set_len entries each.
            self.Y = tf.placeholder(tf.float32, [None, self.max_captcha * self.char_set_len])
            # Value handed to tf.nn.dropout as keep_prob.
            self.keep_prob = tf.placeholder(tf.float32)

    @staticmethod
    def convert2gray(img):
        """Convert an image array to grayscale.

        Arrays with more than two dimensions are reduced with a weighted sum
        of their first three channels; two-dimensional (single-channel)
        arrays are returned unchanged.

        :param img: image array, indexed as [row, column] or [row, column, channel]
        :return: 2-D grayscale array (the input object itself when already 2-D)
        """
        if len(img.shape) <= 2:
            return img
        red, green, blue = img[:, :, 0], img[:, :, 1], img[:, :, 2]
        return 0.2989 * red + 0.5870 * green + 0.1140 * blue

    def text2vec(self, text):
        """Encode label text as a flat one-hot vector.

        Position ``i`` of the text owns the slice
        ``[i * char_set_len, (i + 1) * char_set_len)`` of the result; the
        entry matching the character's index in ``char_set`` is set to 1.
        Text shorter than ``max_captcha`` leaves the trailing slices all zero.

        :param text: str, the captcha text
        :return: numpy.array of length ``max_captcha * char_set_len``
        :raises ValueError: if the text is longer than ``max_captcha`` or
            contains a character that is not in ``char_set``
        """
        if len(text) > self.max_captcha:
            raise ValueError('验证码最长{}个字符'.format(self.max_captcha))

        vector = np.zeros(self.max_captcha * self.char_set_len)
        for position, ch in enumerate(text):
            try:
                offset = self.char_set.index(ch)
            except ValueError:
                # Re-raise with context: the bare ``index`` error does not say
                # which label character was the problem.
                raise ValueError(
                    'character {!r} at position {} is not in char_set'.format(ch, position))
            vector[position * self.char_set_len + offset] = 1
        return vector

    def _conv_layer(self, inputs, weight_name, in_channels, out_channels):
        """Build one 3x3 conv + bias + ReLU, 2x2 max-pool and dropout stage."""
        weights = tf.get_variable(name=weight_name, shape=[3, 3, in_channels, out_channels],
                                  dtype=tf.float32,
                                  initializer=tf.contrib.layers.xavier_initializer())
        bias = tf.Variable(self.b_alpha * tf.random_normal([out_channels]))
        conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
        activated = tf.nn.relu(tf.nn.bias_add(conv, bias))
        pooled = tf.nn.max_pool(activated, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        return tf.nn.dropout(pooled, self.keep_prob)

    def model(self):
        """Build the network graph and return the prediction tensor.

        The flattened input ``self.X`` is reshaped to a single-channel image
        batch and passed through three convolution stages (32, 64 and 128
        filters), one 1024-unit fully connected layer with ReLU and dropout,
        and a final linear layer.

        :return: tensor of un-normalised scores with
            ``max_captcha * char_set_len`` columns (no activation applied)
        """
        x = tf.reshape(self.X, shape=[-1, self.image_height, self.image_width, 1])
        logger.debug(">>> input x: %s", x)

        # Convolution layers 1-3. Creation order is kept (weights, then bias,
        # layer by layer) so auto-generated variable/op names stay stable.
        conv1 = self._conv_layer(x, 'wc1', 1, 32)
        conv2 = self._conv_layer(conv1, 'wc2', 32, 64)
        conv3 = self._conv_layer(conv2, 'wc3', 64, 128)
        logger.debug(">>> convolution 3: %s", conv3.shape)

        # Number of features per sample once conv3 is flattened.
        next_shape = conv3.shape[1] * conv3.shape[2] * conv3.shape[3]

        # Fully connected layer 1.
        wd1 = tf.get_variable(name='wd1', shape=[next_shape, 1024], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bd1 = tf.Variable(self.b_alpha * tf.random_normal([1024]))
        dense = tf.reshape(conv3, [-1, wd1.get_shape().as_list()[0]])
        dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
        dense = tf.nn.dropout(dense, self.keep_prob)

        # Fully connected layer 2 (output).
        output_size = self.max_captcha * self.char_set_len
        # NOTE(review): the variable is registered under the literal name
        # 'name', which looks like a typo for 'wout'. It is kept as-is because
        # renaming it would change the variable's name in the graph and
        # presumably break restoring previously saved checkpoints - confirm
        # before changing.
        wout = tf.get_variable('name', shape=[1024, output_size], dtype=tf.float32,
                               initializer=tf.contrib.layers.xavier_initializer())
        bout = tf.Variable(self.b_alpha * tf.random_normal([output_size]))

        with tf.name_scope('y_prediction'):
            y_predict = tf.add(tf.matmul(dense, wout), bout)

        return y_predict
|