Jelajahi Sumber

创建项目

csk 5 tahun lalu
melakukan
7c904ed446
43 mengubah file dengan 2959 tambahan dan 0 penghapusan
  1. 118 0
      .gitignore
  2. 201 0
      LICENSE
  3. 44 0
      Pipfile
  4. 477 0
      Pipfile.lock
  5. 465 0
      README.md
  6. 104 0
      cnnlib/network.py
  7. 81 0
      cnnlib/recognition_object.py
  8. 11 0
      conf/captcha_config.json
  9. 29 0
      conf/sample_config.json
  10. 60 0
      conf/sample_config.md
  11. 57 0
      gen_sample_by_captcha.py
  12. 95 0
      log_ware.py
  13. TEMPAT SAMPAH
      readme_image/Cage1.jpg
  14. TEMPAT SAMPAH
      readme_image/Cage2.jpg
  15. TEMPAT SAMPAH
      readme_image/Kaptcha_2.png
  16. TEMPAT SAMPAH
      readme_image/Kaptcha_3.png
  17. TEMPAT SAMPAH
      readme_image/Kaptcha_5.png
  18. TEMPAT SAMPAH
      readme_image/SimpleCaptcha_1.jpg
  19. TEMPAT SAMPAH
      readme_image/SimpleCaptcha_2.jpg
  20. TEMPAT SAMPAH
      readme_image/SimpleCaptcha_3.jpg
  21. TEMPAT SAMPAH
      readme_image/SkewPassImage.jpg
  22. TEMPAT SAMPAH
      readme_image/bug_api启动失败.png
  23. TEMPAT SAMPAH
      readme_image/iCaptcha.jpg
  24. TEMPAT SAMPAH
      readme_image/jcaptcha1.jpg
  25. TEMPAT SAMPAH
      readme_image/jcaptcha2.jpg
  26. TEMPAT SAMPAH
      readme_image/jcaptcha3.jpg
  27. TEMPAT SAMPAH
      readme_image/patchca_1.png
  28. TEMPAT SAMPAH
      readme_image/py_Captcha-1.jpg
  29. TEMPAT SAMPAH
      readme_image/test_acc.png
  30. TEMPAT SAMPAH
      readme_image/train_acc.png
  31. TEMPAT SAMPAH
      readme_image/压力测试结果.png
  32. 76 0
      recognize_local.py
  33. 131 0
      recognize_online.py
  34. 78 0
      recognize_time_test.py
  35. 36 0
      requirements.txt
  36. 125 0
      test_batch.py
  37. 31 0
      tools/collect_labels.py
  38. 91 0
      tools/correction_captcha.py
  39. 19 0
      tools/gen_md_content.py
  40. 288 0
      train_model.py
  41. 160 0
      verify_and_split_data.py
  42. 75 0
      webserver_captcha_image.py
  43. 107 0
      webserver_recognize_api.py

+ 118 - 0
.gitignore

@@ -0,0 +1,118 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# pycharm
+.idea/
+
+# 数据文件
+sample/
+model/
+labels.json
+test.csv
+loss_test.csv
+loss_train.csv
+
+
+logs/

+ 201 - 0
LICENSE

@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 44 - 0
Pipfile

@@ -0,0 +1,44 @@
+[[source]]
+name = "pypi"
+url = "https://mirrors.aliyun.com/pypi/simple/"
+verify_ssl = true
+
+[dev-packages]
+
+[packages]
+absl-py = "==0.7.1"
+astor = "==0.7.1"
+bleach = "==1.5.0"
+captcha = "==0.3"
+certifi = "==2019.3.9"
+chardet = "==3.0.4"
+click = "==7.0"
+cycler = "==0.10.0"
+easydict = "==1.8"
+flask = "==1.0.2"
+gast = "==0.2.2"
+grpcio = "==1.19.0"
+html5lib = "==0.9999999"
+idna = "==2.7"
+itsdangerous = "==1.1.0"
+jinja2 = "==2.10.1"
+markdown = "==3.1"
+markupsafe = "==1.1.1"
+matplotlib = "==2.1.0"
+numpy = "==1.16.2"
+olefile = "==0.46"
+pillow = "==4.3.0"
+protobuf = "==3.6.1"
+pyparsing = "==2.4.0"
+python-dateutil = "==2.8.0"
+pytz = "==2018.9"
+requests = "==2.19.1"
+six = "==1.12.0"
+tensorboard = "==1.12.0"
+tensorflow = "==1.12.0"
+termcolor = "==1.1.0"
+urllib3 = "==1.23"
+werkzeug = "==0.15.2"
+
+[requires]
+python_version = "3.6"

+ 477 - 0
Pipfile.lock

@@ -0,0 +1,477 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "8169529944b62b88a3611ad616c4f2e441c3c2a808ce325c276c0c61b9ec4289"
+        },
+        "pipfile-spec": 6,
+        "requires": {
+            "python_version": "3.6"
+        },
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://mirrors.aliyun.com/pypi/simple/",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "absl-py": {
+            "hashes": [
+                "sha256:b943d1c567743ed0455878fcd60bc28ac9fae38d129d1ccfad58079da00b8951"
+            ],
+            "index": "pypi",
+            "version": "==0.7.1"
+        },
+        "astor": {
+            "hashes": [
+                "sha256:95c30d87a6c2cf89aa628b87398466840f0ad8652f88eb173125a6df8533fb8d",
+                "sha256:fb503b9e2fdd05609fbf557b916b4a7824171203701660f0c55bbf5a7a68713e"
+            ],
+            "index": "pypi",
+            "version": "==0.7.1"
+        },
+        "bleach": {
+            "hashes": [
+                "sha256:978e758599b54cd3caa2e160d74102879b230ea8dc93871d0783721eef58bc65",
+                "sha256:e67f46adcec78dbc3c04462f3aba3213a673d5652eba2609ed1ef15492a44b8d"
+            ],
+            "index": "pypi",
+            "version": "==1.5.0"
+        },
+        "captcha": {
+            "hashes": [
+                "sha256:1671f194da3b535fc12f6b0eb349195c7b28a6641381b2c07e31d04aa92fb6fc",
+                "sha256:a6b28a120de0a37c44415e70225978e36b2645940133f2474c7a109b2d4683e2"
+            ],
+            "index": "pypi",
+            "version": "==0.3"
+        },
+        "certifi": {
+            "hashes": [
+                "sha256:59b7658e26ca9c7339e00f8f4636cdfe59d34fa37b9b04f6f9e9926b3cece1a5",
+                "sha256:b26104d6835d1f5e49452a26eb2ff87fe7090b89dfcaee5ea2212697e1e1d7ae"
+            ],
+            "index": "pypi",
+            "version": "==2019.3.9"
+        },
+        "chardet": {
+            "hashes": [
+                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
+                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
+            ],
+            "index": "pypi",
+            "version": "==3.0.4"
+        },
+        "click": {
+            "hashes": [
+                "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13",
+                "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7"
+            ],
+            "index": "pypi",
+            "version": "==7.0"
+        },
+        "cycler": {
+            "hashes": [
+                "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d",
+                "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8"
+            ],
+            "index": "pypi",
+            "version": "==0.10.0"
+        },
+        "easydict": {
+            "hashes": [
+                "sha256:f1ec91110737a62fe28d14970ffa7a7c7b441a32e35a6f3da6a6082ffb7f9432"
+            ],
+            "index": "pypi",
+            "version": "==1.8"
+        },
+        "flask": {
+            "hashes": [
+                "sha256:2271c0070dbcb5275fad4a82e29f23ab92682dc45f9dfbc22c02ba9b9322ce48",
+                "sha256:a080b744b7e345ccfcbc77954861cb05b3c63786e93f2b3875e0913d44b43f05"
+            ],
+            "index": "pypi",
+            "version": "==1.0.2"
+        },
+        "gast": {
+            "hashes": [
+                "sha256:fe939df4583692f0512161ec1c880e0a10e71e6a232da045ab8edd3756fbadf0"
+            ],
+            "index": "pypi",
+            "version": "==0.2.2"
+        },
+        "grpcio": {
+            "hashes": [
+                "sha256:07c7f7b251b26ef94e29d2c19245e34d4d05897325a289b31de3b6a5e16fbd6c",
+                "sha256:2ddbca16c2e7b3f2ffc6e34c7cfa6886fb01de9f156ad3f77b72ad652d632097",
+                "sha256:30d84f9684b4c81ee37906bb303a84435948c2dd3db55d3ef38f8daf28bc6ea3",
+                "sha256:316e6c79fb1585b23ae100ee26f6ffefa91a21e4d39588fa42efadd7f20c7225",
+                "sha256:400abff9a772351fff72d5698c8758b837bec3d7f4ed93de70bae744d8f63f53",
+                "sha256:4ed90a256f6f8690b5c95b9d4f2e9fe6513628f3674e9068e10637e50c2f93d6",
+                "sha256:51fd87ff610ca2f483c668c3fa7f70d479bffb3c14805d2065b51194edea5e26",
+                "sha256:5569aba69041530e04eff3d40536027db8851f4e11e6282849b9fc5b1855075d",
+                "sha256:566b752e36cdcd5a4d38f292aca4c8e3095f13cfe82606e010d67749cacba341",
+                "sha256:5817f970fbfed72a6203ff96349e796d8f6ff3ce85b58af241c4a14190d9f4d1",
+                "sha256:5a97bb5a4af16f840f1211dbe66d61592f02110f286d96e67bf6006d7f96aab7",
+                "sha256:5d57e41c913152b215eda070955b3544bdf20ed2327e5e5eed3005186220ebd0",
+                "sha256:6cec17145978cef3d20093cdc05e88da597ce05076db566a66a35b9c55d416a3",
+                "sha256:6ef7ab9b6ba09ce087ddb3b27f12504f50efdbf5d319b8b23173478765452301",
+                "sha256:756c0d65e4ebce1c47787dbb48955864f2a768e1df76902f33d3e4062c209f3e",
+                "sha256:828d13f0edd27f452af7fc23093c8a2d63d8fbd92595dbd0f698c78b13af9bdb",
+                "sha256:8cf02c4e07520be61ad8b59b0043771ef2af666cb73066516eabfee562a28df4",
+                "sha256:919dfe84d22ce2e2ae81d82238586d7c2a86714fb0b6cf9b437e336851e3c32d",
+                "sha256:b04a061280b06cdc4e68c4147a0f46b98c395cf62f0c6df4fa2a30a083cdc333",
+                "sha256:b2dbe7d2f9685bdbb4415f8e475dd96b1b1776193b7286705f90490c3f039037",
+                "sha256:b60df7cbc3e77c39d5befe6a1e6e4213f3ca683d743ff7c1622b1d4412245a55",
+                "sha256:b740681332b5a042b9e22246a3cdbfc3d644cf73d38e117f20ad9d8deab8f1a5",
+                "sha256:ba434873945d5d4542589674cb60c43a1cf76b2b5f0c0f759aa76d499055722f",
+                "sha256:bcb44cd53beccc92c730254ad3d50715b67a7432e693961b566d982f759b1787",
+                "sha256:be1cbb6cad1d4242e3aaa4143eabcfbf383358f6c8e9951be2c497b65561b075",
+                "sha256:c4e38326fcab5c52fd1a8c8e0f908bfe830629a5ffc60793ec5545ef913d62d2",
+                "sha256:d03c0524d5953568f74269e0faebb1e880ba9f36ca8c773be397087c35bd8188",
+                "sha256:ea897ffa80276565acdd92349ef82a768db0e3327aacd4aec82f79ca10989689",
+                "sha256:edc50e8bcd10b165f34c3cf3e1d4f97e9c71b165b85a85b91cf3444000a17692",
+                "sha256:f96a2e97df522b50da9cb3795f08199b110ceab4146bf70ea7f6a3a0213786cc",
+                "sha256:fadb649a69e3b08e01f090c24f0c8cccc122e92c362c1a1727b695a63be8416b",
+                "sha256:fbe4360ff1689a9753cbf1b27dad11e683d39117a32a64372a7c95c6abc81b81"
+            ],
+            "index": "pypi",
+            "version": "==1.19.0"
+        },
+        "h5py": {
+            "hashes": [
+                "sha256:063947eaed5f271679ed4ffa36bb96f57bc14f44dd4336a827d9a02702e6ce6b",
+                "sha256:13c87efa24768a5e24e360a40e0bc4c49bcb7ce1bb13a3a7f9902cec302ccd36",
+                "sha256:16ead3c57141101e3296ebeed79c9c143c32bdd0e82a61a2fc67e8e6d493e9d1",
+                "sha256:3dad1730b6470fad853ef56d755d06bb916ee68a3d8272b3bab0c1ddf83bb99e",
+                "sha256:51ae56894c6c93159086ffa2c94b5b3388c0400548ab26555c143e7cfa05b8e5",
+                "sha256:54817b696e87eb9e403e42643305f142cd8b940fe9b3b490bbf98c3b8a894cf4",
+                "sha256:549ad124df27c056b2e255ea1c44d30fb7a17d17676d03096ad5cd85edb32dc1",
+                "sha256:64f74da4a1dd0d2042e7d04cf8294e04ddad686f8eba9bb79e517ae582f6668d",
+                "sha256:6998be619c695910cb0effe5eb15d3a511d3d1a5d217d4bd0bebad1151ec2262",
+                "sha256:6ef7ab1089e3ef53ca099038f3c0a94d03e3560e6aff0e9d6c64c55fb13fc681",
+                "sha256:769e141512b54dee14ec76ed354fcacfc7d97fea5a7646b709f7400cf1838630",
+                "sha256:79b23f47c6524d61f899254f5cd5e486e19868f1823298bc0c29d345c2447172",
+                "sha256:7be5754a159236e95bd196419485343e2b5875e806fe68919e087b6351f40a70",
+                "sha256:84412798925dc870ffd7107f045d7659e60f5d46d1c70c700375248bf6bf512d",
+                "sha256:86868dc07b9cc8cb7627372a2e6636cdc7a53b7e2854ad020c9e9d8a4d3fd0f5",
+                "sha256:8bb1d2de101f39743f91512a9750fb6c351c032e5cd3204b4487383e34da7f75",
+                "sha256:a5f82cd4938ff8761d9760af3274acf55afc3c91c649c50ab18fcff5510a14a5",
+                "sha256:aac4b57097ac29089f179bbc2a6e14102dd210618e94d77ee4831c65f82f17c0",
+                "sha256:bffbc48331b4a801d2f4b7dac8a72609f0b10e6e516e5c480a3e3241e091c878",
+                "sha256:c0d4b04bbf96c47b6d360cd06939e72def512b20a18a8547fa4af810258355d5",
+                "sha256:c54a2c0dd4957776ace7f95879d81582298c5daf89e77fb8bee7378f132951de",
+                "sha256:cbf28ae4b5af0f05aa6e7551cee304f1d317dbed1eb7ac1d827cee2f1ef97a99",
+                "sha256:d35f7a3a6cefec82bfdad2785e78359a0e6a5fbb3f605dd5623ce88082ccd681",
+                "sha256:d3c59549f90a891691991c17f8e58c8544060fdf3ccdea267100fa5f561ff62f",
+                "sha256:d7ae7a0576b06cb8e8a1c265a8bc4b73d05fdee6429bffc9a26a6eb531e79d72",
+                "sha256:ecf4d0b56ee394a0984de15bceeb97cbe1fe485f1ac205121293fc44dcf3f31f",
+                "sha256:f0e25bb91e7a02efccb50aba6591d3fe2c725479e34769802fcdd4076abfa917",
+                "sha256:f23951a53d18398ef1344c186fb04b26163ca6ce449ebd23404b153fd111ded9",
+                "sha256:ff7d241f866b718e4584fa95f520cb19405220c501bd3a53ee11871ba5166ea2"
+            ],
+            "version": "==2.10.0"
+        },
+        "html5lib": {
+            "hashes": [
+                "sha256:2612a191a8d5842bfa057e41ba50bbb9dcb722419d2408c78cff4758d0754868"
+            ],
+            "index": "pypi",
+            "version": "==0.9999999"
+        },
+        "idna": {
+            "hashes": [
+                "sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e",
+                "sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16"
+            ],
+            "index": "pypi",
+            "version": "==2.7"
+        },
+        "itsdangerous": {
+            "hashes": [
+                "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19",
+                "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749"
+            ],
+            "index": "pypi",
+            "version": "==1.1.0"
+        },
+        "jinja2": {
+            "hashes": [
+                "sha256:065c4f02ebe7f7cf559e49ee5a95fb800a9e4528727aec6f24402a5374c65013",
+                "sha256:14dd6caf1527abb21f08f86c784eac40853ba93edb79552aa1e4b8aef1b61c7b"
+            ],
+            "index": "pypi",
+            "version": "==2.10.1"
+        },
+        "keras-applications": {
+            "hashes": [
+                "sha256:5579f9a12bcde9748f4a12233925a59b93b73ae6947409ff34aa2ba258189fe5",
+                "sha256:df4323692b8c1174af821bf906f1e442e63fa7589bf0f1230a0b6bdc5a810c95"
+            ],
+            "version": "==1.0.8"
+        },
+        "keras-preprocessing": {
+            "hashes": [
+                "sha256:44aee5f2c4d80c3b29f208359fcb336df80f293a0bb6b1c738da43ca206656fb",
+                "sha256:5a8debe01d840de93d49e05ccf1c9b81ae30e210d34dacbcc47aeb3049b528e5"
+            ],
+            "version": "==1.1.0"
+        },
+        "markdown": {
+            "hashes": [
+                "sha256:fc4a6f69a656b8d858d7503bda633f4dd63c2d70cf80abdc6eafa64c4ae8c250",
+                "sha256:fe463ff51e679377e3624984c829022e2cfb3be5518726b06f608a07a3aad680"
+            ],
+            "index": "pypi",
+            "version": "==3.1"
+        },
+        "markupsafe": {
+            "hashes": [
+                "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473",
+                "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161",
+                "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235",
+                "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5",
+                "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff",
+                "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b",
+                "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1",
+                "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e",
+                "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183",
+                "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66",
+                "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1",
+                "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1",
+                "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e",
+                "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b",
+                "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905",
+                "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735",
+                "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d",
+                "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e",
+                "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d",
+                "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c",
+                "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21",
+                "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2",
+                "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5",
+                "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b",
+                "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6",
+                "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f",
+                "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f",
+                "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7"
+            ],
+            "index": "pypi",
+            "version": "==1.1.1"
+        },
+        "matplotlib": {
+            "hashes": [
+                "sha256:063a07660bee266c947e5ac0044053074c3ff3e5398614538d21c29b622ed0bd",
+                "sha256:09214eb11171a8426c9c89a236f05f26a76b204e5346dd4382ddcfb74820d467",
+                "sha256:11c7ed4716833c5b70f72f6c795a948d413e2af659442aa145d1e91868393084",
+                "sha256:14b0ab89e58621105c7b2cfd5ddc6cf13a87ac7cbcc73c287e4003d1f5e40fae",
+                "sha256:226f9c3799c16fdfc9be4b64bee2f1956626eddb58b24359247b0fb72afd76a0",
+                "sha256:353b56cd2fed8977767356cb686e3bf74789a4abd81e4bdfc4d57b40679fe057",
+                "sha256:4a2d2d97266e4e199266b7e409e5e8be4d3b26b4bad2702b70a8ae18f52d97b4",
+                "sha256:4b5f16c9cefde553ea79975305dcaa67c8e13d927b6e55aa14b4a8d867e25387",
+                "sha256:50aef54b4f947001667c775b1973928cdb3e33024cb93f81b5b024b34b1488a7",
+                "sha256:5c50343bc8e70f2b811188790fa899430062ce37696a166374239c838015a995",
+                "sha256:a5ba34f1b4d1e81be4df0fd033abd8062dbce2eca5b3a25339edb31e4390568a",
+                "sha256:b2e3b1eaf3a2753a1a63826bf4ab7487b383fbef5ef4941b25f4193baaa95d95",
+                "sha256:c665d3daf24f95acbd1541681ab1185e283afc046fe4175bfceab2cf26a8620b",
+                "sha256:c8579a840a1bd93e00a273aac0be3edd94b805fe4cd2156f01611622032188d1",
+                "sha256:d0d7036045e6a08663e428e4ea4af69164cdd79c486d31be835bb2ca7f5f1962",
+                "sha256:ea24f02268149aebe4f239635da8183f4e43f9116eebf8fcd365e2d04bc821fe",
+                "sha256:f490f684a241cd88003afebc356f5c69bdced22e549fbd50b914f7fe07ce4fa3",
+                "sha256:fa6155cf41857135bff125419256ccab6ce81e0cd63fac60a7c673efa8a989dd"
+            ],
+            "index": "pypi",
+            "version": "==2.1.0"
+        },
+        "numpy": {
+            "hashes": [
+                "sha256:1980f8d84548d74921685f68096911585fee393975f53797614b34d4f409b6da",
+                "sha256:22752cd809272671b273bb86df0f505f505a12368a3a5fc0aa811c7ece4dfd5c",
+                "sha256:23cc40313036cffd5d1873ef3ce2e949bdee0646c5d6f375bf7ee4f368db2511",
+                "sha256:2b0b118ff547fecabc247a2668f48f48b3b1f7d63676ebc5be7352a5fd9e85a5",
+                "sha256:3a0bd1edf64f6a911427b608a894111f9fcdb25284f724016f34a84c9a3a6ea9",
+                "sha256:3f25f6c7b0d000017e5ac55977a3999b0b1a74491eacb3c1aa716f0e01f6dcd1",
+                "sha256:4061c79ac2230594a7419151028e808239450e676c39e58302ad296232e3c2e8",
+                "sha256:560ceaa24f971ab37dede7ba030fc5d8fa173305d94365f814d9523ffd5d5916",
+                "sha256:62be044cd58da2a947b7e7b2252a10b42920df9520fc3d39f5c4c70d5460b8ba",
+                "sha256:6c692e3879dde0b67a9dc78f9bfb6f61c666b4562fd8619632d7043fb5b691b0",
+                "sha256:6f65e37b5a331df950ef6ff03bd4136b3c0bbcf44d4b8e99135d68a537711b5a",
+                "sha256:7a78cc4ddb253a55971115f8320a7ce28fd23a065fc33166d601f51760eecfa9",
+                "sha256:80a41edf64a3626e729a62df7dd278474fc1726836552b67a8c6396fd7e86760",
+                "sha256:893f4d75255f25a7b8516feb5766c6b63c54780323b9bd4bc51cdd7efc943c73",
+                "sha256:972ea92f9c1b54cc1c1a3d8508e326c0114aaf0f34996772a30f3f52b73b942f",
+                "sha256:9f1d4865436f794accdabadc57a8395bd3faa755449b4f65b88b7df65ae05f89",
+                "sha256:9f4cd7832b35e736b739be03b55875706c8c3e5fe334a06210f1a61e5c2c8ca5",
+                "sha256:adab43bf657488300d3aeeb8030d7f024fcc86e3a9b8848741ea2ea903e56610",
+                "sha256:bd2834d496ba9b1bdda3a6cf3de4dc0d4a0e7be306335940402ec95132ad063d",
+                "sha256:d20c0360940f30003a23c0adae2fe50a0a04f3e48dc05c298493b51fd6280197",
+                "sha256:d3b3ed87061d2314ff3659bb73896e622252da52558f2380f12c421fbdee3d89",
+                "sha256:dc235bf29a406dfda5790d01b998a1c01d7d37f449128c0b1b7d1c89a84fae8b",
+                "sha256:fb3c83554f39f48f3fa3123b9c24aecf681b1c289f9334f8215c1d3c8e2f6e5b"
+            ],
+            "index": "pypi",
+            "version": "==1.16.2"
+        },
+        "olefile": {
+            "hashes": [
+                "sha256:133b031eaf8fd2c9399b78b8bc5b8fcbe4c31e85295749bb17a87cba8f3c3964"
+            ],
+            "index": "pypi",
+            "version": "==0.46"
+        },
+        "pillow": {
+            "hashes": [
+                "sha256:0e3b56364a2c772c961a8faad8a835d3f24d8848310de035c9e07cc006035cbc",
+                "sha256:1d742642d01914b7e0cf6fd597a51f57d21fd68f794cf84803e03e72db78a261",
+                "sha256:2046a2001e2c413998951cc28aa0dbfd4cff846a12e24c2145d42630d5104094",
+                "sha256:39c7c9dcf64430091e30ef14d4191b4cae9b7b5ff29762357730aac4866fb189",
+                "sha256:4fb8ab0f8895fb946454ef6ffe806f49ee387095f2d6112ae24670e5fb8fbcd9",
+                "sha256:53eaec751151b5713a15b1cd62b06d0fc16d72f56623c15448728c554c30770b",
+                "sha256:54898190b538a6c8fa4228e866ff2e7609da1ba9fd1d9cc5dc8ca591d37ce0a8",
+                "sha256:575a9b3468c82f38be0419cd39d35001ae95a0cc5226534e45430035fecef583",
+                "sha256:59cef683d79b85d55a950c1e61dc7b6be0c45a5074692746354cd9a8ace1cd17",
+                "sha256:6d814aa655d94c63547fc3208cb6ab886ff1a64c543b31f52658663b1bb3f011",
+                "sha256:759e5e3e99c4ac87b99e9288a75236c63173d1bb24c8d3f9d9d2c8332fceeb0a",
+                "sha256:822e4fc261d12fa44d88dadee0e93d59663db94d962d4ffffbf09b1fe5e5be51",
+                "sha256:9184b9788a9cf677e53626a4dc141136a22d349a5480479b98defd3cfb5015a4",
+                "sha256:92087cb92a968421f42235f7d8153f4766b6ba213a6efb36b8060f3c9d294569",
+                "sha256:922aeb050bd52d8ce9531ab57fd2440bfe975900e8700fec385fb741c3c557c7",
+                "sha256:9adcfa2477b7e279ebeee75b49f535518201bbd7d26ca2ef1cf6751cb6e658e8",
+                "sha256:a336596b06e062b92eb8201a3b5dff07ae01c3a5d08ce5539d2da49b123f2be6",
+                "sha256:a6f43511c79bed431ec2b56e55150b5222c732cd9e5f80e77a44e068e94c71fc",
+                "sha256:a97c715d44efd5b4aa8d739b8fad88b93ed79f1b33fc2822d5802043f3b1b527",
+                "sha256:b13106cb83a3b7d1a02fafb94bfafbc980465ba948b76ea1996245959c6783d2",
+                "sha256:be803fae6af36639524a0f6861a8cface67bbec66c3416c3eaf592f1d45b8b20",
+                "sha256:cc6a5ed5b8f9d2f25e4e42d562e0ec4df3ce838f9e9b9d9d9b65fac6fe93a4cc",
+                "sha256:dc32362d0cadf18c3aef7040455760106cafe7dd3c211dc27c507e746376bb56",
+                "sha256:e595312f67962d6b4fde3b7dffaaaca4becefa522d677676bb57b0ec5f8f921a",
+                "sha256:e66080685863444738f08e13081c287e340b6e4f8bd674a2e0da967776ac6f46",
+                "sha256:effa82e72f5064439a3d2c7ff615b999eb1c4d65bb1f1e6ee6e2ddb345b3e81e",
+                "sha256:f2d71951f473744ac617b645b62d0c4df5372ef4618c425646bfe5e2e8878e61"
+            ],
+            "index": "pypi",
+            "version": "==4.3.0"
+        },
+        "protobuf": {
+            "hashes": [
+                "sha256:10394a4d03af7060fa8a6e1cbf38cea44be1467053b0aea5bbfcb4b13c4b88c4",
+                "sha256:1489b376b0f364bcc6f89519718c057eb191d7ad6f1b395ffd93d1aa45587811",
+                "sha256:1931d8efce896981fe410c802fd66df14f9f429c32a72dd9cfeeac9815ec6444",
+                "sha256:196d3a80f93c537f27d2a19a4fafb826fb4c331b0b99110f985119391d170f96",
+                "sha256:46e34fdcc2b1f2620172d3a4885128705a4e658b9b62355ae5e98f9ea19f42c2",
+                "sha256:4b92e235a3afd42e7493b281c8b80c0c65cbef45de30f43d571d1ee40a1f77ef",
+                "sha256:574085a33ca0d2c67433e5f3e9a0965c487410d6cb3406c83bdaf549bfc2992e",
+                "sha256:59cd75ded98094d3cf2d79e84cdb38a46e33e7441b2826f3838dcc7c07f82995",
+                "sha256:5ee0522eed6680bb5bac5b6d738f7b0923b3cafce8c4b1a039a6107f0841d7ed",
+                "sha256:65917cfd5da9dfc993d5684643063318a2e875f798047911a9dd71ca066641c9",
+                "sha256:685bc4ec61a50f7360c9fd18e277b65db90105adbf9c79938bd315435e526b90",
+                "sha256:92e8418976e52201364a3174e40dc31f5fd8c147186d72380cbda54e0464ee19",
+                "sha256:9335f79d1940dfb9bcaf8ec881fb8ab47d7a2c721fb8b02949aab8bbf8b68625",
+                "sha256:a7ee3bb6de78185e5411487bef8bc1c59ebd97e47713cba3c460ef44e99b3db9",
+                "sha256:ceec283da2323e2431c49de58f80e1718986b79be59c266bb0509cbf90ca5b9e",
+                "sha256:fcfc907746ec22716f05ea96b7f41597dfe1a1c088f861efb8a0d4f4196a6f10"
+            ],
+            "index": "pypi",
+            "version": "==3.6.1"
+        },
+        "pyparsing": {
+            "hashes": [
+                "sha256:1873c03321fc118f4e9746baf201ff990ceb915f433f23b395f5580d1840cb2a",
+                "sha256:9b6323ef4ab914af344ba97510e966d64ba91055d6b9afa6b30799340e89cc03"
+            ],
+            "index": "pypi",
+            "version": "==2.4.0"
+        },
+        "python-dateutil": {
+            "hashes": [
+                "sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb",
+                "sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e"
+            ],
+            "index": "pypi",
+            "version": "==2.8.0"
+        },
+        "pytz": {
+            "hashes": [
+                "sha256:32b0891edff07e28efe91284ed9c31e123d84bea3fd98e1f72be2508f43ef8d9",
+                "sha256:d5f05e487007e29e03409f9398d074e158d920d36eb82eaf66fb1136b0c5374c"
+            ],
+            "index": "pypi",
+            "version": "==2018.9"
+        },
+        "requests": {
+            "hashes": [
+                "sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1",
+                "sha256:ec22d826a36ed72a7358ff3fe56cbd4ba69dd7a6718ffd450ff0e9df7a47ce6a"
+            ],
+            "index": "pypi",
+            "version": "==2.19.1"
+        },
+        "six": {
+            "hashes": [
+                "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
+                "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
+            ],
+            "index": "pypi",
+            "version": "==1.12.0"
+        },
+        "tensorboard": {
+            "hashes": [
+                "sha256:537603db949e10d2f5f201d88b073f3f8fb4e4c311d5541e1d4518aa59aa8daa",
+                "sha256:ca275a7e39797946930d7d4460999369b73968e8191f2256e23bfb7924004d59"
+            ],
+            "index": "pypi",
+            "version": "==1.12.0"
+        },
+        "tensorflow": {
+            "hashes": [
+                "sha256:16fb8a59e724afd37a276d33b7e2ed070e5c84899a8d4cfc3fe1bb446a859da7",
+                "sha256:1ae50e44c0b29df5fb5b460118be5a257b4eb3e561008f64d2c4c715651259b7",
+                "sha256:1b7d09cc26ef727d628dcb74841b89374a38ed81af25bd589a21659ef67443da",
+                "sha256:2681b55d3e434e20fe98e3a3b1bde3588af62d7864b62feee4141a71e29ef594",
+                "sha256:42fc8398ce9f9895b488f516ea0143cf6cf2a3a5fc804da4a190b063304bc173",
+                "sha256:531619ad1c17b4084d09f442a9171318af813e81aae748e5de8274d561461749",
+                "sha256:5cee35f8a6a12e83560f30246811643efdc551c364bc981d27f21fbd0926403d",
+                "sha256:6ad6ed495f1a3d445c43d90cb2ce251ff5532fd6436e25f52977ee59ffa583df",
+                "sha256:cd8c1a899e3befe1ccb774ea1aae077a4b1286f855c956210b23766f4ac85c30",
+                "sha256:d3f3d7cd9bd4cdc7ebf25fd6c2dfc103dcf4b2834ae9276cc4cf897eb1515f6d",
+                "sha256:e4f479e6aca595acc98347364288cbdfd3c025ca85389380174ea75a43c327b7",
+                "sha256:f587dc03b5f0d1e50cca39b7159c9f21ffdec96273dbf5f7619d48c622cb21f2"
+            ],
+            "index": "pypi",
+            "version": "==1.12.0"
+        },
+        "termcolor": {
+            "hashes": [
+                "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
+            ],
+            "index": "pypi",
+            "version": "==1.1.0"
+        },
+        "urllib3": {
+            "hashes": [
+                "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
+                "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
+            ],
+            "index": "pypi",
+            "version": "==1.23"
+        },
+        "werkzeug": {
+            "hashes": [
+                "sha256:0a73e8bb2ff2feecfc5d56e6f458f5b99290ef34f565ffb2665801ff7de6af7a",
+                "sha256:7fad9770a8778f9576693f0cc29c7dcc36964df916b83734f4431c0e612a7fbc"
+            ],
+            "index": "pypi",
+            "version": "==0.15.2"
+        },
+        "wheel": {
+            "hashes": [
+                "sha256:10c9da68765315ed98850f8e048347c3eb06dd81822dc2ab1d4fde9dc9702646",
+                "sha256:f4da1763d3becf2e2cd92a14a7c920f0f00eca30fdde9ea992c836685b9faf28"
+            ],
+            "markers": "python_version >= '3'",
+            "version": "==0.33.6"
+        }
+    },
+    "develop": {}
+}

+ 465 - 0
README.md

@@ -0,0 +1,465 @@
+# cnn_captcha
+use CNN recognize captcha by tensorflow.  
+本项目针对字符型图片验证码,使用tensorflow实现卷积神经网络,进行验证码识别。  
+项目封装了比较通用的**校验、训练、验证、识别、API模块**,极大的减少了识别字符型验证码花费的时间和精力。 
+  
+项目已经帮助很多同学高效完成了验证码识别任务。
+如果你在使用过程中出现了bug和做了良好的改进,欢迎提出issue和PR,作者会尽快回复,希望能和你共同完善项目。 
+
+如果你需要识别点选、拖拽类验证码,或者有目标检测需求,也可以参考这个项目[nickliqian/darknet_captcha](https://github.com/nickliqian/darknet_captcha)。
+
+# 时间表
+#### 2018.11.12
+初版Readme.md  
+#### 2018.11.21
+加入关于验证码识别的一些说明  
+#### 2018.11.24
+优化校验数据集图片的规则  
+#### 2018.11.26
+新增`train_model_v2.py`文件,训练过程中同时输出训练集和验证集的准确率  
+#### 2018.12.06
+新增多模型部署支持,修复若干bug  
+#### 2018.12.08
+优化模型识别速度,支持api压力测试和统计耗时  
+#### 2019.02.19
+1. 新增一种准确率计算方式    
+2. TAG: v1.0
+#### 2019.04.12
+1. 只保留一种`train_model.py`文件
+2. 优化代码结构
+3. 把通用配置抽取到`sample_config.json`和`captcha_config.json`
+4. 修复若干大家在issue提出的问题
+#### 2019.06.01
+1. 完善readme文档,文档不长,请大家一定要读完~
+2. 使用cnnlib目录存放神经网络结构代码
+3. 做了一版训练数据统计,大家可以参考我们的训练次数、时长和准确率
+4. TAG: v2.0  
+#### fork后开发
+1. 新增配置,如识别服务器配置,请求服务器配置
+2. 新增海关年度个人额度验证码请求地址配置
+3. 优化了大量的代码
+4. 新增python日志打印
+
+# 目录
+<a href="#项目介绍">1 项目介绍</a>  
+- <a href="#关于验证码识别">1.1 关于验证码识别</a>  
+- <a href="#目录结构">1.2 目录结构</a>  
+- <a href="#依赖">1.3 依赖</a>  
+- <a href="#模型结构">1.4 模型结构</a>  
+
+<a href="#如何使用">2 如何使用</a>  
+- <a href="#数据集">2.1 数据集</a>  
+- <a href="#配置文件">2.2 配置文件</a>  
+- <a href="#验证和拆分数据集">2.3 验证和拆分数据集</a>  
+- <a href="#训练模型">2.4 训练模型</a>  
+- <a href="#批量验证">2.5 批量验证</a>  
+- <a href="#启动WebServer">2.6 启动WebServer</a>  
+- <a href="#调用接口识别">2.7 调用接口识别</a>  
+- <a href="#部署">2.8 部署</a>  
+- <a href="#部署多个模型">2.9 部署多个模型</a>  
+- <a href="#在线识别">2.10 在线识别</a>  
+
+<a href="#说明">3 统计数据</a>  
+- <a href="#训练数据统计">3.1 训练数据统计</a>  
+- <a href="#压力测试">3.2 压力测试</a>  
+
+<a href="#开发说明">4 开发说明</a>  
+
+<a href="#已知BUG">5 已知BUG</a>  
+
+
+
+# 1 项目介绍
+## 1.1 关于验证码识别
+验证码识别大多是爬虫会遇到的问题,也可以作为图像识别的入门案例。目前通常使用如下几种方法:  
+
+| 方法名称 | 相关要点 |
+| ------ | ------ |
+| tesseract | 仅适合识别没有干扰和扭曲的图片,训练起来很麻烦 |
+| 其他开源识别库 | 不够通用,识别率未知 |
+| 付费OCR API | 需求量大的情形成本很高 |
+| 图像处理+机器学习分类算法 | 涉及多种技术,学习成本高,且不通用 |
+| 卷积神经网络 | 一定的学习成本,算法适用于多类验证码 |
+
+这里说一下使用传统的**图像处理和机器学习算法**,涉及多种技术:  
+
+1. 图像处理
+- 前处理(灰度化、二值化)
+- 图像分割
+- 裁剪(去边框)
+- 图像滤波、降噪
+- 去背景
+- 颜色分离
+- 旋转
+2. 机器学习
+- KNN
+- SVM
+
+使用这类方法对使用者的要求较高,且由于图片的变化类型较多,处理的方法不够通用,经常花费很多时间去调整处理步骤和相关算法。  
+而使用**卷积神经网络**,只需要通过简单的前处理,就可以实现大部分静态字符型验证码的端到端识别,效果很好,通用性很高。  
+
+这里列出目前**常用的验证码**生成库:
+>参考:[Java验证全家桶](https://www.cnblogs.com/cynchanpin/p/6912301.html)  
+
+| 语言 | 验证码库名称 | 链接 | 样例 |
+| ------ | ------ | ------ | ------ |
+| Java | JCaptcha | [示例](https://jcaptcha.atlassian.net/wiki/spaces/general/pages/1212427/Samples+tests)  | ![效果1](./readme_image/jcaptcha1.jpg) ![效果2](./readme_image/jcaptcha2.jpg) ![效果3](./readme_image/jcaptcha3.jpg) |
+| Java | JCaptcha4Struts2 |  |  |
+| Java | SimpleCaptcha | [例子](https://www.oschina.net/p/simplecaptcha)   | ![效果1](./readme_image/SimpleCaptcha_1.jpg) ![效果2](./readme_image/SimpleCaptcha_2.jpg) ![效果3](./readme_image/SimpleCaptcha_3.jpg) |
+| Java | kaptcha | [例子](https://github.com/linghushaoxia/kaptcha) | ![水纹效果](./readme_image/Kaptcha_5.png) ![鱼眼效果](./readme_image/Kaptcha_2.png) ![阴影效果](./readme_image/Kaptcha_3.png) |
+| Java | patchca |  | ![效果1](./readme_image/patchca_1.png) |
+| Java | imageRandom |  |  |  
+| Java | iCaptcha |  | ![效果1](./readme_image/iCaptcha.jpg) |  
+| Java | SkewPassImage |  | ![效果1](./readme_image/SkewPassImage.jpg) |  
+| Java | Cage |  | ![效果1](./readme_image/Cage1.jpg) ![效果2](./readme_image/Cage2.jpg) |
+| Python | captcha | [例子](https://github.com/nickliqian/cnn_captcha/blob/master/gen_image/gen_sample_by_captcha.py) | ![py_Captcha](./readme_image/py_Captcha-1.jpg) |
+| Python | pycapt | [例子](https://github.com/aboutmydreams/pycapt) | ![pycapt](https://github.com/aboutmydreams/pycapt/raw/master/img/do4.png) |
+| PHP | Gregwar/Captcha | [文档](https://github.com/Gregwar/Captcha) |  |
+| PHP | mewebstudio/captcha | [文档](https://github.com/mewebstudio/captcha) |  |
+
+## 1.2 目录结构
+### 1.2.1 基本配置
+| 序号 | 文件名称 | 说明 |
+| ------ | ------ | ------ |
+| 1 | `conf/` | 配置文件目录 |
+| 2 | `sample/` | 数据集目录 |
+| 3 | `model/` | 模型文件目录 |
+| 4 | `cnnlib/` | 封装CNN的相关代码目录 |
+### 1.2.2 训练模型
+| 序号 | 文件名称 | 说明 |
+| ------ | ------ | ------ |
+| 1 | verify_and_split_data.py | 验证数据集、拆分数据为训练集和测试集 |
+| 2 | network.py | cnn网络基类 |
+| 3 | train_model.py | 训练模型 |
+| 4 | test_batch.py | 批量验证 |
+| 5 | gen_image/gen_sample_by_captcha.py | 生成验证码的脚本 |
+| 6 | gen_image/collect_labels.py | 用于统计验证码标签(常用于中文验证码) |
+
+### 1.2.3 web接口
+| 序号 | 文件名称 | 说明 |
+| ------ | ------ | ------ |
+| 1 | webserver_captcha_image.py | 获取验证码接口 |
+| 2 | webserver_recognize_api.py | 提供在线识别验证码接口 |
+| 3 | recognize_online.py | 使用接口识别的例子 |
+| 4 | recognize_local.py | 测试本地图片的例子 |
+| 5 | recognize_time_test.py | 压力测试识别耗时和请求响应耗时 |
+
+## 1.3 依赖
+```
+pip install -r requirements.txt
+```
+注意:如果需要使用GPU进行训练,请把文件中的tensorflow修改为tensorflow-gpu
+
+## 1.4 模型结构
+
+| 序号 | 层级 |
+| :------: | :------: |
+| 输入 | input |
+| 1 | 卷积层 + 池化层 + 降采样层 + ReLU  |
+| 2 | 卷积层 + 池化层 + 降采样层 + ReLU  |
+| 3 | 卷积层 + 池化层 + 降采样层 + ReLU  |
+| 4 | 全连接 + 降采样层 + Relu   |
+| 5 | 全连接 + softmax  |
+| 输出 | output  |
+
+# 2 如何使用
+## 2.1 数据集
+原始样本越多,生成模型识别率越高,一些作了旋转,切边的验证码,基本上需要一万个样本,才能达到90%以上的识别率。
+
+原始数据集可以存放在`sample/origin`目录中。  
+为了便于处理,图片最好以`2e8j_17322d3d4226f0b5c5a71d797d2ba7f7.jpg`格式命名(标签_序列号.后缀)。 
+  
+如果你没有训练集,你可以使用`gen_sample_by_captcha.py`文件生成训练集文件。
+生成之前你需要修改相关配置`conf/captcha_config.json`(路径、文件后缀、字符集等)。
+```
+{
+  "root_dir": "sample/origin/",  # 验证码保存路径
+  "image_suffix": "png",         # 验证码图片后缀
+  "characters": "0123456789",    # 生成验证码的可选字符
+  "count": 1000,                 # 生成验证码的图片数量
+  "char_count": 4,               # 每张验证码图片上的字符数量
+  "width": 100,                  # 图片宽度
+  "height": 60,                   # 图片高度
+  "webserver_captcha_url": "127.0.0.1",                   # 验证码服务ip
+  "webserver_captcha_port": 6100                   # 验证码服务端口
+}
+```
+
+## 2.2 配置文件
+创建一个新项目前,需要自行**修改相关配置文件**`conf/sample_config.json`。
+```
+{
+   "env": "prod",  # 环境,prod:生产环境,dev:开发环境
+  "origin_image_dir": "sample/origin/",  # 原始文件
+  "new_image_dir": "sample/new_train/",  # 新的训练样本
+  "train_image_dir": "sample/train/",    # 训练集
+  "test_image_dir": "sample/test/",      # 测试集
+  "api_image_dir": "sample/api/",        # api接收的图片储存路径
+  "local_origin_image_dir": "sample/local_origin/",    # 本地待识别图片的路径
+  "online_image_dir": "sample/online/",    # 从远程验证码url获取的待识别图片,识别后保存图片的路径
+  "local_image_dir": "sample/local/",    # 本地识别后保存图片的路径
+  "model_save_dir": "model/",            # 训练好的模型储存路径
+  "image_width": 100,                    # 图片宽度
+  "image_height": 60,                    # 图片高度
+  "max_captcha": 4,                      # 验证码字符个数
+  "image_suffix": "png",                 # 图片文件后缀
+  "char_set": "0123456789abcdefghijklmnopqrstuvwxyz",  # 验证码识别结果类别
+  "use_labels_json_file": false,                       # 是否开启读取`labels.json`内容
+  "webserver_recognize_url": "127.0.0.1",      # 识别服务器IP
+  "webserver_recognize_port": 6000,      # 识别服务器端口
+  "request_recognize_ip": "127.0.0.1",      # 请求识别远程验证码的服务ip,供外部程序调用
+  "request_recognize_port": 2000,      # 请求识别验证码的服务端口
+  "remote_url": "http://127.0.0.1:6100/captcha/",      # 验证码远程获取地址
+  "cycle_stop": 3000,                                  # 启动任务后的训练指定次数后停止
+  "acc_stop": 0.99,                                    # 训练到指定准确率后停止
+  "cycle_save": 500,                                   # 训练指定次数后定时保存模型
+  "enable_gpu": 0,                                     # 是否开启GPU训练
+  "train_batch_size": 128,                             # 训练时每次使用的图片张数,如果CPU或者GPU内存太小可以减少这个参数
+  "test_batch_size": 100                               # 每批次测试时验证的图片张数,不要超过验证码集的总数
+}
+
+```
+关于`验证码识别结果类别`,假设你的样本是中文验证码,你可以使用`tools/collect_labels.py`脚本进行标签的统计。
+会生成文件`gen_image/labels.json`存放所有标签,在配置文件中设置`use_labels_json_file = True`开启读取`labels.json`内容作为`结果类别`。
+
+## 2.3 验证和拆分数据集
+此功能会校验原始图片集的尺寸和测试图片是否能打开,并按照19:1的比例拆分出训练集和测试集。  
+所以需要分别创建和指定三个文件夹:origin,train,test用于存放相关文件。
+
+也可以修改为不同的目录,但是最好修改为绝对路径。  
+文件夹创建好之后,执行以下命令即可:
+```
+python3 verify_and_split_data.py
+```
+一般会有类似下面的提示
+```
+>>> 开始校验目录:[sample/origin/]
+开始校验原始图片集
+原始集共有图片: 1001张
+====以下1张图片有异常====
+[第0张图片] [.DStore] [文件后缀不正确]
+========end
+开始分离原始图片集为:测试集(5%)和训练集(95%)
+共分配1000张图片到训练集和测试集,其中1张为异常留在原始目录
+测试集数量为:50
+训练集数量为:950
+>>> 开始校验目录:[sample/new_train/]
+【警告】找不到目录sample/new_train/,即将创建
+开始校验原始图片集
+原始集共有图片: 0张
+====以下0张图片有异常====
+未发现异常(共 0 张图片)
+========end
+开始分离原始图片集为:测试集(5%)和训练集(95%)
+共分配0张图片到训练集和测试集,其中0张为异常留在原始目录
+测试集数量为:0
+训练集数量为:0
+```
+程序会同时校验和分割`origin_image_dir`和`new_image_dir`两个目录中的图片;后续有了更多的样本,可以把样本放在`new_image_dir`目录中再次执行`verify_and_split_data`。  
+程序会把无效的文件留在原文件夹。  
+
+此外,当你有新的样本需要一起训练,可以放在`sample/new`目录下,再次运行`python3 verify_and_split_data.py`即可。  
+需要注意的是,如果新的样本中有新增的标签,你需要把新的标签增加到`char_set`配置中或者`labels.json`文件中。 
+ 
+## 2.4 训练模型
+创建好训练集和测试集之后,就可以开始训练模型了。  
+训练的过程中会输出日志,日志展示当前的训练轮数、准确率和loss。  
+**此时的准确率是训练集图片的准确率,代表训练集的图片识别情况**  
+例如:
+```
+第10次训练 >>> 
+[训练集] 字符准确率为 0.03000 图片准确率为 0.00000 >>> loss 0.1698757857
+[验证集] 字符准确率为 0.04000 图片准确率为 0.00000 >>> loss 0.1698757857
+```
+字符准确率和图片准确率的解释:
+```
+假设:有100张图片,每张图片四个字符,共400个字符。我们这里把任务拆分为为需要识别400个字符
+字符准确率:识别400的字符中,正确字符的占比。
+图片准确率:100张图片中,4个字符完全识别准确的图片占比。
+```
+这里不具体介绍tensorflow安装相关问题,直奔主题。  
+确保图片相关参数和目录设置正确后,执行以下命令开始训练:
+```
+python3 train_model.py
+```
+也可以根据`train_model.py`的`main`函数中的代码调用类开始训练或执行一次简单的识别演示。  
+
+由于训练集中常常不包含所有的样本特征,所以会出现训练集准确率是100%而测试集准确率不足100%的情况,此时提升准确率的一个解决方案是增加正确标记后的负样本。
+
+## 2.5 批量验证
+使用测试集的图片进行验证,输出准确率。  
+```
+python3 test_batch.py
+```
+同样可以根据`main`函数中的代码调用类开始验证。
+
+## 2.6 启动WebServer
+项目已经封装好加载模型和识别图片的类,启动`web server`后调用接口就可以使用识别服务。  
+启动`web server`
+```
+python3 webserver_recognize_api.py
+```
+接口url为`http://127.0.0.1:6000/b`
+
+## 2.7 调用接口识别
+使用requests调用拉取验证码的接口:
+```
+url = "http://127.0.0.1:6000/b"
+files = {'image_file': (image_file_name, open('captcha.jpg', 'rb'), 'application')}
+r = requests.post(url=url, files=files)
+```
+返回的结果是一个json:
+```
+{
+    'time': '1542017705.9152594',
+    'value': 'jsp1',
+}
+```
+文件`recognize_local.py`是使用接口识别本地的例子,这个例子运行成功,那么识别验证码的一套流程基本上是走了一遍了。  
+在线识别验证码是现实中常用场景,文件`recognize_online.py`是使用接口在线识别的例子,参见:`## 2.10 在线识别`。
+
+## 2.8 部署
+1. 拉取外部远程验证码图片并通过CNN识别
+部署的时候,配置`sample_config.json`文件的`webserver_recognize_url`可以把外部远程拉取到的验证码进行识别。
+配置识别服务器如下:
+```
+  "webserver_recognize_url": "127.0.0.1",
+  "webserver_recognize_port": 5000,
+```
+```
+请求地址为:http://webserver_recognize_url:webserver_recognize_port/b
+程序已经在代码里作了拼接
+```
+然后开启端口访问权限,就可以进行识别了。  
+2. 应用程序调用访问
+你的应用程序通过url访问,得到识别后的验证码文本,只针对远程拉取的验证码图片有效,本地验证码不会使用。
+启动`recognize_online.py`文件后由Flask生成的服务url,访问url后可以得到识别出来的验证码。
+Flask启动服务配置地址在`sample_config.json`文件,如下
+```
+  "request_recognize_ip": "127.0.0.1",
+  "request_recognize_port": 2000,
+```
+```
+访问地址为:http://request_recognize_ip:request_recognize_port/rec
+```
+
+另外为了开启多进程处理请求,可以使用uwsgi+nginx组合进行部署。  
+这部分可以参考:[Flask部署选择](http://docs.jinkan.org/docs/flask/deploying/index.html)
+
+## 2.9 部署多个模型
+部署多个模型:
+在`webserver_recognize_api.py`文件汇总,新建一个Recognizer对象;  
+并参照原有`up_image`函数编写的路由和识别逻辑。
+```
+Q = Recognizer(image_height, image_width, max_captcha, char_set, model_save_dir)
+```
+注意修改这一行:
+```
+value = Q.rec_image(img)
+```
+
+## 2.10 在线识别
+在线识别验证码是现实中常用场景,即实时获取目标验证码来调用接口进行识别。  
+为了测试的完整性,这里搭建了一个验证码获取接口,通过执行下面的命令启动:  
+```
+python webserver_captcha_image.py
+```
+启动后通过访问此地址:`http://127.0.0.1:6100/captcha/`可以接收到验证码图片的二进制流文件。  
+具体进行在线识别任务的demo参见:`recognize_online.py`。  
+
+## 2.11 日志配置
+增加了python日志打印,请在日志文件log_ware.py内配置日志相关参数
+
+
+# 3 数据统计
+## 3.1 训练数据统计
+由于很多同学提出,“需要训练多久呀?”、“准确率可以达到多少?”、“为什么我的准确率一直是0?”类似的疑问。  
+这一小节,使用默认配置(2019.06.02),把训练过程中的数据做了统计,给大家做一个展示。  
+本次测试条件如下:
+- 验证码:本项目自带生成验证码程序,数字+小写英文
+- 数量:20000张
+- 计算引擎:GPU
+- GPU型号:笔记本,GTX 950X 2G显卡
+  
+经过测试:
+5000次,25分钟,**训练集**字符准确率84%,图片准确率51%;  
+9190次,46分钟,**训练集**字符准确率100%,图片准确率100%;  
+12000次,60分钟,**测试集**的准确率基本上已经跑不动了。  
+
+使用`test_batch.py`测试,日志如下:  
+```
+100个样本识别耗时6.513171672821045秒,准确率37.0%
+```
+有37%的准确率,可以说是识别成功的第一步了。  
+
+曲线图如下:  
+训练集-  
+![train_acc](readme_image/train_acc.png) 
+   
+测试集-   
+![test_acc](readme_image/test_acc.png)  
+
+
+## 3.2 压力测试和统计数据
+提供了一个简易的压力测试脚本,可以统计api运行过程中识别耗时和请求耗时的相关数据,不过图需要自己用Excel拉出来。  
+打开文件`recognize_time_test.py`,修改`main`函数下的`test_file`路径,这里会重复使用一张图片来访问识别接口。  
+最后数据会储存在test.csv文件中。  
+使用如下命令运行:  
+```
+python3 recognize_time_test.py
+----输出如下
+2938,5150,13:30:25,总耗时:29ms,识别:15ms,请求:14ms
+2939,5150,13:30:25,总耗时:41ms,识别:21ms,请求:20ms
+2940,5150,13:30:25,总耗时:47ms,识别:16ms,请求:31ms
+```
+这里对一个模型进行了两万次测试后,一组数据test.csv。
+把test.csv使用箱线图进行分析后可以看到:  
+![压力测试结果](readme_image/压力测试结果.png)  
+- 单次请求API总耗时(平均值):27ms  
+- 单次识别耗时(平均值):12ms  
+- 每次请求耗时(平均值):15ms  
+其中有:请求API总耗时 = 识别耗时 + 请求耗时  
+
+# 4 开发说明
+- 20190209  
+1. 目前tensorboard展示支持的不是很好。
+- 20190601
+1. 最近比较忙,issue回的有点慢,请大家见谅
+2. dev分支开发到一半一直没时间弄,今天儿童节花了一下午时间更新了一下:)
+3. 感谢看到这里的你,谢谢你的支持
+
+# 5 已知BUG
+1. 使用pycharm启动recognize_api.py文件报错
+```
+2018-12-01 00:35:15.106333: W T:\src\github\tensorflow\tensorflow\core\framework\op_kernel.cc:1273] OP_REQUIRES failed at save_restore_tensor.cc:170 : Invalid argument: Unsuccessful TensorSliceReader constructor: Failed to get matching files on ./model/: Not found: FindFirstFile failed for: ./model : ϵͳ�Ҳ���ָ����·����
+; No such process
+......
+tensorflow.python.framework.errors_impl.InvalidArgumentError: Unsuccessful TensorSliceReader constructor: Failed to get matching files on ./model/: Not found: FindFirstFile failed for: ./model : ϵͳ\udcd5Ҳ\udcbb\udcb5\udcbdָ\udcb6\udca8\udcb5\udcc4·\udcbe\udcb6\udca1\udca3
+; No such process
+	 [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
+```
+由pycharm默认设置了工作空间,导致读取相对路径的model文件夹出错。
+解决办法:编辑运行配置,设置工作空间为项目目录即可。
+![bug_api启动失败](readme_image/bug_api启动失败.png)
+
+2. FileNotFoundError: [Errno 2] No such file or directory: 'xxxxxx'  
+目录下有文件夹不存在,在指定目录创建好文件夹即可。
+
+3. api程序在运行过程中内存越占越大  
+结果查阅资料:[链接](https://blog.csdn.net/The_lastest/article/details/81130500)  
+在迭代循环时,不能再包含任何张量的计算表达式,否在会内存溢出。
+将张量的计算表达式放到init初始化执行后,识别速度得到极大的提升。
+
+4. 加载多个模型报错
+原因是两个Recognizer对象都使用了默认的Graph。
+解决办法是在创建对象的时候不使用默认Graph,新建graph,这样每个Recognizer都使用不同的graph,就不会冲突了。
+
+5. Flask程序用于生产
+可以参考官方文档:[Flask的生产配置](http://docs.jinkan.org/docs/flask/config.html)
+
+6. OOM happens
+```
+Hint: If you want to see a list of allocated tensors when OOM happens,
+add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
+```
+尽可能关闭其他占用GPU或者CPU的任务,或者减小`sample_config.json`中的`train_batch_size`参数。

+ 104 - 0
cnnlib/network.py

@@ -0,0 +1,104 @@
+import tensorflow as tf
+import numpy as np
+
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+
class CNN(object):
    """Base convolutional network for fixed-length character captcha recognition.

    The graph maps a flattened grayscale image of shape
    ``(image_height * image_width,)`` to a logits vector of shape
    ``(max_captcha * char_set_len,)`` — one one-hot segment per character.
    Subclasses (trainer / recognizer) build the graph via :meth:`model`.
    """

    def __init__(self, image_height, image_width, max_captcha, char_set, model_save_dir):
        """
        :param image_height: input image height in pixels
        :param image_width: input image width in pixels
        :param max_captcha: number of characters on one captcha image
        :param char_set: sequence of candidate characters; its order defines
            the class index of each character in the one-hot encoding
        :param model_save_dir: checkpoint path used when saving/restoring the model
        """
        self.image_height = image_height
        self.image_width = image_width
        self.max_captcha = max_captcha
        self.char_set = char_set
        self.char_set_len = len(char_set)
        self.model_save_dir = model_save_dir  # model checkpoint path
        with tf.name_scope('parameters'):
            self.w_alpha = 0.01
            self.b_alpha = 0.1
        # TF placeholders, created once so subclasses can feed them
        with tf.name_scope('data'):
            self.X = tf.placeholder(tf.float32, [None, self.image_height * self.image_width])  # feature vectors
            self.Y = tf.placeholder(tf.float32, [None, self.max_captcha * self.char_set_len])  # one-hot labels
            self.keep_prob = tf.placeholder(tf.float32)  # dropout keep probability

    @staticmethod
    def convert2gray(img):
        """Convert an image array to grayscale.

        A 3-channel (or more) image is reduced with the ITU-R BT.601 luma
        weights; a single-channel image is returned unchanged.

        :param img: numpy.ndarray of shape (H, W) or (H, W, C)
        :return: numpy.ndarray of shape (H, W)
        """
        if len(img.shape) > 2:
            r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
            gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
            return gray
        else:
            return img

    def text2vec(self, text):
        """Encode a label string as a concatenated one-hot vector.

        :param text: label string of at most ``max_captcha`` characters,
            each of which must appear in ``char_set``
        :return: numpy.array of length ``max_captcha * char_set_len``
        :raises ValueError: if the label is too long, or (via ``str.index``)
            contains a character that is not in ``char_set``
        """
        text_len = len(text)
        if text_len > self.max_captcha:
            raise ValueError('验证码最长{}个字符'.format(self.max_captcha))

        vector = np.zeros(self.max_captcha * self.char_set_len)

        for i, ch in enumerate(text):
            # each character occupies its own char_set_len-wide segment
            idx = i * self.char_set_len + self.char_set.index(ch)
            vector[idx] = 1
        return vector

    def model(self):
        """Build the CNN graph: three conv/pool/dropout stages + two FC layers.

        :return: logits tensor of shape (batch, max_captcha * char_set_len);
            apply reshape + argmax over the last axis to decode characters
        """
        x = tf.reshape(self.X, shape=[-1, self.image_height, self.image_width, 1])
        logger.debug(">>> input x: %s", x)

        # convolution layer 1
        wc1 = tf.get_variable(name='wc1', shape=[3, 3, 1, 32], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc1 = tf.Variable(self.b_alpha * tf.random_normal([32]))
        conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, wc1, strides=[1, 1, 1, 1], padding='SAME'), bc1))
        conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv1 = tf.nn.dropout(conv1, self.keep_prob)

        # convolution layer 2
        wc2 = tf.get_variable(name='wc2', shape=[3, 3, 32, 64], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc2 = tf.Variable(self.b_alpha * tf.random_normal([64]))
        conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, wc2, strides=[1, 1, 1, 1], padding='SAME'), bc2))
        conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv2 = tf.nn.dropout(conv2, self.keep_prob)

        # convolution layer 3
        wc3 = tf.get_variable(name='wc3', shape=[3, 3, 64, 128], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc3 = tf.Variable(self.b_alpha * tf.random_normal([128]))
        conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, wc3, strides=[1, 1, 1, 1], padding='SAME'), bc3))
        conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv3 = tf.nn.dropout(conv3, self.keep_prob)
        logger.debug(">>> convolution 3: %s", conv3.shape)
        # flatten size after three 2x2 poolings
        next_shape = conv3.shape[1] * conv3.shape[2] * conv3.shape[3]

        # fully-connected layer 1
        wd1 = tf.get_variable(name='wd1', shape=[next_shape, 1024], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bd1 = tf.Variable(self.b_alpha * tf.random_normal([1024]))
        dense = tf.reshape(conv3, [-1, wd1.get_shape().as_list()[0]])
        dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
        dense = tf.nn.dropout(dense, self.keep_prob)

        # fully-connected (output) layer 2
        # BUG FIX: this variable was previously created as tf.get_variable('name', ...)
        # — a copy-paste error that stored the weight under the literal name "name"
        # in the graph/checkpoint. Registered as 'wout' to match the wc*/wd* scheme.
        # NOTE(review): checkpoints trained with the old code store this tensor
        # under 'name' and must be retrained (or remapped) after this change.
        wout = tf.get_variable(name='wout', shape=[1024, self.max_captcha * self.char_set_len], dtype=tf.float32,
                               initializer=tf.contrib.layers.xavier_initializer())
        bout = tf.Variable(self.b_alpha * tf.random_normal([self.max_captcha * self.char_set_len]))

        with tf.name_scope('y_prediction'):
            y_predict = tf.add(tf.matmul(dense, wout), bout)

        return y_predict

+ 81 - 0
cnnlib/recognition_object.py

@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+"""
+识别图像的类,为了快速进行多次识别可以调用此类下面的方法:
+R = Recognizer(image_height, image_width, max_captcha)
+for i in range(10):
+    r_img = Image.open(str(i) + ".jpg")
+    t = R.rec_image(r_img)
+简单的图片每张基本上可以达到毫秒级的识别速度
+"""
+import tensorflow as tf
+import numpy as np
+from PIL import Image
+from cnnlib.network import CNN
+import json
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+
+class Recognizer(CNN):
+    def __init__(self, image_height, image_width, max_captcha, char_set, model_save_dir):
+        # 初始化变量
+        super(Recognizer, self).__init__(image_height, image_width, max_captcha, char_set, model_save_dir)
+
+        # 新建图和会话
+        self.g = tf.Graph()
+        self.sess = tf.Session(graph=self.g)
+        # 使用指定的图和会话
+        with self.g.as_default():
+            # 迭代循环前,写出所有用到的张量的计算表达式,如果写在循环中,会发生内存泄漏,拖慢识别的速度
+            # tf初始化占位符
+            self.X = tf.placeholder(tf.float32, [None, self.image_height * self.image_width])  # 特征向量
+            self.Y = tf.placeholder(tf.float32, [None, self.max_captcha * self.char_set_len])  # 标签
+            self.keep_prob = tf.placeholder(tf.float32)  # dropout值
+            # 加载网络和模型参数
+            self.y_predict = self.model()
+            self.predict = tf.argmax(tf.reshape(self.y_predict, [-1, self.max_captcha, self.char_set_len]), 2)
+            saver = tf.train.Saver()
+            with self.sess.as_default() as sess:
+                saver.restore(sess, self.model_save_dir)
+
+    # def __del__(self):
+    #     self.sess.close()
+    #     logger.debug("session close")
+
+    def rec_image(self, img):
+        # 读取图片
+        img_array = np.array(img)
+        test_image = self.convert2gray(img_array)
+        test_image = test_image.flatten() / 255
+        # 使用指定的图和会话
+        with self.g.as_default():
+            with self.sess.as_default() as sess:
+                text_list = sess.run(self.predict, feed_dict={self.X: [test_image], self.keep_prob: 1.})
+
+        # 获取结果
+        predict_text = text_list[0].tolist()
+        p_text = ""
+        for p in predict_text:
+            p_text += str(self.char_set[p])
+
+        # 返回识别结果
+        return p_text
+
+
+def main():
+    with open("conf/sample_config.json", "r", encoding="utf-8") as f:
+        sample_conf = json.load(f)
+    image_height = sample_conf["image_height"]
+    image_width = sample_conf["image_width"]
+    max_captcha = sample_conf["max_captcha"]
+    char_set = sample_conf["char_set"]
+    model_save_dir = sample_conf["model_save_dir"]
+    R = Recognizer(image_height, image_width, max_captcha, char_set, model_save_dir)
+    r_img = Image.open("./sample/test/2b3n_6915e26c67a52bc0e4e13d216eb62b37.jpg")
+    t = R.rec_image(r_img)
+    logger.debug(t)
+
+
+if __name__ == '__main__':
+    main()

+ 11 - 0
conf/captcha_config.json

@@ -0,0 +1,11 @@
+{
+  "root_dir": "sample/origin/",
+  "image_suffix": "png",
+  "characters": "0123456789abcdefghijklmnopqrstuvwxyz",
+  "count": 20000,
+  "char_count": 4,
+  "width": 100,
+  "height": 60,
+  "webserver_captcha_url": "127.0.0.1",
+  "webserver_captcha_port": 6100
+}

+ 29 - 0
conf/sample_config.json

@@ -0,0 +1,29 @@
+{
+  "env": "dev",
+  "origin_image_dir": "sample/origin/",
+  "new_image_dir": "sample/new_train/",
+  "train_image_dir": "sample/train/",
+  "test_image_dir": "sample/test/",
+  "api_image_dir": "sample/api/",
+  "local_origin_image_dir": "sample/local_origin/",
+  "online_image_dir": "sample/online/",
+  "local_image_dir": "sample/local/",
+  "model_save_dir": "model/",
+  "image_width": 100,
+  "image_height": 35,
+  "max_captcha": 4,
+  "image_suffix": "jpg",
+  "char_set": "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+  "use_labels_json_file": false,
+  "webserver_recognize_url": "127.0.0.1",
+  "webserver_recognize_port": 5000,
+  "request_recognize_ip": "127.0.0.1",
+  "request_recognize_port": 2000,
+  "remote_url": "https://app.singlewindow.cn/ceb2pubweb//verifyCode/creator",
+  "cycle_stop": 6000,
+  "acc_stop": 0.99,
+  "cycle_save": 20,
+  "enable_gpu": 0,
+  "train_batch_size": 180,
+  "test_batch_size": 150
+}

+ 60 - 0
conf/sample_config.md

@@ -0,0 +1,60 @@
+# sample config配置说明
+
+## 环境配置
+```
+"env": "prod"  # 环境,prod:生产环境,dev:开发环境
+```
+## 图片文件夹
+```
+origin_image_dir = "sample/origin/"  # 原始文件
+train_image_dir = "sample/train/"   # 训练集
+test_image_dir = "sample/test/"   # 测试集
+api_image_dir = "sample/api/"   # api接收的图片保存路径
+local_origin_image_dir = "sample/local_origin/"  # 本地待识别图片的路径
+new_image_dir = "sample/new_train/",  # 新的训练样本
+online_image_dir = "sample/online/"  # 从远程验证码url获取的待识别图片,识别后保存图片的路径
+local_image_dir = "sample/local/"  # 本地识别后保存图片的路径
+
+```
+## 模型文件夹
+```
+model_save_dir = "model/"  # 训练好的模型储存路径
+```
+## 图片相关参数
+```
+image_width = 80  # 图片宽度
+image_height = 40  # 图片高度
+max_captcha = 4  # 验证码字符个数
+image_suffix = "jpg"  # 图片文件后缀
+```
+## 是否从文件中的导入标签
+```
+use_labels_json_file = False
+```
+## 验证码字符相关参数
+```
+char_set = "0123456789abcdefghijklmnopqrstuvwxyz"
+char_set = "abcdefghijklmnopqrstuvwxyz"
+char_set = "0123456789"
+```
+## 创建本地识别服务器
+```
+webserver_recognize_url = "127.0.0.1",
+webserver_recognize_port = 6000,
+```
+## 供外部程序调用的服务器
+```
+request_recognize_ip = "127.0.0.1",
+request_recognize_port = 2000,
+```
+## 在线识别远程验证码地址
+```
+remote_url = "http://127.0.0.1:6100/captcha/"
+```
+## 训练相关参数
+```
+cycle_stop = 3000  # 到指定迭代次数后停止
+acc_stop = 0.99  # 到指定准确率后停止
+cycle_save = 500  # 每训练指定轮数就保存一次(覆盖之前的模型)
+enable_gpu = 0  # 使用GPU还是CPU,使用GPU需要安装对应版本的tensorflow-gpu==1.7.0
+```

+ 57 - 0
gen_sample_by_captcha.py

@@ -0,0 +1,57 @@
+# -*- coding: UTF-8 -*-
+"""
+使用captcha lib生成验证码(前提:pip3 install captcha)
+"""
+from captcha.image import ImageCaptcha
+import os
+import random
+import time
+import json
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+
+def gen_special_img(text, file_path, width, height):
+    # 生成img文件
+    generator = ImageCaptcha(width=width, height=height)  # 指定大小
+    img = generator.generate_image(text)  # 生成图片
+    img.save(file_path)  # 保存图片
+
+
+def gen_ima_by_batch(root_dir, image_suffix, characters, count, char_count, width, height):
+    # 判断文件夹是否存在
+    if not os.path.exists(root_dir):
+        os.makedirs(root_dir)
+
+    for index, i in enumerate(range(count)):
+        text = ""
+        for j in range(char_count):
+            text += random.choice(characters)
+
+        timec = str(time.time()).replace(".", "")
+        p = os.path.join(root_dir, "{}_{}.{}".format(text, timec, image_suffix))
+        gen_special_img(text, p, width, height)
+
+        logger.debug("Generate captcha image => %d", index + 1)
+
+
+def main():
+    with open("conf/captcha_config.json", "r") as f:
+        config = json.load(f)
+    # 配置参数
+    root_dir = config["root_dir"]  # 图片储存路径
+    image_suffix = config["image_suffix"]  # 图片储存后缀
+    characters = config["characters"]  # 图片上显示的字符集 # characters = "0123456789abcdefghijklmnopqrstuvwxyz"
+    count = config["count"]  # 生成多少张样本
+    char_count = config["char_count"]  # 图片上的字符数量
+
+    # 设置图片高度和宽度
+    width = config["width"]
+    height = config["height"]
+
+    gen_ima_by_batch(root_dir, image_suffix, characters, count, char_count, width, height)
+
+
+if __name__ == '__main__':
+    main()

+ 95 - 0
log_ware.py

@@ -0,0 +1,95 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+#
+#
+# author: Scott Chen
+# date: 2020-01-03
+
+import os
+import time
+import logging
+from logging.handlers import RotatingFileHandler
+
+
+class LogWare:
+
+    def __init__(self):
+        # 单个日志文件大小上限(单位:字节,此处为 3MB)
+        self.__log_size = 3 * 1024 * 1024
+        # 日志备份数量
+        self.__backup_count = 100
+        # 日志备份目录
+        self.__log_dir_half = 'd:/data/log/'
+        # 日志备份使用的项目名称
+        self.__project_name = 'cnn-captcha'
+
+        self.__log_dir = self.get_log_dir()
+
+        # 是否存在目录,不存在就创建
+        mkdir_with_lambda = lambda x: os.makedirs(x) if not os.path.exists(x) else True
+        mkdir_with_lambda(self.__log_dir)
+
+        # 日志文件名前缀部分,全文件名为 名称_四位年+两位月+2位日.log
+        # 备份日志文件名 为日志名后面加顺序数据
+        self.__log_file_prefix = 'my-log'
+        # 日志打印格式
+        self.__formatter = logging.Formatter(
+            '[%(asctime)s][%(threadName)s][%(levelname)s][%(filename)s][%(lineno)d] %(message)s', '%Y-%m-%d %H:%M:%S')
+
+
+        self.__timestamp = time.strftime("%Y-%m-%d", time.localtime())
+        self.__log_file_name = '{}_{}.log'.format(self.__log_file_prefix, self.__timestamp)
+
+        self.__log_path = os.path.join(self.__log_dir, self.__log_file_name)
+
+        if os.path.exists(self.__log_dir) and os.path.isdir(self.__log_dir):
+            pass
+        else:
+            os.makedirs(self.__log_dir)
+
+        self.__logger = logging.getLogger(self.__log_file_name)
+        # 坑坑! 此处要整体设置 logger level = DEBUG,否则后面在不同的 handler 中设置 level 无效
+        self.__logger.setLevel(logging.DEBUG)
+
+    def get_logger(self):
+
+        # 同名 logger 是进程内单例,重复调用本方法会反复 addHandler,导致每条日志重复输出;
+        # 已经配置过 handler 时直接返回
+        if self.__logger.handlers:
+            return self.__logger
+
+        # 控制台
+        console_handler = logging.StreamHandler()
+        # console_handler.setLevel(logging.DEBUG)
+        console_handler.setFormatter(self.__formatter)
+
+        # 此处通过文件大小截断日志文件,如果想要通过时间截断,可以使用 TimedRotatingFileHandler 这个类
+        file_handler = logging.handlers.RotatingFileHandler(filename=self.__log_path, mode='a',
+                                                            maxBytes=self.__log_size,
+                                                            encoding='utf8', backupCount=self.__backup_count)
+        file_handler.setFormatter(self.__formatter)
+
+        # 日志文件输出
+        self.__logger.addHandler(file_handler)
+        self.__logger.addHandler(console_handler)
+        self.__logger.setLevel(logging.DEBUG)
+        return self.__logger
+
+    def get_log_dir(self):
+        log_dir = os.path.join(self.__log_dir_half, self.__project_name)
+        return log_dir
+
+
+# ########## 单元测试 ##########
+# 外部使用时,引入
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+if __name__ == '__main__':
+    pass
+    i = 0
+    while True:
+        logger.debug("level debug %s %s %d", 'aaaa', 'bbb', 10)
+        logger.info("level info")
+        logger.warning('level warning')
+        logger.error("level error")
+        logger.critical('level critical')
+
+        i += 1
+        if i == 10: break

TEMPAT SAMPAH
readme_image/Cage1.jpg


TEMPAT SAMPAH
readme_image/Cage2.jpg


TEMPAT SAMPAH
readme_image/Kaptcha_2.png


TEMPAT SAMPAH
readme_image/Kaptcha_3.png


TEMPAT SAMPAH
readme_image/Kaptcha_5.png


TEMPAT SAMPAH
readme_image/SimpleCaptcha_1.jpg


TEMPAT SAMPAH
readme_image/SimpleCaptcha_2.jpg


TEMPAT SAMPAH
readme_image/SimpleCaptcha_3.jpg


TEMPAT SAMPAH
readme_image/SkewPassImage.jpg


TEMPAT SAMPAH
readme_image/bug_api启动失败.png


TEMPAT SAMPAH
readme_image/iCaptcha.jpg


TEMPAT SAMPAH
readme_image/jcaptcha1.jpg


TEMPAT SAMPAH
readme_image/jcaptcha2.jpg


TEMPAT SAMPAH
readme_image/jcaptcha3.jpg


TEMPAT SAMPAH
readme_image/patchca_1.png


TEMPAT SAMPAH
readme_image/py_Captcha-1.jpg


TEMPAT SAMPAH
readme_image/test_acc.png


TEMPAT SAMPAH
readme_image/train_acc.png


TEMPAT SAMPAH
readme_image/压力测试结果.png


+ 76 - 0
recognize_local.py

@@ -0,0 +1,76 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+使用自建的接口识别来自本地的验证码
+需要配置参数:
+    remote_url = "https://www.xxxxxxx.com/getImg"  验证码链接地址
+    rec_times = 1  识别的次数
+"""
+import datetime
+import requests
+from io import BytesIO
+import time
+import json
+import os
+
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+with open("conf/sample_config.json", "r") as f:
+    sample_conf = json.load(f)
+
+# 配置参数
+env = sample_conf["env"]  # 环境
+local_save_path = sample_conf["local_image_dir"]  # 本地识别后的保存路径
+local_origin_image_dir = sample_conf["local_origin_image_dir"]  # 本地待识别图片的路径
+image_suffix = sample_conf["image_suffix"]  # 文件后缀
+
+webserver_recognize_url = sample_conf['webserver_recognize_url']  # 识别服务器IP
+webserver_recognize_port = sample_conf['webserver_recognize_port']  # 识别服务器端口
+
+
+def recognize_captcha(test_path, save_path, image_suffix):
+    image_file_name = 'captcha.{}'.format(image_suffix)
+
+    with open(test_path, "rb") as f:
+        content = f.read()
+
+    # 识别
+    s = time.time()
+    url = "http://{}:{}/b".format(webserver_recognize_url, str(webserver_recognize_port))
+    files = {'image_file': (image_file_name, BytesIO(content), 'application')}
+    r = requests.post(url=url, files=files)
+    e = time.time()
+
+    # 识别结果
+    logger.debug("本地图片,调用本地识别服务,接口响应: %s", r.text)
+    predict_text = json.loads(r.text)["value"]
+    now_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    logger.debug("【%s】 耗时:%s ms 预测结果:%s", now_time, int((e - s) * 1000), predict_text)
+
+    if env.lower() == 'dev':
+        # 保存文件
+        img_name = "{}_{}.{}".format(predict_text, str(time.time()).replace(".", ""), image_suffix)
+        path = os.path.join(save_path, img_name)
+        with open(path, "wb") as f:
+            f.write(content)
+
+    logger.debug("============== local recognized end ==============")
+
+
+def main():
+    files = os.listdir(local_origin_image_dir)
+    if len(files) <= 0:
+        logger.debug("%s,%s", local_origin_image_dir, "没有任何文件")
+        logger.debug("...没有任何文件")
+        return
+
+    for file in files:
+        test_path = "{}{}".format(local_origin_image_dir, file)
+        logger.debug("test_path: %s", test_path)
+        recognize_captcha(test_path, local_save_path, image_suffix)
+
+
+if __name__ == '__main__':
+    main()

+ 131 - 0
recognize_online.py

@@ -0,0 +1,131 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+使用自建的接口识别来自网络的验证码
+需要配置参数:
+    remote_url = "https://www.xxxxxxx.com/getImg"  验证码链接地址
+    rec_times = 1  识别的次数
+"""
+import datetime
+import requests
+from io import BytesIO
+import time
+import json
+import os
+from flask import Flask, request, Response
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+
+class RecognizeOnlineError(Exception):
+    pass
+
+
+# Flask对象
+app = Flask(__name__)
+basedir = os.path.abspath(os.path.dirname(__file__))
+
+# 客户端连接传递的cookie
+jsession_id = ''
+
+with open("conf/sample_config.json", "r") as f:
+    sample_conf = json.load(f)
+
+# 配置参数
+env = sample_conf["env"]  # 环境
+remote_url = sample_conf["remote_url"]  # 网络验证码地址
+image_suffix = sample_conf["image_suffix"]  # 文件后缀
+online_save_path = sample_conf["online_image_dir"]  # 从远程验证码url获取的待识别图片,识别后保存图片的路径
+image_suffix = sample_conf["image_suffix"]  # 文件后缀
+
+webserver_recognize_url = sample_conf['webserver_recognize_url']  # 识别服务器IP
+webserver_recognize_port = sample_conf['webserver_recognize_port']  # 识别服务器端口
+
+request_recognize_ip = sample_conf['request_recognize_ip']  # 识别服务器IP,供外部程序调用
+request_recognize_port = sample_conf['request_recognize_port']  # 识别服务器端口
+
+
+def recognize_captcha(jsession_id, remote_url, rec_times, save_path, image_suffix):
+    image_file_name = 'captcha.{}'.format(image_suffix)
+
+    # 根据实际需求配置headers
+    headers = {
+        # 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
+        "Host": "app.singlewindow.cn",
+        'Referer': 'https://app.singlewindow.cn/ceb2pubweb/sw/personalAmount',
+        'Cookie': "jsessionid={}".format(jsession_id)
+    }
+
+    for index in range(rec_times):
+        # 请求
+        while True:
+            try:
+                req_url = "{}?timeStamp={}".format(remote_url, int(round(time.time() * 1000)))
+                response = requests.request("GET", req_url, headers=headers, timeout=6)
+                if response.text:
+                    break
+                else:
+                    logger.warning("retry, response.text is empty")
+            except Exception as ee:
+                logger.error(ee)
+
+        # 识别
+        s = time.time()
+        url = "http://{}:{}/b".format(webserver_recognize_url, str(webserver_recognize_port))
+        files = {'image_file': (image_file_name, BytesIO(response.content), 'application')}
+        r = requests.post(url=url, files=files)
+        e = time.time()
+
+        # 识别结果
+        logger.debug("远程下载图片,调用本地识别服务,接口响应: %s", r.text)
+        predict_text = json.loads(r.text)["value"]
+        now_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        logger.debug("【%s】 index:%d 耗时:%s ms 预测结果:%s", now_time, index, int((e - s) * 1000), predict_text)
+
+        if env.lower() == 'dev':
+            # 保存文件
+            img_name = "{}_{}.{}".format(predict_text, str(time.time()).replace(".", ""), image_suffix)
+            path = os.path.join(save_path, img_name)
+            with open(path, "wb") as f:
+                f.write(response.content)
+        logger.debug("============== online recognized end ==============")
+
+    # 注意:return 必须在 for 循环之外,否则 rec_times > 1 时循环只会执行一次
+    return predict_text
+
+
+def response_headers(content):
+    resp = Response(content)
+    resp.headers['Access-Control-Allow-Origin'] = '*'
+    return resp
+
+
+@app.route('/rec', methods=['GET', 'POST'])
+def request_recongnize():
+    if (request.method == 'POST' or request.method == 'GET'):
+        if (request.method == 'POST' and request.form['jsessionid']):
+            jsession_id = request.form['jsessionid']
+        elif (request.method == 'GET' and request.args.get('jsessionid')):
+            jsession_id = request.args.get('jsessionid')
+        else:
+            logger.debug("缺少请求参数jsessionid")
+            content = json.dumps({"error_code": "1002", "error_msg": "缺少请求参数jsessionid"})
+            resp = response_headers(content)
+            return resp
+
+        rec_times = 1
+        captcha_text = recognize_captcha(jsession_id, remote_url, rec_times, online_save_path, image_suffix)
+        content = json.dumps({"captcha_text": captcha_text, "jsessionid": jsession_id})
+        logger.debug("返回验证码:%s,请求jsessionid:%s", captcha_text, jsession_id)
+    else:
+        content = json.dumps({"error_code": "1000", "error_msg": "只能是GET,POST请求"})
+    resp = response_headers(content)
+    return resp
+
+
+if __name__ == '__main__':
+    app.run(
+        host=request_recognize_ip,
+        port=request_recognize_port,
+        debug=True
+    )

+ 78 - 0
recognize_time_test.py

@@ -0,0 +1,78 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+使用自建的接口识别来自本地的验证码,测试识别某个文件耗时
+需要配置参数:
+    remote_url = "https://www.xxxxxxx.com/getImg"  验证码链接地址
+    rec_times = 1  识别的次数
+"""
+import datetime
+import requests
+from io import BytesIO
+import time
+import json
+import os
+
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+with open("conf/sample_config.json", "r") as f:
+    sample_conf = json.load(f)
+
+# 配置参数
+env = sample_conf["env"]  # 环境
+webserver_recognize_url = sample_conf['webserver_recognize_url']  # 识别服务器IP
+webserver_recognize_port = sample_conf['webserver_recognize_port']  # 识别服务器端口
+
+
+def recognize_captcha(index, test_path, save_path, image_suffix):
+    image_file_name = 'captcha.{}'.format(image_suffix)
+
+    with open(test_path, "rb") as f:
+        content = f.read()
+
+    # 识别
+    s = time.time()
+    url = "http://{}:{}/b".format(webserver_recognize_url, str(webserver_recognize_port))
+    files = {'image_file': (image_file_name, BytesIO(content), 'application')}
+    r = requests.post(url=url, files=files)
+    e = time.time()
+
+    # 测试参数
+    result_dict = json.loads(r.text)  # 响应
+    predict_text = result_dict["value"]  # 识别结果
+    whole_time_for_work = int((e - s) * 1000)
+    speed_time_by_rec = result_dict["speed_time(ms)"]  # 模型识别耗时
+    request_time_by_rec = whole_time_for_work - speed_time_by_rec  # 请求耗时
+    now_time = datetime.datetime.now().strftime('%Y-%m-%d@%H:%M:%S')  # 当前时间
+
+    # 记录日志
+    log = "{},{},{},{},{},{}\n" \
+        .format(index, predict_text, now_time, whole_time_for_work, speed_time_by_rec, request_time_by_rec)
+    with open("./test.csv", "a+") as f:
+        f.write(log)
+
+    # 输出结果到控制台
+    logger.debug("次数:%s, 结果:%s, 时刻:%s, 总耗时:%s ms, 识别:%s ms, 请求:%s ms",
+                 index, predict_text, now_time, whole_time_for_work, speed_time_by_rec, request_time_by_rec)
+
+    if env.lower() == 'dev':
+        # 保存文件
+        img_name = "{}_{}.{}".format(predict_text, str(time.time()).replace(".", ""), image_suffix)
+        path = os.path.join(save_path, img_name)
+        with open(path, "wb") as f:
+            f.write(content)
+
+
+def main():
+    # 配置相关参数
+    test_file = "sample/test/0001_15430304076164024.png"  # 测试识别的图片路径
+    save_path = sample_conf["local_image_dir"]  # 保存的地址
+    image_suffix = sample_conf["image_suffix"]  # 文件后缀
+    for i in range(20000):
+        recognize_captcha(i, test_file, save_path, image_suffix)
+
+
+if __name__ == '__main__':
+    main()

+ 36 - 0
requirements.txt

@@ -0,0 +1,36 @@
+absl-py==0.7.1
+astor==0.7.1
+bleach==1.5.0
+captcha==0.3
+certifi==2019.3.9
+chardet==3.0.4
+Click==7.0
+cycler==0.10.0
+easydict==1.8
+Flask==1.0.2
+gast==0.2.2
+grpcio==1.19.0
+html5lib==0.9999999
+idna==2.7
+itsdangerous==1.1.0
+Jinja2==2.10.1
+Markdown==3.1
+MarkupSafe==1.1.1
+matplotlib==2.1.0
+numpy==1.16.2
+olefile==0.46
+Pillow==4.3.0
+# protobuf==3.6.0
+protobuf==3.6.1
+pyparsing==2.4.0
+python-dateutil==2.8.0
+pytz==2018.9
+requests==2.19.1
+six==1.12.0
+# tensorboard==1.7.0
+# tensorflow==1.7.0
+tensorboard==1.12.0
+tensorflow==1.12.0
+termcolor==1.1.0
+urllib3==1.23
+Werkzeug==0.15.2

+ 125 - 0
test_batch.py

@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+import json
+
+import tensorflow as tf
+import numpy as np
+import time
+from PIL import Image
+import random
+import os
+from cnnlib.network import CNN
+
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+
+class TestError(Exception):
+    pass
+
+
+class TestBatch(CNN):
+    def __init__(self, img_path, char_set, model_save_dir, total):
+        # 模型路径
+        self.model_save_dir = model_save_dir
+        # 打乱文件顺序
+        self.img_path = img_path
+        self.img_list = os.listdir(img_path)
+        random.seed(time.time())
+        random.shuffle(self.img_list)
+
+        # 获得图片宽高和字符长度基本信息
+        label, captcha_array = self.gen_captcha_text_image()
+
+        captcha_shape = captcha_array.shape
+        captcha_shape_len = len(captcha_shape)
+        if captcha_shape_len == 3:
+            image_height, image_width, channel = captcha_shape
+            self.channel = channel
+        elif captcha_shape_len == 2:
+            image_height, image_width = captcha_shape
+        else:
+            raise TestError("图片转换为矩阵时出错,请检查图片格式")
+
+        # 初始化变量
+        super(TestBatch, self).__init__(image_height, image_width, len(label), char_set, model_save_dir)
+        self.total = total
+
+        # 相关信息打印
+        logger.debug("-->图片尺寸: %s X %s", image_height, image_width)
+        logger.debug("-->验证码长度: %s", self.max_captcha)
+        logger.debug("-->验证码共%d类 %s", self.char_set_len, char_set)
+        logger.debug("-->使用测试集为 %s", img_path)
+
+    def gen_captcha_text_image(self):
+        """
+        返回一个验证码的array形式和对应的字符串标签
+        :return:tuple (str, numpy.array)
+        """
+        img_name = random.choice(self.img_list)
+        # 标签
+        label = img_name.split("_")[0]
+        # 文件
+        img_file = os.path.join(self.img_path, img_name)
+        captcha_image = Image.open(img_file)
+        captcha_array = np.array(captcha_image)  # 向量化
+
+        return label, captcha_array
+
+    def test_batch(self):
+        y_predict = self.model()
+        total = self.total
+        right = 0
+
+        saver = tf.train.Saver()
+        with tf.Session() as sess:
+            saver.restore(sess, self.model_save_dir)
+            s = time.time()
+            # predict 张量在循环外构建一次即可;若放在循环内,每次迭代都会向计算图追加新节点,
+            # 导致图不断膨胀、识别速度越来越慢(参见 recognition_object.py 中的同类说明)
+            predict = tf.argmax(tf.reshape(y_predict, [-1, self.max_captcha, self.char_set_len]), 2)
+            for i in range(total):
+                # test_text, test_image = gen_special_num_image(i)
+                test_text, test_image = self.gen_captcha_text_image()  # 随机
+                test_image = self.convert2gray(test_image)
+                test_image = test_image.flatten() / 255
+
+                text_list = sess.run(predict, feed_dict={self.X: [test_image], self.keep_prob: 1.})
+                predict_text = text_list[0].tolist()
+                p_text = ""
+                p_info = ""
+                for p in predict_text:
+                    p_text += str(self.char_set[p])
+                if test_text == p_text:
+                    p_info = "匹配"
+                    right += 1
+                else:
+                    p_info = "不匹配"
+                    pass
+                logger.debug("origin: %s predict: %s %s", test_text, p_text, p_info)
+            e = time.time()
+        rate = str(right / total * 100) + "%"
+        logger.debug("测试结果: %d/%d", right, total)
+        logger.debug("%d个样本识别耗时%f秒,准确率%s", total, e - s, rate)
+
+
+def main():
+    with open("conf/sample_config.json", "r") as f:
+        sample_conf = json.load(f)
+
+    test_image_dir = sample_conf["test_image_dir"]
+    model_save_dir = sample_conf["model_save_dir"]
+
+    use_labels_json_file = sample_conf['use_labels_json_file']
+
+    if use_labels_json_file:
+        with open("tools/labels.json", "r") as f:
+            char_set = f.read().strip()
+    else:
+        char_set = sample_conf["char_set"]
+
+    total = 15
+    tb = TestBatch(test_image_dir, char_set, model_save_dir, total)
+    tb.test_batch()
+
+
+if __name__ == '__main__':
+    main()

+ 31 - 0
tools/collect_labels.py

@@ -0,0 +1,31 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+统计样本的标签,并写入文件labels.json
+"""
+import os
+import json
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+image_dir = "../sample/origin"
+image_list = os.listdir(image_dir)
+
+labels = set()
+for img in image_list:
+    split_result = img.split("_")
+    if len(split_result) == 2:
+        label, name = split_result
+        if label:
+            for word in label:
+                labels.add(word)
+    else:
+        pass
+
+logger.debug("共有标签%d种", len(labels))
+
+with open("./labels.json", "w") as f:
+    f.write(json.dumps("".join(list(labels)), ensure_ascii=False))
+
+logger.debug("将标签列表写入文件labels.json成功")

+ 91 - 0
tools/correction_captcha.py

@@ -0,0 +1,91 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+人工在线验证脚本
+"""
+import requests
+from io import BytesIO
+import time
+import matplotlib.pyplot as plt
+import json
+import numpy as np
+from PIL import Image
+import os
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+
+def correction(fail_path, pass_path, correction_times, remote_url):
+    headers = {
+        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
+    }
+
+    fail_count = 0
+    for index in range(correction_times):
+        # 请求
+        while True:
+            try:
+                response = requests.request("GET", remote_url, headers=headers, timeout=10)
+                break
+            except Exception as e:
+                logger.debug(e)
+
+        # 识别
+        s = time.time()
+        url = "http://127.0.0.1:6000/b"
+        files = {'image_file': ('captcha.jpg', BytesIO(response.content), 'application')}
+        r = requests.post(url=url, files=files)
+        e = time.time()
+        logger.debug("index:%d,时长:%s ms", index, int((e - s) * 1000))
+        logger.debug("返回结果: %s", r.text)
+        time.sleep(2)
+
+        # 识别结果
+        predict_text = json.loads(r.text)["value"]
+        f = plt.figure()
+        ax = f.add_subplot(111)
+        ax.text(0.1, 0.9, "备注", ha='center', va='center', transform=ax.transAxes)
+
+        # 图片字节流转为image array
+        img = BytesIO(response.content)
+        img = Image.open(img, mode="r")
+        captcha_array = np.array(img)
+        plt.imshow(captcha_array)
+
+        # 预测图片
+        logger.debug("预测: %s", predict_text)
+
+        # 显示图片和预测结果
+        plt.text(20, 2, 'predict:{}'.format(predict_text))
+        plt.show()
+
+        q = input("index:<{}> 正确按enter,错误输入真实值后会保存:".format(index))
+        img_name = "{}_{}".format(q, str(time.time()).replace(".", ""))
+        if q:
+            path = os.path.join(fail_path, img_name)
+            with open(path, "wb") as f:
+                f.write(response.content)
+            fail_count += 1
+        else:
+            path = os.path.join(pass_path, img_name)
+            with open(path, "wb") as f:
+                f.write(response.content)
+
+        logger.debug("==============")
+
+    rate = (correction_times - fail_count) / correction_times
+    logger.debug("Pass Rate: %s", rate)
+
+
+def main():
+    fail_path = "./sample/fail_sample/"
+    pass_path = "./sample/pass_sample/"
+    correction_times = 10
+    remote_url = "https://www.xxxxxxx.com/getImg"
+
+    correction(fail_path, pass_path, correction_times, remote_url)
+
+
+if __name__ == '__main__':
+    main()

+ 19 - 0
tools/gen_md_content.py

@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+import re
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+file_path = "../README.md"
+with open(file_path, "r") as f:
+    content = f.readlines()
+
+for c in content:
+    c = c.strip()
+    pattern = r"^#+\s[0-9.]+\s"
+    r = re.match(pattern, c)
+    if r:
+        c1 = re.sub(pattern, "", c)
+        c2 = re.sub(r"#+\s", "", c)
+        string = '<a href="#{}">{}</a>  '.format(c1, c2)
+        logger.debug(string)

+ 288 - 0
train_model.py

@@ -0,0 +1,288 @@
+# -*- coding: utf-8 -*-
+import json
+
+import tensorflow as tf
+import numpy as np
+import matplotlib.pyplot as plt
+import time
+from PIL import Image
+import random
+import os
+from cnnlib.network import CNN
+from log_ware import LogWare
+
+log_inst = LogWare()
+logger = log_inst.get_logger()
+
+
class TrainError(Exception):
    """Domain error raised for invalid training data or configuration."""
    pass
+
+
+class TrainModel(CNN):
    def __init__(self, train_img_path, verify_img_path, char_set, model_save_dir, cycle_stop, acc_stop, cycle_save,
                 image_suffix, train_batch_size, test_batch_size, verify=False):
        """Prepare the training/verify file lists and infer image geometry.

        :param train_img_path: directory of training images named "<label>_<...>"
        :param verify_img_path: directory of verification images
        :param char_set: iterable of captcha characters (coerced to str)
        :param model_save_dir: checkpoint path prefix for tf.train.Saver
        :param cycle_stop: maximum number of training cycles
        :param acc_stop: verify-set image accuracy at which training stops early
        :param cycle_save: save a checkpoint every this many cycles
        :param image_suffix: expected image file suffix
        :param train_batch_size: mini-batch size used for training
        :param test_batch_size: batch size used for verification
        :param verify: when True, validate every training file's suffix first
        """
        # training hyper-parameters
        self.cycle_stop = cycle_stop
        self.acc_stop = acc_stop
        self.cycle_save = cycle_save
        self.train_batch_size = train_batch_size
        self.test_batch_size = test_batch_size

        self.image_suffix = image_suffix
        char_set = [str(i) for i in char_set]

        # training file list (optionally suffix-checked, then shuffled)
        self.train_img_path = train_img_path
        self.train_images_list = os.listdir(train_img_path)
        # optional format validation
        if verify:
            self.confirm_image_suffix()
        # shuffle the file order
        random.seed(time.time())
        random.shuffle(self.train_images_list)

        # verification set files
        self.verify_img_path = verify_img_path
        self.verify_images_list = os.listdir(verify_img_path)

        # read one sample to discover image size and label length
        label, captcha_array = self.gen_captcha_text_image(train_img_path, self.train_images_list[0])

        captcha_shape = captcha_array.shape
        captcha_shape_len = len(captcha_shape)
        if captcha_shape_len == 3:
            image_height, image_width, channel = captcha_shape
            # NOTE(review): self.channel is only assigned for 3-channel images —
            # confirm nothing reads it when the input is grayscale.
            self.channel = channel
        elif captcha_shape_len == 2:
            image_height, image_width = captcha_shape
        else:
            raise TrainError("图片转换为矩阵时出错,请检查图片格式")

        # initialise the CNN base class (cnnlib.network.CNN)
        super(TrainModel, self).__init__(image_height, image_width, len(label), char_set, model_save_dir)

        # summary logging
        logger.debug("-->图片尺寸: %s X %s", image_height, image_width)
        logger.debug("-->验证码长度: %s", self.max_captcha)
        logger.debug("-->验证码共%s类 %s", self.char_set_len, char_set)
        logger.debug("-->使用测试集为 %s", train_img_path)
        logger.debug("-->使验证集为 %s", verify_img_path)

        # smoke-test the input pipeline with a tiny batch
        logger.debug(">>> Start model test")
        batch_x, batch_y = self.get_batch(0, size=10)
        logger.debug(">>> input batch images shape: %s", batch_x.shape)
        logger.debug(">>> input batch labels shape: %s", batch_y.shape)
+
+    @staticmethod
+    def gen_captcha_text_image(img_path, img_name):
+        """
+        返回一个验证码的array形式和对应的字符串标签
+        :return:tuple (str, numpy.array)
+        """
+        # 标签
+        label = img_name.split("_")[0]
+        # 文件
+        img_file = os.path.join(img_path, img_name)
+        captcha_image = Image.open(img_file)
+        captcha_array = np.array(captcha_image)  # 向量化
+        return label, captcha_array
+
+    def get_batch(self, n, size=128):
+        batch_x = np.zeros([size, self.image_height * self.image_width])  # 初始化
+        batch_y = np.zeros([size, self.max_captcha * self.char_set_len])  # 初始化
+
+        max_batch = int(len(self.train_images_list) / size)
+        # print("max_batch: %s", max_batch)
+        if max_batch - 1 < 0:
+            raise TrainError("训练集图片数量需要大于每批次训练的图片数量")
+        if n > max_batch - 1:
+            n = n % max_batch
+        s = n * size
+        e = (n + 1) * size
+        this_batch = self.train_images_list[s:e]
+        # print("%s:%s", s, e))
+
+        for i, img_name in enumerate(this_batch):
+            label, image_array = self.gen_captcha_text_image(self.train_img_path, img_name)
+            image_array = self.convert2gray(image_array)  # 灰度化图片
+            batch_x[i, :] = image_array.flatten() / 255  # flatten 转为一维
+            batch_y[i, :] = self.text2vec(label)  # 生成 oneHot
+        return batch_x, batch_y
+
+    def get_verify_batch(self, size=100):
+        batch_x = np.zeros([size, self.image_height * self.image_width])  # 初始化
+        batch_y = np.zeros([size, self.max_captcha * self.char_set_len])  # 初始化
+
+        verify_images = []
+        for i in range(size):
+            verify_images.append(random.choice(self.verify_images_list))
+
+        for i, img_name in enumerate(verify_images):
+            label, image_array = self.gen_captcha_text_image(self.verify_img_path, img_name)
+            image_array = self.convert2gray(image_array)  # 灰度化图片
+            batch_x[i, :] = image_array.flatten() / 255  # flatten 转为一维
+            batch_y[i, :] = self.text2vec(label)  # 生成 oneHot
+        return batch_x, batch_y
+
+    def confirm_image_suffix(self):
+        # 在训练前校验所有文件格式
+        print("开始校验所有图片后缀")
+        for index, img_name in enumerate(self.train_images_list):
+            print("%s image pass", index)
+            if not img_name.endswith(self.image_suffix):
+                raise TrainError('confirm images suffix:you request [.{}] file but get file [{}]'
+                                 .format(self.image_suffix, img_name))
+        logger.debug("所有图片格式校验通过")
+
    def train_cnn(self):
        """Build the TF graph and run the training loop.

        Restores an existing checkpoint when one is found, logs train/verify
        accuracy every 10 steps, checkpoints every ``self.cycle_save`` cycles,
        and stops early once verify-set image accuracy exceeds ``self.acc_stop``.
        """
        y_predict = self.model()
        logger.debug(">>> input batch predict shape: %s", y_predict.shape)
        logger.debug(">>> End model test")
        # loss: sigmoid cross-entropy against the one-hot label vector
        with tf.name_scope('cost'):
            cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_predict, labels=self.Y))
        # optimiser: Adam gradient descent
        with tf.name_scope('train'):
            optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost)
        # reshape to (batch, captcha_length, charset_size) for per-character argmax
        predict = tf.reshape(y_predict, [-1, self.max_captcha, self.char_set_len])  # predictions
        max_idx_p = tf.argmax(predict, 2)  # predicted class per character
        max_idx_l = tf.argmax(tf.reshape(self.Y, [-1, self.max_captcha, self.char_set_len]), 2)  # true labels
        # accuracy: per-character mean, and whole-image (every character right)
        correct_pred = tf.equal(max_idx_p, max_idx_l)
        with tf.name_scope('char_acc'):
            accuracy_char_count = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        with tf.name_scope('image_acc'):
            accuracy_image_count = tf.reduce_mean(tf.reduce_min(tf.cast(correct_pred, tf.float32), axis=1))
        # checkpoint saver
        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            # try to resume from an existing checkpoint
            # NOTE(review): model_save_dir is used as a Saver path prefix;
            # os.path.exists on a prefix may miss real checkpoints — confirm.
            if os.path.exists(self.model_save_dir):
                try:
                    saver.restore(sess, self.model_save_dir)
                # the model directory exists but holds no checkpoint yet
                except ValueError:
                    logger.debug("model文件夹为空,将创建新模型")
            else:
                pass

            # write the graph for TensorBoard under <log_dir>/train_sess
            temp_log_dir = log_inst.get_log_dir()
            sess_log_dir = os.path.join(temp_log_dir, 'train_sess')
            # create the directory when it does not exist
            mkdir_with_lambda = lambda x: os.makedirs(x) if not os.path.exists(x) else True
            mkdir_with_lambda(sess_log_dir)
            tf.summary.FileWriter(sess_log_dir, sess.graph)

            step = 1
            for i in range(self.cycle_stop):
                batch_x, batch_y = self.get_batch(i, size=self.train_batch_size)
                # one gradient-descent step (dropout keep_prob = 0.75)
                _, cost_ = sess.run([optimizer, cost],
                                    feed_dict={self.X: batch_x, self.Y: batch_y, self.keep_prob: 0.75})
                if step % 10 == 0:
                    # accuracy on a training-set batch
                    batch_x_test, batch_y_test = self.get_batch(i, size=self.train_batch_size)
                    acc_char = sess.run(accuracy_char_count,
                                        feed_dict={self.X: batch_x_test, self.Y: batch_y_test, self.keep_prob: 1.})
                    acc_image = sess.run(accuracy_image_count,
                                         feed_dict={self.X: batch_x_test, self.Y: batch_y_test, self.keep_prob: 1.})
                    print("第{}次训练 >>> ".format(step))
                    print("[训练集] 字符准确率为 {:.5f} 图片准确率为 {:.5f} >>> loss {:.10f}".format(acc_char, acc_image, cost_))

                    # with open("loss_train.csv", "a+") as f:
                    #     f.write("{},{},{},{}\n".format(step, acc_char, acc_image, cost_))

                    # accuracy on a verify-set batch
                    batch_x_verify, batch_y_verify = self.get_verify_batch(size=self.test_batch_size)
                    acc_char = sess.run(accuracy_char_count,
                                        feed_dict={self.X: batch_x_verify, self.Y: batch_y_verify, self.keep_prob: 1.})
                    acc_image = sess.run(accuracy_image_count,
                                         feed_dict={self.X: batch_x_verify, self.Y: batch_y_verify, self.keep_prob: 1.})
                    print("[验证集] 字符准确率为 {:.5f} 图片准确率为 {:.5f} >>> loss {:.10f}".format(acc_char, acc_image, cost_))

                    # with open("loss_test.csv", "a+") as f:
                    #     f.write("{}, {},{},{}\n".format(step, acc_char, acc_image, cost_))

                    # early stop and save once verify image accuracy reaches acc_stop
                    if acc_image > self.acc_stop:
                        saver.save(sess, self.model_save_dir)
                        logger.debug("验证集准确率达到%s,保存模型成功", str(self.acc_stop * 100) + "%")
                        break
                # periodic checkpoint every cycle_save cycles
                if i % self.cycle_save == 0:
                    saver.save(sess, self.model_save_dir)
                    print("定时保存模型成功")
                step += 1
            saver.save(sess, self.model_save_dir)
+
    def recognize_captcha(self):
        """Pick one random training image, predict it, and display the result.

        Side effects: restores the model from ``self.model_save_dir``, logs the
        true and predicted labels, and opens a matplotlib window.
        """
        label, captcha_array = self.gen_captcha_text_image(self.train_img_path, random.choice(self.train_images_list))

        f = plt.figure()
        ax = f.add_subplot(111)
        ax.text(0.1, 0.9, "origin:" + label, ha='center', va='center', transform=ax.transAxes)
        plt.imshow(captcha_array)
        # preprocess exactly like training: grayscale, then scale to [0, 1]
        image = self.convert2gray(captcha_array)
        image = image.flatten() / 255

        y_predict = self.model()

        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(sess, self.model_save_dir)
            # per-character argmax over (batch, captcha_length, charset_size)
            predict = tf.argmax(tf.reshape(y_predict, [-1, self.max_captcha, self.char_set_len]), 2)
            text_list = sess.run(predict, feed_dict={self.X: [image], self.keep_prob: 1.})
            predict_text = text_list[0].tolist()

        logger.debug("正确: %s 预测: %s", label, predict_text)
        # map predicted class indices back to characters
        p_text = ""
        for p in predict_text:
            p_text += str(self.char_set[p])
        logger.debug("p_text: %s", p_text)
        plt.text(20, 1, 'predict:{}'.format(p_text))
        plt.show()
+
+
def main():
    """Load conf/sample_config.json and launch CNN training."""
    with open("conf/sample_config.json", "r") as f:
        sample_conf = json.load(f)

    train_image_dir = sample_conf["train_image_dir"]
    verify_image_dir = sample_conf["test_image_dir"]
    model_save_dir = sample_conf["model_save_dir"]
    cycle_stop = sample_conf["cycle_stop"]
    acc_stop = sample_conf["acc_stop"]
    cycle_save = sample_conf["cycle_save"]
    enable_gpu = sample_conf["enable_gpu"]
    image_suffix = sample_conf['image_suffix']
    use_labels_json_file = sample_conf['use_labels_json_file']
    train_batch_size = sample_conf['train_batch_size']
    test_batch_size = sample_conf['test_batch_size']

    # character set comes either from tools/labels.json or inline config
    if use_labels_json_file:
        with open("tools/labels.json", "r") as f:
            char_set = f.read().strip()
    else:
        char_set = sample_conf["char_set"]

    if not enable_gpu:
        # hide all CUDA devices so TensorFlow falls back to the CPU
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    trainer = TrainModel(train_image_dir, verify_image_dir, char_set, model_save_dir, cycle_stop, acc_stop,
                         cycle_save, image_suffix, train_batch_size, test_batch_size, verify=False)
    trainer.train_cnn()  # start training
    # trainer.recognize_captcha()  # demo: recognize a single image


if __name__ == '__main__':
    main()

+ 160 - 0
verify_and_split_data.py

@@ -0,0 +1,160 @@
+"""
+验证图片尺寸和分离测试集(5%)和训练集(95%)
+初始化的时候使用,有新的图片后,可以把图片放在new目录里面使用。
+"""
+import json
+
+from PIL import Image
+import random
+import os
+import shutil
+from log_ware import LogWare
+
+logger = LogWare().get_logger()
+
+
def verify(origin_dir, real_width, real_height, image_suffix):
    """
    Validate raw captcha images: suffix, "<label>_<...>" naming, openability, size.

    :param origin_dir: directory with raw images (created if missing)
    :param real_width: expected image width in pixels
    :param real_height: expected image height in pixels
    :param image_suffix: required file suffix, e.g. "png"
    :return: list of (index, img_name, reason) tuples, one per bad image
    """
    if not os.path.exists(origin_dir):
        logger.debug("【警告】找不到目录%s,即将创建", origin_dir)
        os.makedirs(origin_dir)

    logger.debug("开始校验原始图片集")
    # expected image size
    real_size = (real_width, real_height)
    # image file names and count
    img_list = os.listdir(origin_dir)
    total_count = len(img_list)
    logger.debug("原始集共有图片: %d张", total_count)

    # collected invalid images
    bad_img = []

    for index, img_name in enumerate(img_list):
        file_path = os.path.join(origin_dir, img_name)
        # wrong file suffix
        if not img_name.endswith(image_suffix):
            bad_img.append((index, img_name, "文件后缀不正确"))
            continue

        if img_name.find("_") == -1:
            bad_img.append((index, img_name, "图片命名没有下划线,图片名称:" + img_name))
            logger.debug("图片命名没有下划线,图片名称:%s", img_name)
            # Bug fix: without this `continue` the unpack below raised
            # ValueError for names that contain no underscore.
            continue

        # Bug fix: partition tolerates labels containing "_" themselves;
        # the old two-way split("_") unpack crashed on >1 underscore.
        prefix, _, posfix = img_name.partition("_")
        if prefix == "" or posfix == "":
            bad_img.append((index, img_name, "图片标签异常"))
            continue

        # image cannot be opened
        try:
            # context manager closes the file handle (it was leaked before)
            with Image.open(file_path) as img:
                img_size = img.size
        except OSError:
            bad_img.append((index, img_name, "图片无法正常打开"))
            continue

        # image size mismatch
        if real_size == img_size:
            logger.debug("%d pass", index)
        else:
            bad_img.append((index, img_name, "图片尺寸异常为:{}".format(img_size)))

    logger.debug("====以下%d张图片有异常====", len(bad_img))
    if bad_img:
        for b in bad_img:
            # Bug fix: b[1]/b[2] are strings — %d raised TypeError; use %s.
            logger.debug("[第%s张图片] [%s] [%s]", b[0], b[1], b[2])
    else:
        logger.debug("未发现异常(共 %d 张图片)", len(img_list))
    logger.debug("========end")
    return bad_img
+
+
def split(origin_dir, train_dir, test_dir, bad_imgs):
    """
    Split the origin image set into a training set (95%) and a test set (5%).

    Bad images stay behind in ``origin_dir``; every other file is moved.

    :param origin_dir: source directory
    :param train_dir: destination for training images (created if missing)
    :param test_dir: destination for test images (created if missing)
    :param bad_imgs: file names to exclude from the split
    """
    if not os.path.exists(origin_dir):
        logger.debug("【警告】找不到目录%s,即将创建", origin_dir)
        os.makedirs(origin_dir)

    logger.debug("开始分离原始图片集为:测试集(5%)和训练集(95%)")

    img_list = os.listdir(origin_dir)
    for img in bad_imgs:
        img_list.remove(img)
    total_count = len(img_list)
    logger.debug("共分配%d张图片到训练集和测试集,其中%d张为异常留在原始目录", total_count, len(bad_imgs))

    # create destination folders when missing
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # 5% test split; random.sample draws unique names in one pass
    # (replaces the old O(n^2) rejection-sampling loop)
    test_count = int(total_count * 0.05)
    test_list = random.sample(img_list, test_count)
    test_set = set(test_list)

    logger.debug("测试集数量为:%s", len(test_list))
    for file_name in test_list:
        shutil.move(os.path.join(origin_dir, file_name), os.path.join(test_dir, file_name))

    # everything not sampled for testing becomes the training set
    train_list = [name for name in img_list if name not in test_set]
    logger.debug("训练集数量为:%s", len(train_list))
    for file_name in train_list:
        shutil.move(os.path.join(origin_dir, file_name), os.path.join(train_dir, file_name))

    # Bug fix: `os.listdir(origin_dir) == 0` compared a list to an int and
    # was always False; test emptiness instead.
    if not os.listdir(origin_dir):
        logger.debug("migration done")
+
+
def main():
    """Read conf/sample_config.json, then verify and split both image folders."""
    with open("conf/sample_config.json", "r") as f:
        sample_conf = json.load(f)

    # image directories
    origin_dir = sample_conf["origin_image_dir"]
    new_dir = sample_conf["new_image_dir"]
    train_dir = sample_conf["train_image_dir"]
    test_dir = sample_conf["test_image_dir"]
    # expected image geometry
    real_width = sample_conf["image_width"]
    real_height = sample_conf["image_height"]
    # required file suffix
    image_suffix = sample_conf["image_suffix"]

    for image_dir in (origin_dir, new_dir):
        logger.debug(">>> 开始校验目录:[%s]", image_dir)
        bad_images_info = verify(image_dir, real_width, real_height, image_suffix)
        bad_imgs = [info[1] for info in bad_images_info]
        split(image_dir, train_dir, test_dir, bad_imgs)


if __name__ == '__main__':
    main()

+ 75 - 0
webserver_captcha_image.py

@@ -0,0 +1,75 @@
# -*- coding: UTF-8 -*-
"""
Captcha image endpoint: GET `/captcha/` returns one freshly generated image.
"""
from captcha.image import ImageCaptcha
import os
import random
from flask import Flask, request, jsonify, Response, make_response
import json
import io

from log_ware import LogWare

logger = LogWare().get_logger()

# Flask application and project base directory
app = Flask(__name__)
basedir = os.path.abspath(os.path.dirname(__file__))

with open("conf/captcha_config.json", "r") as f:
    config = json.load(f)
# configuration values
root_dir = config["root_dir"]  # directory where sample images are stored
image_suffix = config["image_suffix"]  # stored image suffix
characters = config["characters"]  # character set drawn on images # characters = "0123456789abcdefghijklmnopqrstuvwxyz"
count = config["count"]  # number of samples to generate
char_count = config["char_count"]  # characters per image

webserver_captcha_url = config["webserver_captcha_url"]  # captcha server bind address
webserver_captcha_port = config["webserver_captcha_port"]  # captcha server port

# generated image width and height in pixels
width = config["width"]
height = config["height"]
+
+
def response_headers(content):
    """Wrap *content* in a Response that allows any cross-origin caller."""
    wrapped = Response(content)
    wrapped.headers['Access-Control-Allow-Origin'] = '*'
    return wrapped
+
+
def gen_special_img():
    """Generate one random captcha image and return it as PNG bytes."""
    # random label of char_count characters drawn from the configured set
    text = "".join(random.choice(characters) for _ in range(char_count))
    logger.debug("随机字符: %s", text)
    # render the label at the configured size
    image_factory = ImageCaptcha(width=width, height=height)
    img = image_factory.generate_image(text)
    buffer = io.BytesIO()
    img.save(buffer, format='PNG')
    return buffer.getvalue()
+
+
@app.route('/captcha/', methods=['GET'])
def show_photo():
    """Serve one freshly generated captcha as image/png with CORS enabled."""
    if request.method != 'GET':
        # unreachable: the route only registers GET (mirrors original fallthrough)
        return None
    payload = gen_special_img()
    resp = make_response(payload)
    resp.headers['Content-Type'] = 'image/png'
    resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp
+
+
if __name__ == '__main__':
    # development server; bind address/port come from conf/captcha_config.json
    app.run(host=webserver_captcha_url, port=webserver_captcha_port, debug=True)

+ 107 - 0
webserver_recognize_api.py

@@ -0,0 +1,107 @@
# -*- coding: UTF-8 -*-
"""
Captcha recognition web API (Flask).

POST files={'image_file': ('captcha.jpg', BytesIO(bytes), 'application')}
to recognize an image. Required configuration values:
    image_height = 40
    image_width = 80
    max_captcha = 4
"""
import json
from io import BytesIO
import os
from cnnlib.recognition_object import Recognizer
import time
from flask import Flask, request, jsonify, Response
from PIL import Image

from log_ware import LogWare

logger = LogWare().get_logger()

# default to CPU-only inference
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

with open("conf/sample_config.json", "r") as f:
    sample_conf = json.load(f)

# configuration values
env = sample_conf["env"]  # deployment environment ("dev" keeps a copy of each image)
image_height = sample_conf["image_height"]
image_width = sample_conf["image_width"]
max_captcha = sample_conf["max_captcha"]
api_image_dir = sample_conf["api_image_dir"]
model_save_dir = sample_conf["model_save_dir"]
image_suffix = sample_conf["image_suffix"]  # image file suffix
use_labels_json_file = sample_conf['use_labels_json_file']

webserver_recognize_url = sample_conf['webserver_recognize_url']  # recognition server bind address
webserver_recognize_port = sample_conf['webserver_recognize_port']  # recognition server port

# character set comes either from tools/labels.json or inline config
if use_labels_json_file:
    with open("tools/labels.json", "r") as f:
        char_set = f.read().strip()
else:
    char_set = sample_conf["char_set"]

# Flask application and project base directory
app = Flask(__name__)
basedir = os.path.abspath(os.path.dirname(__file__))

# recognizer instance built from the configured model
recognizer = Recognizer(image_height, image_width, max_captcha, char_set, model_save_dir)


# To serve several models, replicate this pattern with extra routes/logic.
# Q = Recognizer(image_height, image_width, max_captcha, char_set, model_save_dir)
+
+
def response_headers(content):
    """Return *content* as a Response carrying a wildcard CORS header."""
    reply = Response(content)
    reply.headers['Access-Control-Allow-Origin'] = '*'
    return reply
+
+
@app.route('/b', methods=['POST'])
def up_image():
    """Recognize a captcha uploaded as multipart field ``image_file``.

    Returns JSON with ``time`` (timestamp), ``value`` (predicted text) and
    ``speed_time(ms)`` (inference duration). On a bad request returns a JSON
    error payload with a CORS header.
    """
    if request.method == 'POST' and request.files.get('image_file'):
        timec = str(time.time()).replace(".", "")
        file = request.files.get('image_file')
        img = BytesIO(file.read())
        img = Image.open(img, mode="r")
        # username = request.form.get("name")
        logger.debug("接收图片尺寸: %s", img.size)
        s = time.time()
        value = recognizer.rec_image(img)
        e = time.time()
        logger.debug("识别结果: %s", value)

        if env.lower() == 'dev':
            # in dev mode, keep a copy of every recognized image
            logger.debug("保存图片: %s%s_%s.%s", api_image_dir, value, timec, image_suffix)
            file_name = "{}_{}.{}".format(value, timec, image_suffix)
            # Bug fix: was os.path.join(api_image_dir + file_name) — plain string
            # concatenation inside a one-argument join; join the parts properly
            # so it also works when api_image_dir lacks a trailing slash.
            file_path = os.path.join(api_image_dir, file_name)
            img.save(file_path)

        result = {
            'time': timec,  # timestamp
            'value': value,  # predicted text
            'speed_time(ms)': int((e - s) * 1000)  # inference duration
        }
        img.close()
        return jsonify(result)
    else:
        content = json.dumps({"error_code": "1001", "error_msg": "验证码识别出错"})
        resp = response_headers(content)
        return resp
+
+
if __name__ == '__main__':
    # development server; bind address/port come from conf/sample_config.json
    app.run(host=webserver_recognize_url, port=webserver_recognize_port, debug=True)