1
0

recognize_online.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. #!/usr/bin/python
  2. # -*- coding: UTF-8 -*-
  3. """
  4. 使用自建的接口识别来自网络的验证码
  5. 需要配置参数:
  6. remote_url = "https://www.xxxxxxx.com/getImg" 验证码链接地址
  7. rec_times = 1 识别的次数
  8. """
  9. import datetime
  10. import requests
  11. from io import BytesIO
  12. import time
  13. import json
  14. import os
  15. from flask import Flask, request, Response
  16. from log_ware import LogWare
  17. logger = LogWare().get_logger()
  18. class RecognizeOnlineError(Exception):
  19. pass
  20. # Flask对象
  21. app = Flask(__name__)
  22. basedir = os.path.abspath(os.path.dirname(__file__))
  23. # 客户端连接传递的cookie
  24. jsession_id = ''
  25. with open("conf/sample_config.json", "r") as f:
  26. sample_conf = json.load(f)
  27. # 配置参数
  28. env = sample_conf["env"] # 环境
  29. remote_url = sample_conf["remote_url"] # 网络验证码地址
  30. image_suffix = sample_conf["image_suffix"] # 文件后缀
  31. online_save_path = sample_conf["online_image_dir"] # 远从远程验证码url获取的待识别图片,识别后保存图片的路径
  32. image_suffix = sample_conf["image_suffix"] # 文件后缀
  33. webserver_recognize_url = sample_conf['webserver_recognize_url'] # 识别服务器IP
  34. webserver_recognize_port = sample_conf['webserver_recognize_port'] # 识别服务器端口
  35. request_recognize_ip = sample_conf['request_recognize_ip'] # 识别服务器IP,供外部程序调用
  36. request_recognize_port = sample_conf['request_recognize_port'] # 识别服务器端口
  37. def recognize_captcha(jsession_id, remote_url, rec_times, save_path, image_suffix):
  38. image_file_name = 'captcha.{}'.format(image_suffix)
  39. # 根据实际需求配置headers
  40. headers = {
  41. # 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36",
  42. "Host": "app.singlewindow.cn",
  43. 'Referer': 'https://app.singlewindow.cn/ceb2pubweb/sw/personalAmount',
  44. 'Cookie': "jsessionid={}".format(jsession_id)
  45. }
  46. for index in range(rec_times):
  47. # 请求
  48. while True:
  49. try:
  50. req_url = "{}?timeStamp={}".format(remote_url, int(round(time.time() * 1000)))
  51. response = requests.request("GET", req_url, headers=headers, timeout=6)
  52. if response.text:
  53. break
  54. else:
  55. logger.warn("retry, response.text is empty")
  56. except Exception as ee:
  57. logger.error(ee)
  58. # 识别
  59. s = time.time()
  60. url = "http://{}:{}/b".format(webserver_recognize_url, str(webserver_recognize_port))
  61. files = {'image_file': (image_file_name, BytesIO(response.content), 'application')}
  62. r = requests.post(url=url, files=files)
  63. e = time.time()
  64. # 识别结果
  65. logger.debug("远程下载图片,调用本地识别服务,接口响应: %s", r.text)
  66. predict_text = json.loads(r.text)["value"]
  67. now_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  68. logger.debug("【%s】 index:%d 耗时:%s ms 预测结果:%s", now_time, index, int((e - s) * 1000), predict_text)
  69. if env.lower() == 'dev':
  70. # 保存文件
  71. img_name = "{}_{}.{}".format(predict_text, str(time.time()).replace(".", ""), image_suffix)
  72. path = os.path.join(save_path, img_name)
  73. with open(path, "wb") as f:
  74. f.write(response.content)
  75. logger.debug("============== online recognized end ==============")
  76. return predict_text
  77. def response_headers(content):
  78. resp = Response(content)
  79. resp.headers['Access-Control-Allow-Origin'] = '*'
  80. return resp
  81. @app.route('/rec', methods=['GET', 'POST'])
  82. def request_recongnize():
  83. if (request.method == 'POST' or request.method == 'GET'):
  84. if (request.method == 'POST' and request.form['jsessionid']):
  85. jsession_id = request.form['jsessionid']
  86. elif (request.method == 'GET' and request.args.get('jsessionid')):
  87. jsession_id = request.args.get('jsessionid')
  88. else:
  89. logger.debug("缺少请求参数jsessionid")
  90. content = json.dumps({"error_code": "1002", "error_msg": "缺少请求参数jsessionid"})
  91. resp = response_headers(content)
  92. return resp
  93. rec_times = 1
  94. captcha_text = recognize_captcha(jsession_id, remote_url, rec_times, online_save_path, image_suffix)
  95. content = json.dumps({"captcha_text": captcha_text, "jsessionid": jsession_id})
  96. logger.debug("返回验证码:%s,请求jsessionid:%s", captcha_text, jsession_id)
  97. else:
  98. content = json.dumps({"error_code": "1000", "error_msg": "只能是GET,POST请求"})
  99. resp = response_headers(content)
  100. return resp
  101. if __name__ == '__main__':
  102. app.run(
  103. host=request_recognize_ip,
  104. port=request_recognize_port,
  105. debug=True
  106. )