1. Background
a. Sogou has also released its own AI APIs, including ID-card OCR, business-card OCR, text translation and others; accuracy was about average in initial testing.
b. Based on Python 3.
c. It also has its own request-signature scheme. Having already implemented the one for the "goose factory" (Tencent), this part was relatively simple to write.
d. However, Sogou's interface standardization is clearly not as good as the goose factory's. The top-level structure of the response packages differs from one API to another, so this implementation only applies a simple, minimal structure.
2. Implementation code
The code is listed directly below. It is also available on GitHub: https://github.com/jdstkxx/PySougouAI ......
1,sogouai-example.py
# -*- coding: utf-8 -*-
'''
create by : joshua zou
create date : 2018.4.9
Purpose: check sougou ai api

Example script: run Sogou OCR over every ``*.jpg`` in a directory and print
the file's base name followed by the recognised text.
'''
import glob
import os

from SougouAPIMsg import SougouAPIMsg

# Replace these placeholders with your own Sogou AI APPID, APPKEY and SecretKey.
AppID = '0000'
ApiKey = '*********'
SecretKey = '0PLvS-AHShmq**************'


def _extract_text(apiname, js):
    '''Pull the interesting text out of a decoded API response.

    The two APIs answer with differently shaped packages, so each one is
    handled separately.

    :param apiname: 'ocr' or 'idcard'.
    :param js: the response body decoded from JSON (a dict).
    :return: the stripped text, or "" when the call failed or the expected
        fields are absent.
    '''
    if apiname == 'ocr':
        # Text recognition response:
        #   success: {"result":[{"content":"01245177\n",
        #                        "frame":["0,0","207,0","207,59","0,59"]}],
        #             "success":1}
        #   failure: {"success":0}
        results = js.get('result')
        if js.get('success') == 1 and results:
            return results[0]['content'].strip()
    elif apiname == 'idcard':
        # ID-card recognition response (request and response structures both
        # differ from the plain OCR API):
        #   {
        #     "result": {
        #       "address": "xxxxxx",
        #       "Citizenship number": "11001xxx30",
        #       "Birth": "19900101",
        #       "Full name": "xxXX",
        #       "Gender": "X",
        #       "Nation": "xxx"
        #     },
        #     "status": 0,
        #     "statusText": "Success"
        #   }
        # NOTE(review): these key names look machine-translated; confirm them
        # against a real API response before relying on this branch.
        if js.get('status') == 0:
            return js.get('result', {}).get('Citizenship number', '').strip()
    return ""


if __name__ == "__main__":
    sg = SougouAPIMsg(AppID, ApiKey, SecretKey)
    # Raw string: '\p' and '\*' are not valid escape sequences, so a plain
    # literal triggers an invalid-escape warning. The pattern is unchanged.
    for picpath in glob.glob(r'D:\python\*.jpg'):
        # Base name up to the first dot, e.g. 'D:\python\a.jpg' -> 'a'.
        filename = os.path.split(picpath)[1].split('.')[0]
        # Use 'ocr' for plain text recognition; switch to 'idcard' for
        # ID-card recognition.
        apiname = 'ocr'
        rest = sg.apiSougouOcr(apiname, picpath)
        js = rest.json()
        retext = _extract_text(apiname, js)
        print(filename, retext)
2,SougouAPI.py
# -*- coding: utf-8 -*-
# Sogou API dictionary.
#
# Maps a short API identifier to its metadata:
#   APINAME - short display name of the API
#   APIDESC - description of the API
#   APIURL  - request URL of the API
SougouAPI = {
    # Basic text analysis APIs
    "ocr": {
        'APINAME': 'image recognition',
        'APIDESC': 'Recognize text in an image',
        'APIURL': 'http://api.ai.sogou.com/pub/ocr',
    },
    "idcard": {
        'APINAME': 'Identification of ID card',
        'APIDESC': 'Identification of ID card',
        'APIURL': 'http://api.ai.sogou.com/pub/ocr/idcard',
    },
}
3,SougouAPIMsg.py
# -*- coding: utf-8 -*-
'''
create by : joshua zou
create date : 2018.4.9
Purpose: check sougou ai api
'''
import requests
import base64
import hashlib
import hmac
import time
from urllib import parse
import json
from SougouAPI import SougouAPI


class SougouAPIMsg(object):
    '''Minimal client for the Sogou AI HTTP APIs listed in ``SougouAPI``.

    Holds the application credentials, builds the ``Authorization`` header
    value and posts image files to the OCR endpoints.
    '''

    def __init__(self, AppID=None, ApiKey=None, SecretKey=None):
        '''Store the credentials, falling back to built-in placeholders.

        :param AppID: application id; a placeholder is used when falsy.
        :param ApiKey: access key placed in the auth prefix.
        :param SecretKey: secret key used as the HMAC key.
        '''
        if not AppID:
            AppID = '88888'
        if not ApiKey:
            ApiKey = '5ADwS88888888Dtr6QG2'
        if not SecretKey:
            SecretKey = '0PLvS-AH8888888889n6NF6fVVTt7m'
        self.__app_id = AppID
        self.__app_key = ApiKey
        self.__app_secret = SecretKey

    def get_time_stamp(self):
        '''Return the current Unix time in whole seconds, as a string.'''
        return str(int(time.time()))

    # Signing scheme implemented by get_auth_sign_str (author's notes):
    #
    # 1. Auth prefix {AuthPrefix}
    #      {AuthPrefix} = sac-auth-v1/{accessKey}/{secondsSinceEpoch}/{expirationPeriodInSeconds}
    #
    # 2. Request data {Data}
    #      {Data} = {REQUEST_METHOD} + "\n" + {HOST} + "\n" + {URI} + "\n" + {SORTED_QUERY_STRING}
    #    where
    #      REQUEST_METHOD      is the HTTP method of the request: GET | POST | PUT | DELETE
    #      HOST                is the service domain name, e.g. api.ai.sogou.com
    #      URI                 is the request path, e.g. /speech/asr
    #      SORTED_QUERY_STRING is built from the URL query string (the
    #                          "k1=v1&k2=v2" part after "?") as follows:
    #        - split the query string on "&" and convert every item to
    #          UriEncode(key) + "=" + UriEncode(value); value may be empty
    #        - sort the converted strings in dictionary order
    #        - join the sorted strings with "&"
    #
    # 3. Signature {Signature}
    #      {Signature} = HMAC-SHA256-BASE64({secretKey}, {AuthPrefix} + "\n" + {Data})
    #
    # 4. The result is sent in the Authorization header:
    #      Authorization: {AuthPrefix}/{Signature}
    #
    # Example:
    #   accessKey / secretKey:
    #     bTkALtTB9x6GAxmFi9wetAGH / PMROwlieALT36qfdGClVz2iH4Sv8xZxe
    #   POST to http://api.ai.sogou.com/speech/asr with query
    #     type=gbk&idx=1&starttime=1491810516
    #   current system time: 1491810516
    #
    #   {AuthPrefix} = "sac-auth-v1/bTkALtTB9x6GAxmFi9wetAGH/1491810516/3600"
    #   {Data}       = "POST\napi.ai.sogou.com\n/speech/asr\nidx=1&starttime=1491810516&type=gbk"
    #   {Signature}  = HMAC-SHA256-BASE64("PMROwlieALT36qfdGClVz2iH4Sv8xZxe",
    #                                      {AuthPrefix} + "\n" + {Data})
    #                = "vuVEkzcnUeFv8FxeWS50c7S0HaYH1QKgtIV5xrxDY/s="
    #
    #   Final header:
    #     Authorization: sac-auth-v1/bTkALtTB9x6GAxmFi9wetAGH/1491810516/3600/vuVEkzcnUeFv8FxeWS50c7S0HaYH1QKgtIV5xrxDY/s=
    def get_auth_sign_str(self, url, method, timestamp=None, expires=3600):
        '''Build the ``Authorization`` header value for a request.

        :param url: full request URL; host, path and query string are taken
            from it.
        :param method: HTTP method name placed in the signed data, e.g. 'POST'.
        :param timestamp: seconds since the epoch to embed in the prefix;
            defaults to the current time from ``get_time_stamp``.
        :param expires: validity period in seconds embedded in the prefix.
        :return: ``{AuthPrefix}/{Signature}`` as a string.
        '''
        res = parse.urlparse(url)
        if timestamp is None:
            timestamp = self.get_time_stamp()

        # 1. Auth prefix.
        authprefix = 'sac-auth-v1/%s/%s/%s' % (self.__app_key, timestamp, expires)

        # 2. Request data. parse_qsl with keep_blank_values keeps "k=" items
        # (the scheme allows empty values) and yields one pair per occurrence,
        # so repeated keys are encoded individually. Each pair is encoded
        # first and the encoded "k=v" strings are sorted, as the scheme says.
        pairs = parse.parse_qsl(res.query, keep_blank_values=True)
        sortquerystr = '&'.join(sorted(parse.urlencode([pair]) for pair in pairs))
        data = '%s\n%s\n%s\n%s' % (method, res.netloc, res.path, sortquerystr)

        # 3. HMAC-SHA256 over prefix + "\n" + data, then base64 to text.
        signstr = '%s\n%s' % (authprefix, data)
        digest = hmac.new(self.__app_secret.encode(),
                          signstr.encode(),
                          digestmod=hashlib.sha256).digest()
        signature = base64.b64encode(digest).decode()

        # 4. Final authorization string.
        return '%s/%s' % (authprefix, signature)

    # Reference PHP snippet the upload below is modelled on:
    #
    #   $file = "OCR-test03.jpg";
    #   $url = "http://api.ai.sogou.com/pub/ocr";
    #   $hdr = array(
    #       "Content-Type: multipart/form-data",
    #       "Authorization: ".sign($ak, $sk, $url, "POST")
    #   );
    #   // cURL headers for file uploading
    #   $postfields = array(
    #       "pic" => curl_file_create($file,'image/jpeg','a_b_c.jpg'),
    #   );
    #   $ch = curl_init();
    #   $options = array(
    #       CURLOPT_URL => $url,
    #       CURLOPT_HEADER => false,
    #       CURLOPT_POST => 1,
    #       CURLOPT_HTTPHEADER => $hdr,
    #       CURLOPT_POSTFIELDS => $postfields,
    #       CURLOPT_RETURNTRANSFER => true
    #   );
    def apiSougouOcr(self, apiname, picfilename):
        '''Upload an image to one of the OCR endpoints.

        :param apiname: key into ``SougouAPI`` ('ocr' or 'idcard').
        :param picfilename: path of the image file to upload.
        :return: the ``requests`` response object; callers decode it with
            ``.json()``.
        :raises KeyError: if ``apiname`` is not a key of ``SougouAPI``.
        '''
        url = SougouAPI[apiname]['APIURL']
        header = {"Authorization": self.get_auth_sign_str(url, method='POST')}
        # Close the file handle once the multipart body has been sent.
        with open(picfilename, 'rb') as pic:
            resp = requests.post(url, headers=header, files={'pic': pic})
        return resp
- above -