Implementation of Sogou AI API in Python 3

Keywords: Python REST github JSON

1. Background

a. Sogou has also released its own AI API, including ID card OCR, business card OCR, text translation and other APIs; accuracy in initial testing was average.

b. Based on Python 3.

c. It also has its own request-signature scheme. With the experience gained from Tencent's API (the "goose factory"), it is relatively simple to write.

d. However, Sogou's interface standardization is obviously not as good as Tencent's. The main structure of the response packages differs from one API to another, so the implementation here is only lightly structured.

 

2. Implementation code

The code is given directly below. It is also available on GitHub: https://github.com/jdstkxx/PySougouAI ......

1. sogouai-example.py

# -*- coding: utf-8 -*-

'''
create by : joshua zou
create date : 2018.4.9
Purpose: check sougou ai api
'''

import glob,os
from SougouAPIMsg import *

# Replace these placeholders with your own Sogou AI AppID, ApiKey and SecretKey.
AppID = '0000'
ApiKey = '*********'
SecretKey= '0PLvS-AHShmq**************'


def extract_text(apiname, js):
    """Pull the recognised text out of a decoded Sogou API response.

    The two endpoints answer with differently shaped packages:

    * ``ocr``    success: {"result":[{"content":"01245177\\n","frame":[...]}],"success":1}
                 failure: {"success":0}
    * ``idcard`` {"result": {"address": ..., "Citizenship number": ..., ...},
                  "status": 0, "statusText": "Success"}

    Args:
        apiname: ``'ocr'`` or ``'idcard'``.
        js: the response body already decoded from JSON (a dict).

    Returns:
        The stripped text of the first OCR result, or the stripped ID number
        for ``idcard``; an empty string when the call failed, returned no
        results, or ``apiname`` is not one of the two known names.
    """
    if apiname == 'ocr':
        # Guard against a "success" answer that carries an empty result list.
        if js.get('success') == 1 and js.get('result'):
            return js['result'][0]['content'].strip()
    elif apiname == 'idcard':
        # NOTE(review): the field name below looks machine-translated in this
        # copy of the code - confirm it against a real API response.
        if js.get('status') == 0:
            return js['result']['Citizenship number'].strip()
    return ""


if __name__ == "__main__":
    sg = SougouAPIMsg(AppID,ApiKey,SecretKey)
    # Raw string: '\p' and '\*' are not valid escape sequences in a normal literal.
    for file in glob.glob(r'D:\python\*.jpg'):
        # splitext keeps dots inside the base name ("a.b.jpg" -> "a.b").
        filename = os.path.splitext(os.path.basename(file))[0]
        # Generic text recognition; set apiname = 'idcard' for ID card recognition.
        apiname = 'ocr'
        rest = sg.apiSougouOcr(apiname, file)
        print(filename, extract_text(apiname, rest.json()))

 

2. SougouAPI.py

# -*- coding: utf-8 -*-
# Sogou API Dictionaries

# Registry of the Sogou AI endpoints used by this project, keyed by a short API id.
# Every entry carries three fields:
#   APINAME - short display name of the API
#   APIDESC - one-line description of the API
#   APIURL  - URL the request is sent to
SougouAPI = dict(
    # Basic text analysis APIs
    ocr=dict(
        APINAME='image recognition',
        APIDESC='Recognize text in an image',
        APIURL='http://api.ai.sogou.com/pub/ocr',
    ),
    idcard=dict(
        APINAME='Identification of ID card',
        APIDESC='Identification of ID card',
        APIURL='http://api.ai.sogou.com/pub/ocr/idcard',
    ),
)

3. SougouAPIMsg.py

# -*- coding: utf-8 -*-

'''
create by : joshua zou
create date : 2018.4.9
Purpose: check sougou ai api
'''

import requests
import base64
import hashlib
import hmac
import time
from urllib import parse
import json
from SougouAPI import *

class SougouAPIMsg(object):
    """Small client for the Sogou AI REST APIs listed in ``SougouAPI``.

    Stores the application credentials, builds the ``Authorization`` header
    value for the ``sac-auth-v1`` signing scheme and uploads an image to one
    of the OCR endpoints.
    """

    def __init__(self,AppID=None,ApiKey=None,SecretKey=None):
        """Remember the credentials.

        Any argument that is missing or falsy is replaced by the built-in
        placeholder value below.
        """
        if not AppID: AppID = '88888'
        if not ApiKey: ApiKey = '5ADwS88888888Dtr6QG2'
        if not SecretKey: SecretKey= '0PLvS-AH8888888889n6NF6fVVTt7m'
        self.__app_id= AppID
        self.__app_key= ApiKey
        self.__app_secret= SecretKey

    def get_time_stamp(self):
        """Return the current Unix time in whole seconds, as a string."""
        return str(int(time.time()))

    def get_auth_sign_str(self,url,method):
        r"""Build the ``Authorization`` header value for a request.

        Signing scheme:

        1. Auth prefix::

               {AuthPrefix} = sac-auth-v1/{accessKey}/{secondsSinceEpoch}/{expirationPeriodInSeconds}

        2. Request data::

               {Data} = {REQUEST_METHOD} + "\n" + {HOST} + "\n" + {URI} + "\n" + {SORTED_QUERY_STRING}

           REQUEST_METHOD is the HTTP method (GET, POST, PUT, DELETE), HOST the
           service domain (e.g. api.ai.sogou.com), URI the request path (e.g.
           /speech/asr).  SORTED_QUERY_STRING is the part of the URL after "?":
           split it on "&", turn every item into
           UriEncode(key) + "=" + UriEncode(value) (the value may be an empty
           string), sort the items in dictionary order and join them with "&".

        3. Signature::

               {Signature} = HMAC-SHA256-BASE64({secretKey}, {AuthPrefix} + "\n" + {Data})

        4. Header::

               Authorization: {AuthPrefix}/{Signature}

        Example: accessKey / secretKey are
        bTkALtTB9x6GAxmFi9wetAGH / PMROwlieALT36qfdGClVz2iH4Sv8xZxe, the request
        is a POST to http://api.ai.sogou.com/speech/asr with query
        type=gbk&idx=1&starttime=1491810516 and the current time is 1491810516::

            {AuthPrefix} = "sac-auth-v1/bTkALtTB9x6GAxmFi9wetAGH/1491810516/3600"
            {Data}       = "POST\napi.ai.sogou.com\n/speech/asr\nidx=1&starttime=1491810516&type=gbk"
            {Signature}  = "vuVEkzcnUeFv8FxeWS50c7S0HaYH1QKgtIV5xrxDY/s="
            Authorization: sac-auth-v1/bTkALtTB9x6GAxmFi9wetAGH/1491810516/3600/vuVEkzcnUeFv8FxeWS50c7S0HaYH1QKgtIV5xrxDY/s=

        Args:
            url: full request URL, optionally including a query string.
            method: HTTP method name, e.g. ``'POST'``.

        Returns:
            The string ``{AuthPrefix}/{Signature}``; the validity period is
            fixed at 3600 seconds.
        """
        res = parse.urlparse(url)

        # 1. Auth prefix: scheme / access key / issue time / validity in seconds.
        authprefix = 'sac-auth-v1/%s/%s/%s' % (self.__app_key, self.get_time_stamp(), 3600)

        # 2. Request data. parse_qsl keeps one (key, value) pair per item, so
        #    repeated keys are encoded individually, and keep_blank_values
        #    retains "k=" items, which the scheme explicitly allows.
        pairs = sorted(parse.parse_qsl(res.query, keep_blank_values=True))
        sortquerystr = parse.urlencode(pairs)
        data = '%s\n%s\n%s\n%s' % (method, res.netloc, res.path, sortquerystr)

        # 3. Signature: HMAC-SHA256 over prefix + "\n" + data, then base64.
        signstr = '%s\n%s' % (authprefix, data)
        digest = hmac.new(self.__app_secret.encode(), signstr.encode(),
                          digestmod=hashlib.sha256).digest()
        signature = base64.b64encode(digest).decode()

        # 4. Final authorization value.
        return '%s/%s' % (authprefix, signature)

    def apiSougouOcr(self,apiname,picfilename):
        """Upload an image to an OCR endpoint and return the HTTP response.

        Follows the vendor's PHP cURL sample: a POST to the endpoint URL with
        the signed ``Authorization`` header and the image sent as the
        multipart form field ``pic``.

        Args:
            apiname: key into ``SougouAPI`` (e.g. ``'ocr'`` or ``'idcard'``).
            picfilename: path of the image file to upload.

        Returns:
            The response object returned by ``requests.post``; callers decode
            the body themselves, e.g. with ``.json()``.

        Raises:
            KeyError: if ``apiname`` is not a key of ``SougouAPI``.
            OSError: if the image file cannot be opened.
        """
        url = SougouAPI[apiname]['APIURL']
        header = {"Authorization": self.get_auth_sign_str(url, method='POST')}

        # The context manager closes the image file once the upload is done.
        with open(picfilename, 'rb') as pic:
            resp = requests.post(url, headers=header, files={'pic': pic})
        return resp

 

- above -

Posted by udendra on Sat, 04 Apr 2020 21:45:08 -0700