HOME/Articles/

PIL example: auto recommend OCR helper (snippet)

Article Outline

Python PIL example: 'auto recommend ocr helper'

Functions in program:

  • def get_question(img_name):
  • def space_ocr(img,ocr_mode='chs'):
  • def baidu_ocr(img, app_id = "10661627", app_key = "h5xcL0m4TB8fiiFWoysn7uxt", app_secret = "HGA1cgXzz80douKQUpII7K77TYWSGcfW", api_version=5, timeout=6):
  • def tesseract_ocr(img,ocr_mode='chi_sim'):
  • def hanvon_ocr(img_name):

Modules used in program:

  • import sys
  • import thread
  • import requests
  • import random
  • import url_helper
  • import selenium_helper
  • import screen_shot_helper
  • import recommend_helper
  • import question_helper
  • import ocr_helper
  • import jieba_helper
  • import jieba
  • import operator
  • import win32gui, win32ui, win32con
  • import threading
  • import config
  • import urllib,urllib2
  • import re
  • import time
  • import pytesseract
  • import os,base64,json
  • import colorsys

python auto recommend ocr helper

Python PIL example: auto recommend OCR helper

#encoding=utf-8
from PIL import Image
import colorsys
import os,base64,json
import pytesseract
import time
import re
import urllib,urllib2
import config
import threading
import win32gui, win32ui, win32con
# import win32api
from selenium import webdriver
# import traceback
from selenium.webdriver.support.wait import WebDriverWait
# import urllib, urllib2
# import StringIO 
from aip import AipOcr
import operator
import jieba
import jieba_helper
import ocr_helper
import question_helper
import recommend_helper
import screen_shot_helper
import selenium_helper
import url_helper
import random
import requests
import thread
from lxml import html
import sys
# Python 2 idiom: reload the sys module and then switch the interpreter-wide
# default string encoding to UTF-8. `reload` is a builtin only in Python 2,
# so this module does not import under Python 3 as written.
reload(sys) 
sys.setdefaultencoding('utf8')

def hanvon_ocr(img_name):
    """Recognize text in an image file through the Hanvon OCR web API.

    The image is read from disk, base64-encoded and posted as a JSON body.
    The API key is taken from ``config.hanvon_ocr['key']``.

    Args:
        img_name: Path of the image file to upload.

    Returns:
        The ``textResult`` field of the JSON response.

    No exceptions are handled here: failures while reading the file,
    performing the HTTP request or decoding the response propagate to the
    caller.
    """
    key = config.hanvon_ocr['key']
    url = 'http://api.hanvon.com/rt/ws/v1/ocr/text/recg?key=%s&code=4060d49a-acf8-4897-9f61-5540bda01ed9' % key

    # Read through a context manager so the file handle is always closed.
    with open(img_name, 'rb') as image_file:
        base64img = base64.b64encode(image_file.read())

    data = {"uid": '', "lang": 'auto', "color": 'color', "image": base64img}
    headers = {"Content-Type": "application/octet-stream"}
    resp = requests.post(url, data=json.dumps(data), headers=headers)
    return json.loads(resp.text)['textResult']
def tesseract_ocr(img,ocr_mode='chi_sim'):
    """Run OCR on an image via pytesseract.

    Args:
        img: Image source handed to ``Image.open`` (e.g. a file path).
        ocr_mode: Language code forwarded to pytesseract as ``lang``.
            Defaults to ``'chi_sim'``.

    Returns:
        The value returned by ``pytesseract.image_to_string``.
    """
    image = Image.open(img)
    # Forward the requested language instead of hard-coding one, so the
    # ocr_mode argument actually takes effect.
    return pytesseract.image_to_string(image, lang=ocr_mode)
def baidu_ocr(img, app_id = "10661627", app_key = "h5xcL0m4TB8fiiFWoysn7uxt", app_secret = "HGA1cgXzz80douKQUpII7K77TYWSGcfW", api_version=5, timeout=6):
    """Recognize text in an image file with the Baidu AIP OCR client.

    Args:
        img: Path of the image file to recognize.
        app_id, app_key, app_secret: Kept for interface compatibility only;
            the values actually used are always read from
            ``config.baidu_ocr``.
        api_version: ``1`` selects ``client.basicAccurate``; any other value
            selects ``client.basicGeneral``.
        timeout: Connection timeout in seconds (converted to milliseconds
            for the client).

    Returns:
        Every recognized line followed by ``'||'``, concatenated into one
        string. An empty string is returned when the API reports an error
        or the response does not have the expected structure.
    """
    # Credentials always come from config, overriding the arguments.
    app_id = config.baidu_ocr['app_id']
    app_key = config.baidu_ocr['app_key']
    app_secret = config.baidu_ocr['app_secret']
    client = AipOcr(appId=app_id, apiKey=app_key, secretKey=app_secret)
    client.setConnectionTimeoutInMillis(timeout * 1000)
    options = {"language_type": "CHN_ENG"}

    # Read the bytes first so the file is closed before the network call.
    with open(img, "rb") as fp:
        image_bytes = fp.read()

    if api_version == 1:
        result = client.basicAccurate(image_bytes, options)
    else:
        result = client.basicGeneral(image_bytes, options)

    if "error_code" in result:
        print("baidu api error: ", result["error_msg"])
        return ""

    try:
        return ''.join(line['words'] + '||' for line in result['words_result'])
    except (KeyError, TypeError):
        # Missing or malformed 'words_result': treat as "no text recognized".
        return ''
def space_ocr(img,ocr_mode='chs'):
    """Recognize text in an image file through the OCR.space HTTP API.

    Args:
        img: Path of the image file to upload.
        ocr_mode: Language code sent as the ``language`` form field.
            Defaults to ``'chs'``.

    Returns:
        ``ParsedText`` of the first entry in the response's
        ``ParsedResults``, or an empty string when the response carries no
        parsed results.
    """
    payload = {'isOverlayRequired': True,
               'apikey': '6c851da45688957',
               # Forward the requested language so ocr_mode takes effect.
               'language': ocr_mode,
               }
    with open(img, 'rb') as f:
        r = requests.post('https://api.ocr.space/parse/image',
                          files={img: f},
                          data=payload,
                          )
    parsed = json.loads(r.content)
    results = parsed.get('ParsedResults') if isinstance(parsed, dict) else None
    if not results:
        # No usable result: return '' like baidu_ocr does, so the fallback
        # loop in get_question can move on to the next engine.
        return ''
    return results[0]['ParsedText']

def get_question(img_name):
    """Extract the text of an image using the configured OCR engines.

    The engine names in ``config.ocr_prefer`` are tried in order. Only
    ``'baidu'`` and ``'space'`` are recognized; any other name is skipped.

    Args:
        img_name: Path of the image file passed to each OCR function.

    Returns:
        The first OCR result that is not the empty string, or ``''`` when
        no engine produced one.
    """
    # Call the OCR engines in preference order to extract the image text.
    for engine in config.ocr_prefer:
        if engine == 'baidu':
            text = baidu_ocr(img_name)
        elif engine == 'space':
            text = space_ocr(img_name)
        else:
            # Unknown engine name: nothing to call, try the next one.
            continue
        if text != '':
            return text
    return ''