HOME/Articles/

pil example auto recommend recommend helper (snippet)

Article Outline

Python pil example 'auto recommend recommend helper'

Functions in the program:

  • def baidu_count2(question_body):
  • def get_result_num(index,params,score):
  • def baidu_count(question_body,ref=0,ans=1):
  • def recommend_accurate(question_body):
  • def recommend_fast(question_body,ref=0,ans=1):

Modules used in the program:

  • import sys
  • import thread
  • import requests
  • import random
  • import jieba
  • import operator
  • import win32gui, win32ui, win32con
  • import threading
  • import config
  • import urllib,urllib2
  • import re
  • import url_helper
  • import selenium_helper
  • import screen_shot_helper
  • import recommend_helper
  • import question_helper
  • import ocr_helper
  • import jieba_helper
  • import time
  • import url_helper
  • import selenium_helper
  • import screen_shot_helper
  • import recommend_helper
  • import question_helper
  • import ocr_helper
  • import jieba_helper
  • import pytesseract
  • import os,base64,json
  • import colorsys

python auto recommend recommend helper

Python pil example: auto recommend recommend helper

#encoding=utf-8
from PIL import Image
import colorsys
import os,base64,json
import pytesseract
import jieba_helper
import ocr_helper
import question_helper
import recommend_helper
import screen_shot_helper
import selenium_helper
import url_helper
import time
import jieba_helper
import ocr_helper
import question_helper
import recommend_helper
import screen_shot_helper
import selenium_helper
import url_helper
import re
import urllib,urllib2
import config
import threading
import win32gui, win32ui, win32con
# import win32api
from selenium import webdriver
# import traceback
from selenium.webdriver.support.wait import WebDriverWait
# import urllib, urllib2
# import StringIO 
from aip import AipOcr
import operator
import jieba
import random
import requests
import thread
from lxml import html
import sys
reload(sys) 
sys.setdefaultencoding('utf8')

def recommend_fast(question_body,ref=0,ans=1):
    """Print the answers ranked by the scores computed by ``baidu_count``.

    The report is a header line followed by one ``score<TAB>answer`` line per
    answer, highest score first, emitted with a single ``print`` call.

    Args:
        question_body: forwarded unchanged to ``baidu_count``.
        ref: forwarded to ``baidu_count``. ``0`` selects the "Baidu search"
            header, any other value the "Q&A platform" header.
        ans: forwarded to ``baidu_count``. When ``0`` the header is prefixed
            with a star marker.

    Returns:
        None. Ordinary failures are reported on stdout instead of raised.
    """
    try:
        summary = baidu_count(question_body, ref, ans)
        ranked = sorted(summary.items(), key=operator.itemgetter(1), reverse=True)
        marker = '★' if ans == 0 else ''
        header = '百度搜索\n' if ref == 0 else '问答平台\n'
        rows = [str(score) + '\t' + answer + '\n' for answer, score in ranked]
        print(marker + header + ''.join(rows))
    except Exception:
        # Best effort: a failed lookup must not take the caller down.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so the program can still be stopped.
        print('baidu_count异常')
def recommend_accurate(question_body):
    """Print the answers ranked by the relevance score from ``baidu_count2``.

    Output is a header line followed by one ``score<TAB>answer`` line per
    answer, highest score first (the same row layout ``recommend_fast`` uses).

    Args:
        question_body: forwarded unchanged to ``baidu_count2``.

    Returns:
        None.
    """
    scores = baidu_count2(question_body)
    if scores is None:
        # baidu_count2 prints its own ranking; when it hands back no mapping
        # there is nothing left to rank here, and calling .items() on None
        # would only raise AttributeError.
        return
    ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
    print('关联程度')
    for answer, score in ranked:
        # One string per row: the former print(x,) calls were trailing-comma
        # relics that split each row over three lines.
        print(str(score) + '\t' + answer)

def baidu_count(question_body,ref=0,ans=1):
    """Score each answer by how often it occurs in the fetched result text.

    Args:
        question_body: indexable; item 1 is the question text and item 2 the
            sequence of answer strings (any number of answers).
        ref: passed through to ``url_helper.get_result_content``.
        ans: when ``1`` the answers are appended to the search keyword,
            unless that makes the keyword longer than 38 characters.

    Returns:
        dict mapping answer -> score. Normally the score is the occurrence
        count, corrected for answers that are substrings of other answers.
        If every corrected count is the same non-zero value, the answers are
        instead ranked by first appearance: the answer that appears earliest
        receives the largest first-occurrence index as its score.

    Raises:
        Whatever ``url_helper.get_result_content`` raises.
    """
    question = question_body[1]
    answers = question_body[2]

    keyword = question
    if ans == 1:
        # Search for the question together with its options.
        with_options = ' '.join([question] + list(answers))
        # NOTE(review): 38 looks like a query-length limit - confirm. Longer
        # keywords fall back to the bare question.
        if len(with_options) <= 38:
            keyword = with_options

    content = url_helper.get_result_content(keyword, ref)
    summary = {answer: content.count(answer) for answer in answers}

    # An answer contained in another answer is also counted whenever the
    # longer one occurs, so subtract the longer answer's count. The pairs are
    # visited in the same order as the former hand-written three-answer
    # version, and each subtraction sees the already-corrected values, so
    # results for three answers are unchanged.
    for i, inner in enumerate(answers):
        for j, outer in enumerate(answers):
            if i != j and inner in outer:
                summary[inner] = summary[inner] - summary[outer]

    counts = list(summary.values())
    if all(cnt == 0 for cnt in counts):
        return summary

    if all(cnt == counts[0] for cnt in counts):
        # Tie: rank by position of first occurrence. Sorting ascending and
        # pairing with the reversed indexes gives the earliest answer the
        # highest score.
        first_seen = {answer: content.index(answer) for answer in answers}
        by_position = sorted(first_seen.items(), key=operator.itemgetter(1))
        ordered_answers, positions = zip(*by_position)
        return dict(zip(ordered_answers, reversed(positions)))
    return summary

def get_result_num(index,params,score):
    """Fetch the result page for ``params['wd']`` and record its hit count.

    The outcome is appended to ``score`` rather than returned, so the
    function can be used as a thread target.

    Args:
        index: tag stored alongside the count so the caller can restore the
            query order.
        params: dict whose ``'wd'`` entry is the search keyword.
        score: list that receives one ``(index, count)`` tuple.

    Returns:
        None. When the page has no result counter (or the counter contains
        no digits) an error line is printed and a count of 0 is recorded.
    """
    # The 0 is the ``ref`` argument of get_result_content (the value this
    # file labels as the plain Baidu search); it was previously handed to
    # html.fromstring as base_url by a misplaced parenthesis.
    content = url_helper.get_result_content(params['wd'], 0)
    tree = html.fromstring(content)
    nums = tree.xpath("//div[@class='nums']/text()")

    # Strip thousands separators, then take the first run of digits.
    digits = re.search(r'\d+', nums[0].replace(',', '')) if nums else None
    if digits is None:
        print('错误')
        result_nums = 0
    else:
        result_nums = int(digits.group())
    score.append((index, result_nums))
def baidu_count2(question_body):
    """Rank the answers by a search-hit relevance ratio and print the ranking.

    For every answer two queries are issued (question + answer, and the
    answer alone), plus one query for the question alone. Each query runs in
    its own thread through ``get_result_num``. An answer's score is

        K = count(Q & A) / (count(Q) * count(A))

    multiplied by 1e11 and truncated to an int; it is 0 when count(Q) or
    count(A) is 0.

    Args:
        question_body: indexable; item 1 is the question text and item 2 the
            sequence of answer strings (any number of answers).

    Returns:
        dict mapping each answer to its integer score. The ranking is also
        printed, highest score first.
    """
    keyword = question_body[1]
    answers = question_body[2]

    # Query layout: [Q+A0, A0, Q+A1, A1, ..., Q]
    queries = []
    for answer in answers:
        queries.append({"wd": keyword + ' ' + answer})
        queries.append({"wd": answer})
    queries.append({"wd": keyword})

    collected = []
    workers = []
    for position, params in enumerate(queries):
        worker = threading.Thread(target=get_result_num,
                                  args=(position, params, collected))
        workers.append(worker)
        worker.start()
    for worker in workers:
        worker.join()

    # Workers finish in arbitrary order, and one that raises before
    # appending records nothing. Placing each count by its tag keeps the
    # layout aligned; a missing slot simply stays 0.
    counts = [0] * len(queries)
    for position, hits in collected:
        counts[position] = hits

    question_hits = counts[-1]
    scored = []
    for i, answer in enumerate(answers):
        joint_hits = counts[2 * i]
        answer_hits = counts[2 * i + 1]
        if question_hits != 0 and answer_hits != 0:
            k = int(joint_hits * 100000000000.0 / (question_hits * answer_hits))
        else:
            k = 0
        scored.append((answer, k))

    # Ascending stable sort, then reversed: keeps the original tie order.
    ranked = sorted(scored, key=lambda pair: pair[1])[::-1]
    result = '参考-关联程度:\n'
    for answer, k in ranked:
        result = result + answer + '\t' + str(k) + '\n'
    print(result)
    return dict(scored)