HOME/Articles/

pil example compiler (snippet)

Article Outline

Python pil example 'compiler'

Functions in program:

  • def image_process(pil_img, border):
  • def prob_statement(contest_path, prob_num):

Modules used in program:

  • import sympy
  • import urllib
  • import re
  • import os
  • import requests
  • from bs4 import BeautifulSoup
  • from PIL import Image

python compiler

Python pil example: compiler

import os
import re
import urllib
import urllib.parse
import urllib.request

import requests
import sympy
from bs4 import BeautifulSoup
from PIL import Image


# Literal tags removed from a list item's HTML before the remaining tags are
# swapped for LaTeX. The order is the one the removals are applied in.
_BULLET_STRIP_TAGS = (
    "<p>", "</p>", "</img>", "<i>", "</i>", "<li>", "</li>",
    "</b>", "<b>", "<ul>", "</ul>",
)

# Literal tags removed from a <p>/<center> element's HTML.
_PARAGRAPH_STRIP_TAGS = (
    "<p>", "</p>", "</img>", "<i>", "</i>", "</b>", "<b>", "<br>", "<br/>",
    "<center>", "</center>", "<small>", "</small>", "<div>", "</div>",
    "<tt>", "</tt>",
)

# Captures the text between "<" and ">" of every tag that is still present.
# "." does not match a newline, so a tag split across lines is not captured.
_TAG_PATTERN = re.compile(".*?<(.*?)>")


def _images_to_latex(images, accepted_list, collect_plain_images):
    """Return one replacement string per image tag, in the order given.

    * alt text containing "[asy]": the ``src`` is appended to *accepted_list*
      and the replacement is empty.
    * alt text containing "$": the alt text itself is the replacement.
    * otherwise: images whose class is exactly ``latexcenter`` are wrapped in
      a ``center`` environment; any other image is replaced by an empty
      string and, when *collect_plain_images* is true, its ``src`` is
      appended to *accepted_list*.
    """
    replacements = []
    for img in images:
        alt = str(img['alt'])
        if "[asy]" in alt:
            accepted_list.append(img['src'])
            replacements.append("")
        elif "$" not in alt:
            if img.has_attr("class") and img['class'] == ['latexcenter']:
                # Both halves are raw strings: "\e" is not a valid escape.
                replacements.append(r" \begin{center} " + img['alt'] + r" \end{center} ")
            else:
                replacements.append("")
                if collect_plain_images:
                    accepted_list.append(img['src'])
        else:
            replacements.append(img['alt'])
    return replacements


def _html_to_latex(node, strip_tags, replacements):
    """Serialise *node*, drop *strip_tags*, then replace the i-th remaining tag.

    Raises IndexError when more tags remain than there are *replacements*
    (for example when a tag that is neither stripped nor an image survives).
    """
    text = str(node)
    for tag in strip_tags:
        text = text.replace(tag, "")
    for index, tag_body in enumerate(_TAG_PATTERN.findall(text)):
        text = text.replace("<" + tag_body + ">", replacements[index])
    return text


def _collect_linked_image(node, accepted_list):
    """Record the image inside the first ``<a class="image">`` of *node*.

    NOTE(review): both callers unwrap every <a> of *node* before calling this,
    so ``node.find('a')`` looks like it can never match here -- confirm
    whether the check was meant to run before the links are unwrapped.
    """
    anchor = node.find('a')
    if anchor is not None and anchor.has_attr("class") and anchor['class'] == ['image']:
        accepted_list.append(anchor.img['src'])


def prob_statement(contest_path, prob_num):
    """Extract the problem statement from a saved wiki page as LaTeX text.

    Reads ``<contest_path>/<prob_num>.html`` (UTF-8) and walks the children of
    the ``mw-parser-output`` element inside ``mw-content-ltr``. Lists become
    ``itemize`` environments, ``<p>``/``<center>`` elements are appended as
    text, and image tags are replaced by LaTeX taken from their alt text.
    Processing stops at the first ``<h2>``/``<h3>`` whose headline does not
    contain "problem".

    Args:
        contest_path: Directory holding the saved HTML pages of one contest.
        prob_num: Problem number; used as the file name stem.

    Returns:
        A ``(paragraph, accepted_list)`` tuple: the accumulated LaTeX text and
        the list of image ``src`` values collected while parsing.

    Raises:
        AttributeError: If one of the expected container or headline elements
            is missing (a lookup returned ``None``).
        IndexError: If an element still contains more tags than images after
            the known tags were stripped.
    """
    accepted_list = []
    html_path = os.path.join(contest_path, str(prob_num) + ".html")
    with open(html_path, "r", encoding="utf8") as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    paragraph = ""
    content = soup.find(class_="mw-content-ltr").find(class_="mw-parser-output")
    for element in content:
        if element.name in ('h2', 'h3'):
            headline = element.find(class_="mw-headline", recursive=False)
            if "problem" not in headline.text.lower():
                break
        elif element.name in ('ul', 'ol'):
            paragraph += r' \begin{itemize}'
            for bullet in element.find_all('li'):
                # Keep the link contents but drop the <a> wrappers.
                for link in bullet.find_all('a'):
                    link.unwrap()
                # Only images that are direct children of the item are
                # considered, and plain images are not collected here.
                replacements = _images_to_latex(
                    bullet.find_all('img', recursive=False),
                    accepted_list,
                    collect_plain_images=False,
                )
                item_text = r" \item " + _html_to_latex(
                    bullet, _BULLET_STRIP_TAGS, replacements)
                _collect_linked_image(bullet, accepted_list)
                paragraph += item_text
            paragraph += r' \end{itemize}'
        elif element.name in ('p', 'center'):
            for link in element.find_all('a'):
                link.unwrap()
            replacements = _images_to_latex(
                element.find_all('img'),
                accepted_list,
                collect_plain_images=True,
            )
            block_text = _html_to_latex(element, _PARAGRAPH_STRIP_TAGS, replacements)
            _collect_linked_image(element, accepted_list)
            paragraph += block_text
    return paragraph, accepted_list


def image_process(pil_img, border):
    """Return a new image with *pil_img* pasted inside a margin of *border* pixels.

    The result has the same mode as *pil_img* and is ``2 * border`` larger in
    both dimensions; the original is pasted at offset ``(border, border)``.
    """
    src_width, src_height = pil_img.size
    padded_size = (src_width + border * 2, src_height + border * 2)
    canvas = Image.new(pil_img.mode, padded_size)
    canvas.paste(pil_img, (border, border))
    return canvas

#
# Output tree: <directory of this script>/image_folder/<contest family>/...
MAIN_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_folder")
AMC_PATH, AIME_PATH, USAJMO_PATH, USAMO_PATH = (
    os.path.join(MAIN_PATH, _family) for _family in ("AMC", "AIME", "USAJMO", "USAMO")
)
# Each call creates the *parent* of a contest folder, i.e. MAIN_PATH itself;
# the contest folders are not created here.
for _contest_dir in (AMC_PATH, AIME_PATH, USAJMO_PATH, USAMO_PATH):
    os.makedirs(os.path.dirname(_contest_dir), exist_ok=True)

# AMC problems, 2000-2019 (25 problems per contest):
#   2000-2001: two contests per year, AMC 10 and AMC 12.
#   2002-2019: four contests per year, 10A, 10B, 12A and 12B.
# For every problem the loop stores the LaTeX source in latex.txt, renders it
# to statement.png and downloads the images referenced by the statement.
for year in range(2000, 2020):
    if year < 2002:
        versions = ["10", "12"]
    else:
        versions = ["10A", "10B", "12A", "12B"]
    os.makedirs(os.path.join(AMC_PATH, str(year)), exist_ok=True)
    for version in versions:
        CONTEST_PATH = os.path.join(AMC_PATH, str(year), version)
        os.makedirs(CONTEST_PATH, exist_ok=True)
        # Saved pages are read from <script dir>/webpages/AMC/<year>/<version>.
        # Built with os.path.join so the path is valid on every platform
        # (the hard-coded "\\" separators only worked on Windows).
        WEBPAGE_PATH = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "webpages", "AMC", str(year), version,
        )
        for problem_num in range(1, 26):
            # Presumably the page the saved HTML was taken from; it is used as
            # the base for resolving relative image sources below.
            URL = f"https://artofproblemsolving.com/wiki/index.php/{year}_AMC_{version}_Problems/Problem_{problem_num}"
            try:
                latex, images = prob_statement(WEBPAGE_PATH, problem_num)
                latex = latex.replace("\n", "\n \n")
                PROBLEM_PATH = os.path.join(CONTEST_PATH, str(problem_num))
                os.makedirs(PROBLEM_PATH, exist_ok=True)
                # The HTML is read as UTF-8, so write the LaTeX the same way
                # instead of relying on the platform default encoding.
                with open(os.path.join(PROBLEM_PATH, "latex.txt"), "w", encoding="utf8") as txt:
                    txt.write(latex)
                STATEMENT_PATH = os.path.join(PROBLEM_PATH, "statement.png")

                sympy.preview(latex, viewer='file',
                              filename=STATEMENT_PATH, euler=False,
                              dvioptions=["-T", "tight", "-z", "0", "--truecolor", "-D 600", "-bg", "Transparent", "-fg",
                                          "White"])
                # Pad the rendered statement by 10 pixels on every side and
                # overwrite the file once the source handle is closed.
                with Image.open(STATEMENT_PATH) as im:
                    padded = image_process(im, 10)
                padded.save(STATEMENT_PATH)

                IMAGE_FOLDER = os.path.join(PROBLEM_PATH, "images")
                os.makedirs(IMAGE_FOLDER, exist_ok=True)
                for IMAGE_INDEX, image in enumerate(images):
                    # Resolve protocol-relative ("//host/...") and
                    # site-relative ("/path") sources against the page URL;
                    # absolute URLs pass through unchanged.
                    image = urllib.parse.urljoin(URL, image)
                    IMAGE_PATH = os.path.join(IMAGE_FOLDER, str(IMAGE_INDEX) + ".png")
                    urllib.request.urlretrieve(image, IMAGE_PATH)
                    # Re-save the download as an RGB image.
                    # NOTE(review): paste() is called without a mask, so the
                    # white layer is fully overwritten -- confirm whether
                    # transparent areas were meant to become white.
                    with Image.open(IMAGE_PATH) as pil_image:
                        layer = Image.new('RGB', pil_image.size, (255, 255, 255))
                        layer.paste(pil_image, (0, 0))
                    layer.save(IMAGE_PATH)

            except Exception as e:
                # Best-effort batch run: report the failure (with its cause)
                # and continue with the next problem.
                print(f"problem {problem_num} on the {year} AMC {version} failed: {e!r}")


# #AIME Problems: From 1983 to 1999, there was only one contest per year. From 2000 to present, there has been an AIME I
# # and an AIME II. 15 problems per contest.

# recompile: 1984 AIME 15 done, 1989 AIME 14 done, 1991 AIME 6 done, 1991 AIME 12 done, 1993 AIME 6 done, 2005 AIME I p13 done,
#  2008 aime I p13 done, 2008 AIME II p15 done, 2009 AIME I P9 done, 2012 AIME I P15 done, 2014 AIME II P11 done ,
# 2015 AIME I P15 done
# for year in range(2015, 2016):
#
#     if year < 2000:
#         versions = [""]
#     else:
#         versions = ["_I", "_II"]
#     for version in versions:
#         if version == "":
#             CONTEST_PATH = os.path.join(AIME_PATH, str(year))
#         elif version == "_I":
#             CONTEST_PATH = os.path.join(AIME_PATH, str(year), "1")
#         else:
#             CONTEST_PATH = os.path.join(AIME_PATH, str(year), "2")
#         os.makedirs(CONTEST_PATH, exist_ok=True)
#         for problem_num in range(1, 16):
#             if problem_num != 15 or version == "_II":
#                 continue
#
#             images = []
#
#             PROBLEM_PATH = os.path.join(CONTEST_PATH, str(problem_num))
#             os.makedirs(PROBLEM_PATH, exist_ok=True)
#             # TODO: write latex to latex.txt in os.path.join(CONTEST_PATH, "latex.txt")
#             txt = open(os.path.join(PROBLEM_PATH, "latex.txt")).read()
#             print(txt)
#             print(PROBLEM_PATH)
#             os.makedirs(PROBLEM_PATH, exist_ok=True)
#             STATEMENT_PATH = os.path.join(PROBLEM_PATH, "statement.png")
#             sympy.preview(txt, viewer='file',
#                           filename=STATEMENT_PATH, euler=False,
#                           dvioptions=["-T", "tight", "-z", "0", "--truecolor", "-D 600", "-bg", "Transparent", "-fg",
#                                       "White"])
#             im = Image.open(STATEMENT_PATH)
#             im = image_process(im, 10)
#             im.save(STATEMENT_PATH)
#             IMAGE_FOLDER = os.path.join(PROBLEM_PATH, "images")
#             os.makedirs(IMAGE_FOLDER, exist_ok=True)
#             IMAGE_INDEX = 0
#             for image in images:
#                 if image.startswith('//latex'):
#                     image = "https:" + image
#                 IMAGE_PATH = os.path.join(IMAGE_FOLDER, str(IMAGE_INDEX) + ".png")
#                 urllib.request.urlretrieve(image, IMAGE_PATH)
#                 pil_image = Image.open(IMAGE_PATH)
#                 layer = Image.new('RGB', pil_image.size, (255, 255, 255))
#                 layer.paste(pil_image, (0, 0))
#                 layer.save(IMAGE_PATH)
#                 IMAGE_INDEX += 1
# # #USAJMO Problems: Started from 2010, only one contest per year, 6 problems.
# # for year in range(2017, 2020):
# #     CONTEST_PATH = os.path.join(USAJMO_PATH, str(year))
# #     os.makedirs(CONTEST_PATH, exist_ok=True)
# #
# #     for problem_num in range(1, 7):
# #         URL = f"https://artofproblemsolving.com/wiki/index.php/{year}_USAJMO_Problems/Problem_{problem_num}"
# #         if not (year == 2013 and problem_num == 3):
# #             latex, images = prob_statement(URL)
# #         else:
# #             latex, images = prob_statement("https://artofproblemsolving.com/wiki/index.php/2013_USAMO_Problems/Problem_1")
# #         PROBLEM_PATH = os.path.join(CONTEST_PATH, str(problem_num))
# #         os.makedirs(PROBLEM_PATH, exist_ok=True)
# #         # TODO: write latex to latex.txt in os.path.join(CONTEST_PATH, "latex.txt")
# #         txt = open(os.path.join(PROBLEM_PATH, "latex.txt"), "w+")
# #         txt.write(latex)
# #         txt.close()
#         print(PROBLEM_PATH)
#         print(latex)
#
#         STATEMENT_PATH = os.path.join(PROBLEM_PATH, "statement.png")
#         sympy.preview(latex, viewer='file',
#                       filename=STATEMENT_PATH, euler=False,
#                       dvioptions=["-T", "tight", "-z", "0", "--truecolor", "-D 600", "-bg", "Transparent", "-fg",
#                                   "White"])
#         im = Image.open(STATEMENT_PATH)
#         im = image_process(im, 10)
#         im.save(STATEMENT_PATH)
#         IMAGE_FOLDER = os.path.join(PROBLEM_PATH, "images")
#         os.makedirs(IMAGE_FOLDER, exist_ok=True)
#         IMAGE_INDEX = 0
#         for image in images:
#             if image.startswith('//latex'):
#                 image = "https:" + image
#             IMAGE_PATH = os.path.join(IMAGE_FOLDER, str(IMAGE_INDEX) + ".png")
#             urllib.request.urlretrieve(image, IMAGE_PATH)
#             pil_image = Image.open(IMAGE_PATH)
#             layer = Image.new('RGB', pil_image.size, (255, 255, 255))
#             layer.paste(pil_image, (0, 0))
#             layer.save(IMAGE_PATH)
#             IMAGE_INDEX += 1
#USAMO Problems: Started from 1972, only one contest per year, 5 problems from 1972 to 1995, 6 problems from 1996 to present.
# for year in range(2017, 2020):
#     CONTEST_PATH = os.path.join(USAMO_PATH, str(year))
#     os.makedirs(os.path.dirname(CONTEST_PATH),exist_ok=True)
#     if year <= 1995:
#         max_probs = 6
#     else:
#         max_probs = 7
#     for problem_num in range(1, max_probs):
#         URL = f"https://artofproblemsolving.com/wiki/index.php/{year}_USAMO_Problems/Problem_{problem_num}"
#         if not (year == 2017 and problem_num == 3):
#             latex, images = prob_statement(URL)
#         else:
#             latex = "No solution."
#             images = []
#         PROBLEM_PATH = os.path.join(CONTEST_PATH, str(problem_num))
#
#         print(PROBLEM_PATH)
#         print(latex)
#         latex = latex.replace('ă','a')
#         os.makedirs(PROBLEM_PATH, exist_ok=True)
#         # TODO: write latex to latex.txt in os.path.join(CONTEST_PATH, "latex.txt")
#         txt = open(os.path.join(PROBLEM_PATH, "latex.txt"), "w+")
#         txt.write(latex)
#         txt.close()
#         STATEMENT_PATH = os.path.join(PROBLEM_PATH, "statement.png")
#         sympy.preview(latex, viewer='file',
#                       filename=STATEMENT_PATH, euler=False,
#                       dvioptions=["-T", "tight", "-z", "0", "--truecolor", "-D 600", "-bg", "Transparent", "-fg",
#                                   "White"])
#         im = Image.open(STATEMENT_PATH)
#         im = image_process(im, 10)
#         im.save(STATEMENT_PATH)
#         IMAGE_FOLDER = os.path.join(PROBLEM_PATH, "images")
#         os.makedirs(IMAGE_FOLDER,exist_ok=True)
#         IMAGE_INDEX = 0
#         for image in images:
#             if image.startswith('//latex'):
#                 image = "https:" + image
#             IMAGE_PATH = os.path.join(IMAGE_FOLDER, str(IMAGE_INDEX) + ".png")
#             urllib.request.urlretrieve(image, IMAGE_PATH)
#             pil_image = Image.open(IMAGE_PATH)
#             layer = Image.new('RGB', pil_image.size, (255, 255, 255))
#             layer.paste(pil_image, (0, 0))
#             layer.save(IMAGE_PATH)
#             IMAGE_INDEX += 1