HOME/Articles/

pil example down (snippet)

Article Outline

Python PIL example: 'down'

Functions in program:

  • def pre_image_check():
  • def calculateParallel(arr, threads=2):
  • def squareNumber(item):
  • def read_csv_data(csv_path):
  • def resize_image(image_name):
  • def blank_check(name, pil_data):
  • def chunks(l, n):

Modules used in program:

  • import time
  • import numpy as np
  • import argparse
  • import os
  • import shutil
  • import requests
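  • import csv
  • from tqdm import tqdm
  • from PIL import Image, ImageOps
  • from concurrent.futures import ThreadPoolExecutor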

python down

Python pil example: down

import csv
import requests
import shutil
import os
import argparse
import numpy as np
import time
from tqdm import tqdm
from PIL import Image, ImageOps
from concurrent.futures import ThreadPoolExecutor

# Reference "placeholder" images, opened at import time from the current
# working directory.  blank_check() compares downloads against them.
blank_data = Image.open('blank.jpg')
blank2_data = Image.open('blank2.jpg')

# Pixel arrays of the two reference images, built once so blank_check() does
# not have to convert them on every call.
# NOTE(review): these are taken from the images as loaded, while blank_check()
# converts its input to mode 'L' first -- confirm the reference files are
# single-channel, otherwise the array shapes will not line up.
blank_arr = np.array(blank_data)
blank2_arr = np.array(blank2_data)

# Directory that receives the downloaded files (later overwritten in place by
# their 256x256 version).  Paths are joined by plain string concatenation, so
# the trailing slash is required.
original_path = '/root/original/'
# Directory that receives the 28x28 version of every kept image.
resize_path = '/root/resize/'
# CSV file read by the script entry point; rows are accessed by their
# 'image_id' and 'original_url' columns.
csv_path = 'main.csv'
# Progress bar shared with the worker threads: the entry point assigns a tqdm
# instance per batch and squareNumber() advances it.
pbar = None


def chunks(l, n):
    """Lazily split the sequence ``l`` into consecutive slices of length ``n``.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    Nothing is computed until the returned generator is iterated.
    """
    starts = range(0, len(l), n)
    yield from (l[begin:begin + n] for begin in starts)


def blank_check(name, pil_data):
    """Delete a downloaded file if it is one of the known placeholder images.

    The image is converted to 8-bit grayscale and compared pixel-for-pixel
    with every module-level reference image (``blank_data``/``blank_arr`` and
    ``blank2_data``/``blank2_arr``) whose dimensions match.

    Args:
        name: File name of the image inside ``original_path``.
        pil_data: The opened PIL image for that file.

    Returns:
        True if the image equals a reference image (its file under
        ``original_path`` has then been removed), False otherwise.
    """
    gray = pil_data.convert('L')
    gray_pixels = None  # built lazily, only once a reference has the same size

    # Try every reference instead of stopping at the first size match, so the
    # second placeholder is still checked when both share the same dimensions.
    for ref_image, ref_pixels in ((blank_data, blank_arr),
                                  (blank2_data, blank2_arr)):
        if gray.size != ref_image.size:
            continue

        if gray_pixels is None:
            gray_pixels = np.array(gray)

        # array_equal states the intent directly: no temporary difference
        # array, no reliance on unsigned wrap-around in a subtraction, and a
        # shape mismatch simply means "not the same image".
        # NOTE(review): the reference arrays come from the unconverted
        # reference images -- confirm those files are single-channel.
        if np.array_equal(gray_pixels, ref_pixels):
            os.remove('{}{}'.format(original_path, name))
            return True

    return False


def resize_image(image_name):
    """Replace a download with its 256x256 version and write a 28x28 copy.

    The file ``original_path + image_name`` is opened and passed to
    ``blank_check``; if that reports a placeholder (and has removed the file)
    nothing else happens.  Otherwise the image is cropped/scaled with
    ``ImageOps.fit`` to 256x256, saved over the original file, and to 28x28,
    saved as ``resize_path + image_name``.

    Args:
        image_name: File name (not a full path) of the downloaded image.
    """
    # The context manager closes the underlying file handle, which matters
    # when many worker threads open images concurrently.
    with Image.open(original_path + image_name) as img:
        if blank_check(image_name, img):
            return

        # Image.LANCZOS is the filter formerly aliased as Image.ANTIALIAS;
        # the alias no longer exists in Pillow >= 10.
        large = ImageOps.fit(img, (256, 256), Image.LANCZOS)
        small = ImageOps.fit(img, (28, 28), Image.LANCZOS)

    # Both results are independent images, so the source can be closed before
    # its file is overwritten.
    large.save(original_path + image_name)
    small.save(resize_path + image_name)


def read_csv_data(csv_path):
    """Read a CSV file with a header row into a list of row dictionaries.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A list with one mapping per data row, keyed by the header names.
        An empty file yields an empty list.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; without it, line breaks embedded in quoted
    # fields are rewritten by universal-newline translation.
    with open(csv_path, 'r', newline='') as theFile:
        return list(csv.DictReader(theFile))


def squareNumber(item, timeout=30):
    """Download one image, store it under ``original_path`` and resize it.

    Despite its name, this is the per-item download job.  On an HTTP 200
    response the body is streamed to ``original_path + '<image_id>.jpg'`` and
    the file is handed to ``resize_image``; any other status is skipped.
    The shared progress bar ``pbar`` is advanced once per call, whether the
    download succeeded, was skipped, or raised.

    Args:
        item: Mapping with the keys ``'original_url'`` and ``'image_id'``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled server cannot block a worker thread forever.

    Raises:
        Whatever ``requests``, the file write or ``resize_image`` raise;
        errors are not swallowed here.
    """
    try:
        # With stream=True the connection is only released once the response
        # is closed, hence the context manager.
        with requests.get(item['original_url'], stream=True,
                          timeout=timeout) as r:
            if r.status_code != 200:
                return

            file_name = '{}.jpg'.format(item['image_id'])
            with open(original_path + file_name, 'wb') as f:
                # Have urllib3 undo gzip/deflate transfer encoding while
                # the raw stream is copied to disk.
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, f)

        resize_image(file_name)
    finally:
        # Keep the progress bar in step with the number of processed items
        # even when a download fails.
        pbar.update(1)


# Runs the per-item job (squareNumber) for every element on a thread pool.
def calculateParallel(arr, threads=2):
    """Process every item of ``arr`` with ``squareNumber`` on a thread pool.

    Blocks until all items have been processed.  A failure in one item does
    not stop the others; each failure is printed afterwards instead of being
    silently discarded.

    Args:
        arr: Iterable of items to hand to ``squareNumber``.
        threads: Maximum number of worker threads.
    """
    with ThreadPoolExecutor(max_workers=threads) as executor:
        jobs = [(item, executor.submit(squareNumber, item)) for item in arr]

    # Leaving the with-block waits for all workers, so every future is done
    # and .exception() returns immediately.
    for item, job in jobs:
        error = job.exception()
        if error is not None:
            print('failed: {!r}: {!r}'.format(item, error))

def pre_image_check():
    """Delete originals that have no counterpart in the resize directory.

    Every file name found in ``original_path`` is printed; a file whose name
    does not also exist in ``resize_path`` is announced and removed first.
    """
    # A set makes each membership test O(1) instead of scanning a list once
    # per file in the original directory.
    prev_down_files = set(os.listdir(resize_path))

    for name in os.listdir(original_path):
        if name not in prev_down_files:
            print(name + 'delete')
            os.remove('{}{}'.format(original_path, name))
        print(name)

if __name__ == "__main__":
    # Remove originals that have no resized counterpart so they are fetched
    # again below.
    pre_image_check()

    # One directory listing, hashed once, keeps the filter below linear in
    # the number of CSV rows.
    downloaded = set(os.listdir(original_path))

    # Rows whose image file is not present in the original directory yet.
    unique_arr = [
        item for item in read_csv_data(csv_path)
        if '{}.jpg'.format(item['image_id']) not in downloaded
    ]

    print(len(unique_arr))

    # Work through the backlog in batches of 1000 rows, 200 threads each.
    for tt in chunks(unique_arr, 1000):
        # This runs at module scope, so the name bound here is the global
        # ``pbar`` that squareNumber() updates.  The with-block closes each
        # batch's bar before the next one is created.
        with tqdm(total=len(tt)) as pbar:
            calculateParallel(tt, 200)