HOME/Articles/

pil example extract thumb (snippet)

Article Outline

Python pil example 'extract thumb'

Functions in program:

  • def read_tag(endian, fp):
  • def read_ifd(endian, fp):
  • def read_tiff(fp):

Modules used in program:

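  • import struct
  • import io
  • from fractions import Fraction
  • from picamera import PiCamera
  • from PIL import Image
  • from time import sleep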

python extract thumb

Python pil example: extract thumb

import io
import struct
from fractions import Fraction
from picamera import PiCamera
from PIL import Image
from time import sleep

def read_tiff(fp):
    """Parse the IFDs of a TIFF stream and return them keyed by name.

    *fp* must be a seekable binary stream positioned at the start of the
    TIFF header. Four IFDs are read: IFD0, IFD1 (chained from IFD0), the
    Exif IFD (pointed to by tag 34665 of IFD0) and the interoperability IFD
    (pointed to by tag 40965 of the Exif IFD). Only IFD1 is needed to find
    the thumbnail; the others are parsed for completeness.

    Returns a dict with the keys 'ifd0', 'ifd1', 'exif' and 'interop', each
    mapping to the tag dictionary produced by read_ifd.

    The structural expectations (version 42, non-zero IFD0 offset, no IFD
    following IFD1 / Exif / interop) are checked with assert statements; an
    unknown byte-order mark or a missing pointer tag raises KeyError.
    """
    # NOTE This could probably be faster with mmap but this is a simple demo

    # The first two bytes select the byte order used by everything else
    byte_orders = {b'II': '<', b'MM': '>'}
    endian = byte_orders[fp.read(2)]

    # The version number that follows is always 42
    version = struct.unpack(endian + 'h', fp.read(2))
    assert version == (42,)

    # The header ends with the offset of IFD0, which in turn leads to IFD1
    # and the Exif IFD
    (ifd0_offset,) = struct.unpack(endian + 'L', fp.read(4))
    assert ifd0_offset != 0

    fp.seek(ifd0_offset)
    ifd0, following = read_ifd(endian, fp)

    fp.seek(following)
    ifd1, following = read_ifd(endian, fp)
    assert following == 0

    # Exif IFD pointer
    fp.seek(ifd0[34665])
    exif, following = read_ifd(endian, fp)
    assert following == 0

    # Interoperability IFD pointer
    fp.seek(exif[40965])
    interop, following = read_ifd(endian, fp)
    assert following == 0

    return {
        'ifd0': ifd0,
        'ifd1': ifd1,
        'exif': exif,
        'interop': interop,
    }

def read_ifd(endian, fp):
    """Read one IFD from *fp* at its current position.

    *endian* is the struct byte-order prefix ('<' or '>') and *fp* is a
    binary stream positioned on the IFD's 2-byte entry count.

    Returns a ``(tags, next_offset)`` tuple: *tags* maps tag number to the
    value decoded by read_tag (entries whose value came back as None are
    left out) and *next_offset* is the 4-byte offset stored after the last
    entry.
    """
    (entry_count,) = struct.unpack(endian + 'H', fp.read(2))
    assert entry_count >= 0

    tags = {}
    # previous_tag only exists to check that tag numbers never decrease,
    # as the spec demands (the asserts can be cut out if you want ;)
    previous_tag = -1
    for _ in range(entry_count):
        tag, value = read_tag(endian, fp)
        assert tag >= previous_tag
        previous_tag = tag
        if value is None:
            continue
        tags[tag] = value

    # The entries are followed by the offset of the next IFD
    (next_offset,) = struct.unpack(endian + 'L', fp.read(4))
    return tags, next_offset

def read_tag(endian, fp):
    """Read one 12-byte IFD entry from *fp* and decode its value.

    Args:
        endian: struct byte-order prefix, '<' (little) or '>' (big).
        fp: Seekable binary stream positioned at the start of the entry.
            On return it is positioned just after the entry, even when the
            value had to be fetched from elsewhere in the stream.

    Returns:
        A ``(tag, value)`` tuple. *value* is None when the entry's
        data-type code is not in the table below. ASCII values are returned
        as str with the final (NUL) character removed, UNDEFINED values as
        bytes, and (signed) rationals as Fraction. For the numeric types a
        count of 1 yields a single value and any other count yields a tuple
        holding every value.

    Raises:
        struct.error: if the stream ends before the entry or its value.
        ZeroDivisionError: if a rational has a zero denominator.
    """
    # TIFF 6.0 data-type code -> struct code(s) making up ONE value
    tiff_types = {
        1:  'B',   # byte
        2:  's',   # ascii
        3:  'H',   # short
        4:  'L',   # long
        5:  'LL',  # rational (numerator, denominator)
        6:  'b',   # signed byte
        7:  's',   # undefined
        8:  'h',   # signed short
        9:  'l',   # signed long
        10: 'll',  # signed rational (numerator, denominator)
        11: 'f',   # float
        12: 'd',   # double
        }
    entry = struct.Struct(endian + 'HHLL')
    tag, datatype, count, offset = entry.unpack(fp.read(entry.size))
    try:
        code = tiff_types[datatype]
    except KeyError:
        return tag, None

    if code == 's':
        # A single bytes field that is `count` bytes long
        fmt = '%s%ds' % (endian, count)
    else:
        # A rational is two integers, so repeat the integer code twice per
        # value ('<3LL' would describe 3 + 1 longs, not 3 pairs)
        fmt = '%s%d%s' % (endian, count * len(code), code[0])
    # Measure WITH the byte-order prefix: without it struct uses native
    # sizes, where 'L' is 8 bytes on many 64-bit platforms instead of the
    # 4 bytes TIFF uses
    valuelen = struct.calcsize(fmt)

    if valuelen <= 4:
        # Small values live left-justified in the 4-byte offset field
        # itself; re-pack it to recover the original bytes
        raw = struct.pack(endian + 'L', offset)[:valuelen]
    else:
        pos = fp.tell()
        try:
            fp.seek(offset)
            raw = fp.read(valuelen)
        finally:
            # Always return to the next entry of the IFD
            fp.seek(pos)
    fields = struct.unpack(fmt, raw)

    if datatype == 2:
        # Drop the trailing NUL terminator
        return tag, fields[0].decode('ascii')[:-1]
    if datatype == 7:
        return tag, fields[0]
    if len(code) == 2:
        values = tuple(
            Fraction(numerator, denominator)
            for numerator, denominator in zip(fields[0::2], fields[1::2])
        )
    else:
        values = fields
    return tag, (values[0] if count == 1 else values)


# Open a file for the capture, one for the extracted thumbnail, and another for
# the exif data (any of these could be in-memory streams; it's just a little
# easier to debug stuff with actual files). The with-block guarantees all
# three are flushed and closed, even if a later step fails.
with io.open('image.jpg', 'w+b') as image_file, \
        io.open('thumb.jpg', 'w+b') as thumb_file, \
        io.open('exif.tiff', 'w+b') as exif_file:

    # Capture an image containing a thumbnail in the Exif data. The camera is
    # released as soon as the capture is done.
    with PiCamera(resolution='720p') as camera:
        # Pause briefly before capturing (presumably camera warm-up time)
        sleep(2)
        camera.capture(image_file, quality=95, thumbnail=(320, 180, 50))

    # At this point, image.jpg contains the full image, and all the Exif data
    # including the thumbnail. Unfortunately, while PIL knows how to extract
    # the Exif block from the JPEG (which saves me writing any JPEG parsing
    # code), it doesn't know how to extract the thumbnail from the TIFF in the
    # Exif. But TIFF's IFD isn't that hard to parse with a little help from
    # struct...
    image_file.seek(0)
    image = Image.open(image_file)

    # Make sure the Exif magic is in place and strip it off, then write the
    # rest to the exif_file (which is named ".tiff" because it's actually a
    # TIFF file)
    data = image.info['exif']
    assert data[:6] == b'Exif\x00\x00'
    exif_file.write(data[6:])

    # Now use the data in IFD1 to find the thumbnail data: tag 513 is used as
    # the position of the thumbnail within the TIFF and tag 514 as its length
    # in bytes
    exif_file.seek(0)
    ifds = read_tiff(exif_file)
    exif_file.seek(ifds['ifd1'][513])
    thumb_file.write(exif_file.read(ifds['ifd1'][514]))