HOME/Articles/

pil example train (snippet)

Article Outline

Python example 'train' (note: despite the title, the code uses TensorFlow and OpenCV, not PIL)

Functions in program:

  • def model_fn(features, labels, mode):
  • def input_fn(mode):

Modules used in program:

  • import tensorflow as tf
  • import os
  • import cv2
  • import uuid
  • import matplotlib.pyplot as plt

python train

Python pil example: train

from data import get_webcam_input_fn, get_dataset
import matplotlib.pyplot as plt
import uuid
import cv2
import os
import tensorflow as tf


# Directories - PLEASE CONFIGURE IT (edit these paths before running)
models_root = '/.../models'  # Output dir for the model and tensorboard logs
train_images_root = '/.../train-backgrounds'  # Train images (download random images from the internet)
valid_images_root = '/.../valid-backgrounds'  # Valid images (download random images from the internet)
logo_jpg_filename = '/.../logo.jpg'  # A jpg of the "Data Driven Montreal" logo (download https://cdn.evbuc.com/images/39269779/90171842189/2/logo.png , print, take a picture using your webcam, crop a square around the logo and save)


# Config
image_size = (240, 320, 3)  # (height, width, channels) fed to the network
batch_size = 16  # samples per batch; the 4x4 prediction grid below assumes <= 16
train_steps = 200  # maximum optimizer steps for training


# Model ID
# A fresh random UUID per run so checkpoints/logs of separate runs never collide.
model_uuid = uuid.uuid4()
model_dir = os.path.join(models_root, f'{model_uuid}')


# Input Function
def input_fn(mode):
    """Build the tf.data pipeline for the given Estimator mode.

    TRAIN reads backgrounds from the train directory; any other mode
    (EVAL/PREDICT) reads from the validation directory. Batches and
    prefetches one batch ahead.
    """
    images_root = (
        train_images_root
        if mode == tf.estimator.ModeKeys.TRAIN
        else valid_images_root
    )
    dataset = get_dataset(image_size, images_root, logo_jpg_filename)
    return dataset.batch(batch_size).prefetch(1)


# Model Function
def model_fn(features, labels, mode):
    """Estimator model_fn: small conv net regressing 2 normalized coordinates.

    Returns an EstimatorSpec appropriate for PREDICT, EVAL or TRAIN.
    """

    with tf.name_scope('Input'):
        input_tensor = tf.identity(features)

    with tf.name_scope('Model'):
        net = input_tensor
        # Six 3x3-conv + 2x2-maxpool stages, progressively shrinking the map.
        for n_filters in (32, 64, 128, 128, 128, 128):
            net = tf.layers.conv2d(net, n_filters, (3, 3), activation=tf.nn.relu)
            net = tf.layers.max_pooling2d(net, (2, 2), (2, 2))
        net = tf.layers.flatten(net)
        net = tf.layers.dense(net, 256, tf.nn.relu)
        net = tf.layers.dense(net, 512, tf.nn.relu)
        # Sigmoid head: 2 outputs squashed to [0, 1].
        net = tf.layers.dense(net, 2, tf.nn.sigmoid, name='Predictions')
        predictions = {'predictions': net}

    # PREDICT: ship the input image along with the coordinates, then stop.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions['image'] = input_tensor
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    with tf.name_scope('GroundTruth'):
        targets = tf.identity(labels, name='labels')

    with tf.name_scope('Loss'):
        loss = tf.losses.mean_squared_error(labels=targets, predictions=predictions['predictions'])

    with tf.name_scope('Summaries'):
        tf.summary.image('images', input_tensor)
        tf.summary.scalar('loss', loss)
        tf.summary.histogram('predictions', predictions['predictions'])
        tf.summary.histogram('targets', targets)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss)

    with tf.name_scope('Optimizer'):
        adam = tf.train.AdamOptimizer()

        # Run any collected update ops (e.g. batch-norm statistics) before
        # each optimizer step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = adam.minimize(loss, global_step=tf.train.get_global_step())

        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)


# Estimator
run_config = tf.estimator.RunConfig(
    model_dir=model_dir,
    save_summary_steps=10,  # write summaries every 10 steps
    save_checkpoints_secs=900,  # checkpoint at most every 15 minutes
    keep_checkpoint_max=1,  # keep only the most recent checkpoint
    log_step_count_steps=10
)
estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)


# Training Loop
# train_and_evaluate alternates training (up to train_steps total) with
# periodic 20-batch evaluations on the validation input.
train_spec = tf.estimator.TrainSpec(input_fn=input_fn, max_steps=train_steps)
eval_spec = tf.estimator.EvalSpec(input_fn=input_fn, steps=20, name='eval', throttle_secs=30, start_delay_secs=1)

tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)


# Visualize predictions
# Plot up to batch_size predictions on a 4x4 grid (assumes batch_size <= 16).
# FIX: the original never called plt.show(), so in a plain (non-interactive)
# script run the figure was never rendered on screen.
predictions = estimator.predict(input_fn)
for i, p in zip(range(batch_size), predictions):
    plt.subplot(4, 4, i+1)
    img = p['image']  # assumes image values scaled to [-1, 1] — TODO confirm against data.py
    coords = p['predictions']  # (x, y) in [0, 1], relative to image size

    # Map image back from [-1, 1] to [0, 1] for display.
    plt.imshow(img / 2.0 + 0.5)
    # Scale normalized coords to pixels: x * width, y * height.
    plt.plot(coords[0] * image_size[1], coords[1] * image_size[0], 'or')

plt.show()


# Webcam
cap, webcam_input_fn = get_webcam_input_fn(image_size)

# Predictions
predictions = estimator.predict(webcam_input_fn)


# Visualize Webcam
# Stream frames, draw the predicted logo position, quit on 'q'.
# FIX: wrapped in try/finally so the webcam and the OpenCV windows are
# released even if the prediction generator raises or is exhausted — the
# original only cleaned up on an explicit 'q' press, leaking the capture
# device on any other exit path.
cv2.namedWindow('Press "q" to quit', cv2.WINDOW_NORMAL)
try:
    for p in predictions:

        # Webcam image + Predictions
        # Reverse channel order (RGB -> BGR for OpenCV) and rescale [-1, 1] -> [0, 1].
        img = (p['image'][:, :, ::-1] + 1.0) / 2.0
        coords = p['predictions']

        # Draw prediction (coords are normalized; scale to pixel positions)
        img = cv2.circle(img, (int(coords[0] * img.shape[1]), int(coords[1] * img.shape[0])), 2, (0, 255, 0), 19)

        # Display the resulting frame (mirrored horizontally)
        cv2.imshow('Press "q" to quit', img[:, ::-1, :])

        # Wait for 'q' click
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()