Article Outline
Python PIL example: 'dreamImage'
Functions in program:
def objective_guide(dst):
def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4, end='inception_4c/output', clip=True, **step_params):
def make_step(net, step_size=1.5, end='inception_4c/output', jitter=32, clip=True, objective=objective_L2):
def objective_L2(dst):
def deprocess(net, img):
def preprocess(net, img):
Modules used in program:
import caffe
import argparse
import PIL.Image
import scipy.ndimage as nd
import numpy as np
import os.path, os, time, sys
Python PIL example: dreamImage
# DreamImage.py - parameterized deepdream with support for multiple models,
# auto-image-scaling, guided dreaming and kaleido mode (self-guided).
# Jim Bumgardner 7-12-15
# this assumes the deploy file is called 'deploy.prototxt' - use the -deploy
# option (or rename the file) if a model such as googlenet_places205 names it differently.
# sample invocations that produce nice results:
# python dreamImage.py -i $1.jpg -model ../../caffe/models/googlenet_places205 -end inception_3a/output -iter 20 -octaves 5 -o $1_m3a.jpg
# python dreamImage.py -i $1.jpg -model ../../caffe/models/googlenet_places205 -end inception_3b/output -iter 20 -octaves 5 -o $1_m3b.jpg
# python dreamImage.py -i $1.jpg -model ../../caffe/models/googlenet_places205 -end inception_4a/output -iter 30 -octaves 6 -o $1_m4a.jpg
# python dreamImage.py -i $1.jpg -model ../../caffe/models/bvlc_googlenet -end inception_3b/output -iter 20 -octaves 5 -o $1_g3b.jpg
# python dreamImage.py -i $1.jpg -model ../../caffe/models/bvlc_googlenet -end inception_3a/output -iter 20 -octaves 5 -o $1_g3a.jpg
# python dreamImage.py -i $1.jpg -model ../../caffe/models/bvlc_googlenet -end inception_4a/output -iter 20 -octaves 6 -o $1_g4a.jpg
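# ($1 in the invocations above is a shell positional argument: these lines are
# meant for a wrapper script, such as the do_venice.sh mentioned in the notes
# at the end, called with the base image name, minus extension, as its argument)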
import os.path, os, time, sys
os.environ['GLOG_minloglevel'] = '3'
from cStringIO import StringIO
import numpy as np
import scipy.ndimage as nd
import PIL.Image
from google.protobuf import text_format
import argparse
import caffe
parser = argparse.ArgumentParser(description='DreamImage - process a single frame')
parser.add_argument('-i',default='clouds_512.jpg', help='input image')
parser.add_argument('-o',default='untitled.jpg', help='output image name')
parser.add_argument('-model',default='../../caffe/models/bvlc_googlenet', help='caffe model path')
parser.add_argument('-deploy',default='', help='caffe model deploy file override')
parser.add_argument('-end',default='inception_3b/output', help='caffe end layer')
parser.add_argument('-iter', default=10, type=int, help='Iterations, def=10')
parser.add_argument('-octaves', default=4, type=int, help='Octaves, def=4')
parser.add_argument('-oscale', default=1.4, type=float, help='Octave scale, def=1.4')
parser.add_argument('-debug', default=0, type=int, help='Debugging level')
parser.add_argument('-keys', default=False, action='store_true', help='List the network blob (layer) names and exit')
parser.add_argument('-gpu', default=False, action='store_true', help='Use gpu')
parser.add_argument('-force', default=False, action='store_true', help='Force file overwrite')
parser.add_argument('-kaleido', default=False, action='store_true', help='Kaleido mode - uses source image as guide')
parser.add_argument('-guide', default='', help='Guide image')
parser.add_argument('-raw', default=False, action='store_true', help='Don\'t rescale image')
args = parser.parse_args()
st = time.time()
imgName = args.i
if not os.path.exists(imgName):
    # fall back to a .png with the same base name if the .jpg is missing
    if imgName.endswith('.jpg') and os.path.exists(imgName[:-4] + '.png'):
        imgName = imgName[:-4] + '.png'
    else:
        print("Can't find image: " + imgName)
        sys.exit(1)
outName = args.o
if os.path.isfile(outName) and not args.force:
    print("Output file exists (use -force to overwrite): " + outName)
    sys.exit(1)
model_path = args.model  # e.g. '../../caffe/models/bvlc_googlenet' - substitute your path here
if model_path[-1] == '/':
model_path = model_path[:-1]
if args.deploy == '':
net_fn = model_path + '/deploy.prototxt'
else:
net_fn = args.deploy
param_fn = model_path + '/' + (model_path.split('/'))[-1] + '.caffemodel'
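# e.g. '../../caffe/models/bvlc_googlenet' yields
# '../../caffe/models/bvlc_googlenet/bvlc_googlenet.caffemodel'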
# Patching model to be able to compute gradients.
# Note that you can also manually add "force_backward: true" line to "deploy.prototxt".
model = caffe.io.caffe_pb2.NetParameter()
text_format.Merge(open(net_fn).read(), model)
model.force_backward = True
open('tmp.prototxt', 'w').write(str(model))
net = caffe.Classifier('tmp.prototxt', param_fn,
mean = np.float32([104.0, 116.0, 122.0]), # ImageNet mean, training set dependent
channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB
if args.gpu:
caffe.set_mode_gpu()
caffe.set_device(0)
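    # (requires a Caffe build with GPU support; a CPU-only build raises an
    # error on set_mode_gpu)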
if args.keys:
print(net.blobs.keys())
sys.exit()
# a couple of utility functions for converting to and from Caffe's input image layout
def preprocess(net, img):
    # HxWxC RGB float image -> Caffe's CxHxW BGR layout, mean-subtracted
    return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']
def deprocess(net, img):
    # inverse of preprocess: add the mean back and restack as HxWxC RGB
    return np.dstack((img + net.transformer.mean['data'])[::-1])
def objective_L2(dst):
    # maximize the L2 norm of the layer's activations; the gradient of
    # 0.5*||a||^2 with respect to a is a itself
    dst.diff[:] = dst.data
def make_step(net, step_size=1.5, end='inception_4c/output',
jitter=32, clip=True, objective=objective_L2):
'''Basic gradient ascent step.'''
src = net.blobs['data'] # input image is stored in Net's 'data' blob
dst = net.blobs[end]
ox, oy = np.random.randint(-jitter, jitter+1, 2)
src.data[0] = np.roll(np.roll(src.data[0], ox, -1), oy, -2) # apply jitter shift
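    # (jittering means each step optimizes a slightly shifted view of the image,
    # so the emerging patterns don't lock to a fixed pixel grid)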
net.forward(end=end)
objective(dst) # specify the optimization objective
net.backward(start=end)
g = src.diff[0]
# apply normalized ascent step to the input image
src.data[:] += step_size/np.abs(g).mean() * g
src.data[0] = np.roll(np.roll(src.data[0], -ox, -1), -oy, -2) # unshift image
if clip:
bias = net.transformer.mean['data']
src.data[:] = np.clip(src.data, -bias, 255-bias)
def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4,
end='inception_4c/output', clip=True, **step_params):
# prepare base images for all octaves
octaves = [preprocess(net, base_img)]
for i in xrange(octave_n-1):
octaves.append(nd.zoom(octaves[-1], (1, 1.0/octave_scale,1.0/octave_scale), order=1))
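    # octaves[0] is full resolution; each later entry is 1/octave_scale smaller,
    # and the loop below runs from the smallest octave back up to full size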
src = net.blobs['data']
detail = np.zeros_like(octaves[-1]) # allocate image for network-produced details
for octave, octave_base in enumerate(octaves[::-1]):
h, w = octave_base.shape[-2:]
if octave > 0:
# upscale details from the previous octave
h1, w1 = detail.shape[-2:]
detail = nd.zoom(detail, (1, 1.0*h/h1,1.0*w/w1), order=1)
src.reshape(1,3,h,w) # resize the network's input image size
src.data[0] = octave_base+detail
        print("octave %d  %s  %d iterations ..." % (octave, end, iter_n))
for i in xrange(iter_n):
make_step(net, end=end, clip=clip, **step_params)
# visualization
vis = deprocess(net, src.data[0])
if not clip: # adjust image contrast if clipping is disabled
vis = vis*(255.0/np.percentile(vis, 99.98))
            # print(octave, i, end, vis.shape)
# extract details produced on the current octave
detail = src.data[0]-octave_base
# returning the resulting image
return deprocess(net, src.data[0])
def objective_guide(dst):
global guide_features
x = dst.data[0].copy()
y = guide_features
ch = x.shape[0]
x = x.reshape(ch,-1)
y = y.reshape(ch,-1)
A = x.T.dot(y) # compute the matrix of dot-products with guide features
dst.diff[0].reshape(ch,-1)[:] = y[:,A.argmax(1)] # select ones that match best
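    # e.g. with x reshaped to (ch, N) and y to (ch, M), A is (N, M); A.argmax(1)
    # picks, for each source position, the guide feature vector it matches best,
    # and that vector becomes the gradient target for that position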
srcImage = PIL.Image.open(imgName).convert('RGB')
if not args.raw:
(w, h) = srcImage.size
if w > 1024 or h > 1024:
srcImage.thumbnail((1024,1024),PIL.Image.ANTIALIAS)
img = np.float32(srcImage)
if args.kaleido:
    kaleid = PIL.Image.open(imgName).convert('RGB')
kaleid.thumbnail((224,224),PIL.Image.ANTIALIAS)
guide = np.float32(kaleid)
h, w = guide.shape[:2]
src, dst = net.blobs['data'], net.blobs[args.end]
src.reshape(1,3,h,w)
src.data[0] = preprocess(net, guide)
net.forward(end=args.end)
guide_features = dst.data[0].copy()
objective = objective_guide
elif args.guide != '':
    guide = np.float32(PIL.Image.open(args.guide).convert('RGB'))
h, w = guide.shape[:2]
src, dst = net.blobs['data'], net.blobs[args.end]
src.reshape(1,3,h,w)
src.data[0] = preprocess(net, guide)
net.forward(end=args.end)
guide_features = dst.data[0].copy()
objective = objective_guide
else:
objective = objective_L2
frame = deepdream(net, img, end=args.end, iter_n=args.iter, octave_scale=args.oscale, octave_n=args.octaves, objective=objective)
PIL.Image.fromarray(np.uint8(frame)).save(outName)
print("Elapsed: %.2f" % (time.time()-st))
comments = """
Observations
clouds_1024 at (20 iter / 6 octaves / layer 4a) takes 88 secs (CPU only - need to figure out how to compile for GPU...)
clouds_1024 at (20/4/4a) takes 75 secs (CPU only)
do_venice.sh (30/6/4a) on clouds_1024 takes 153 secs (with one other deepdream process running), 187 secs (with two)
512x281 image takes 12.3 secs
1024x563 image takes 46.48 secs per frame
3264x1794 image should take approx 7 minutes
bvlc_googlenet takes 55.76 secs
Finetune_Flickr takes
Layer choice
'data',
'conv1/7x7_s2',
'pool1/3x3_s2',
'pool1/norm1',
'conv2/3x3_reduce',
'conv2/3x3',
'conv2/norm2',
'pool2/3x3_s2',
'pool2/3x3_s2_pool2/3x3_s2_0_split_0',
'pool2/3x3_s2_pool2/3x3_s2_0_split_1',
'pool2/3x3_s2_pool2/3x3_s2_0_split_2',
'pool2/3x3_s2_pool2/3x3_s2_0_split_3',
'inception_3a/1x1',
'inception_3a/3x3_reduce',
'inception_3a/3x3',
'inception_3a/5x5_reduce',
'inception_3a/5x5',
'inception_3a/pool',
'inception_3a/pool_proj',
'inception_3a/output',
'inception_3a/output_inception_3a/output_0_split_0',
'inception_3a/output_inception_3a/output_0_split_1',
'inception_3a/output_inception_3a/output_0_split_2',
'inception_3a/output_inception_3a/output_0_split_3',
'inception_3b/1x1',
'inception_3b/3x3_reduce',
'inception_3b/3x3',
'inception_3b/5x5_reduce', <-- STRIPES
'inception_3b/5x5',
'inception_3b/pool',
'inception_3b/pool_proj',
'inception_3b/output',
'pool3/3x3_s2',
'pool3/3x3_s2_pool3/3x3_s2_0_split_0',
'pool3/3x3_s2_pool3/3x3_s2_0_split_1',
'pool3/3x3_s2_pool3/3x3_s2_0_split_2',
'pool3/3x3_s2_pool3/3x3_s2_0_split_3',
'inception_4a/1x1',
'inception_4a/3x3_reduce',
'inception_4a/3x3',
'inception_4a/5x5_reduce', <-- SWIRLS
'inception_4a/5x5',
'inception_4a/pool',
'inception_4a/pool_proj',
'inception_4a/output',
'inception_4a/output_inception_4a/output_0_split_0',
'inception_4a/output_inception_4a/output_0_split_1',
'inception_4a/output_inception_4a/output_0_split_2',
'inception_4a/output_inception_4a/output_0_split_3',
'inception_4b/1x1',
'inception_4b/3x3_reduce',
'inception_4b/3x3',
'inception_4b/5x5_reduce',
'inception_4b/5x5',
'inception_4b/pool',
'inception_4b/pool_proj',
'inception_4b/output',
'inception_4b/output_inception_4b/output_0_split_0',
'inception_4b/output_inception_4b/output_0_split_1',
'inception_4b/output_inception_4b/output_0_split_2',
'inception_4b/output_inception_4b/output_0_split_3',
'inception_4c/1x1',
'inception_4c/3x3_reduce',
'inception_4c/3x3',
'inception_4c/5x5_reduce',
'inception_4c/5x5',
'inception_4c/pool',
'inception_4c/pool_proj',
'inception_4c/output',
'inception_4c/output_inception_4c/output_0_split_0',
'inception_4c/output_inception_4c/output_0_split_1',
'inception_4c/output_inception_4c/output_0_split_2',
'inception_4c/output_inception_4c/output_0_split_3',
'inception_4d/1x1',
'inception_4d/3x3_reduce',
'inception_4d/3x3',
'inception_4d/5x5_reduce',
'inception_4d/5x5',
'inception_4d/pool',
'inception_4d/pool_proj',
'inception_4d/output',
'inception_4d/output_inception_4d/output_0_split_0',
'inception_4d/output_inception_4d/output_0_split_1',
'inception_4d/output_inception_4d/output_0_split_2',
'inception_4d/output_inception_4d/output_0_split_3',
'inception_4e/1x1',
'inception_4e/3x3_reduce',
'inception_4e/3x3',
'inception_4e/5x5_reduce',
'inception_4e/5x5',
'inception_4e/pool',
'inception_4e/pool_proj',
'inception_4e/output',
'pool4/3x3_s2',
'pool4/3x3_s2_pool4/3x3_s2_0_split_0',
'pool4/3x3_s2_pool4/3x3_s2_0_split_1',
'pool4/3x3_s2_pool4/3x3_s2_0_split_2',
'pool4/3x3_s2_pool4/3x3_s2_0_split_3',
'inception_5a/1x1',
'inception_5a/3x3_reduce',
'inception_5a/3x3',
'inception_5a/5x5_reduce',
'inception_5a/5x5',
'inception_5a/pool',
'inception_5a/pool_proj',
'inception_5a/output',
'inception_5a/output_inception_5a/output_0_split_0',
'inception_5a/output_inception_5a/output_0_split_1',
'inception_5a/output_inception_5a/output_0_split_2',
'inception_5a/output_inception_5a/output_0_split_3',
'inception_5b/1x1',
'inception_5b/3x3_reduce',
'inception_5b/3x3',
'inception_5b/5x5_reduce',
'inception_5b/5x5',
'inception_5b/pool',
'inception_5b/pool_proj',
'inception_5b/output',
'pool5/7x7_s1',
'loss3/classifier',
'prob'
"""