In [1]:
%matplotlib inline

Generate Images

This ipython notebook demonstrates how to generate an image dataset with rich ground truth from a virtual environment.

In [2]:
import time
# Stamp the notebook with the UTC time at which this cell was last executed.
print(time.strftime("The last update of this file: %Y-%m-%d %H:%M:%S", time.gmtime()))
The last update of this file: 2017-10-21 21:26:41

Load some Python libraries. The dependencies for this tutorial are PIL, NumPy, and Matplotlib.

In [3]:
from __future__ import division, absolute_import, print_function
import os, sys, time, re, json
import numpy as np
import matplotlib.pyplot as plt

# Short alias for matplotlib's image reader.
imread = plt.imread
def imread8(im_file):
    ''' Read image as a 8-bit numpy array

    im_file: the image to read; it is passed unchanged to PIL.Image.open.
    Returns the decoded image converted to a numpy array.
    '''
    # Imported locally (as read_png does) because PIL is not imported at the top of the notebook.
    import PIL.Image
    im = np.asarray(PIL.Image.open(im_file))
    return im

def read_png(res):
    ''' Decode an in-memory PNG into a numpy array

    res: the PNG-encoded bytes, e.g. the reply of a 'vget ... png' request.
    Returns the decoded image converted to a numpy array.
    '''
    # io.BytesIO is available on both Python 2 and Python 3, whereas the
    # StringIO module only exists on Python 2.
    import io, PIL.Image
    img = PIL.Image.open(io.BytesIO(res))
    return np.asarray(img)

def read_npy(res):
    ''' Decode an in-memory .npy payload into a numpy array

    res: the bytes of a serialized numpy array, e.g. the reply of a
         'vget ... npy' request.
    Returns the array loaded by np.load.
    '''
    # io.BytesIO is available on both Python 2 and Python 3, whereas the
    # StringIO module only exists on Python 2.
    import io
    return np.load(io.BytesIO(res))

Connect to the game

Load the unrealcv Python client. Run `pip install unrealcv` first.

In [4]:
from unrealcv import client
# Try to open the connection to the running game before checking its state.
client.connect()
if not client.isconnected():
    print('UnrealCV server is not running. Run the game downloaded from http://unrealcv.github.io first.')
INFO:__init__:211:Got connection confirm: 'connected to RealisticRendering'

Make sure the connection works well

In [5]:
res = client.request('vget /unrealcv/status')
# The image resolution and port are configured in the config file.
# Print the reply so the status report is visible in the cell output.
print(res)
Is Listening
Client Connected
Config file: /Users/qiuwch/unrealcv/UE4Binaries/RealisticRendering/MacNoEditor/
Port: 9000
Width: 640
Height: 480
FOV: 90.000000
EnableInput: true
EnableRightEye: false

Load a camera trajectory

In [7]:
traj_file = './camera_traj.json' # Relative to this python script
# Read the trajectory inside a context manager so the file handle is closed
# as soon as the JSON has been parsed (json is imported at the top of the notebook).
with open(traj_file) as traj_fp:
    camera_trajectory = json.load(traj_fp)
# We will show how to record a camera trajectory in another tutorial

Render an image

In [8]:
idx = 1
loc, rot = camera_trajectory[idx]
# Set position of the first camera
client.request('vset /camera/0/location {x} {y} {z}'.format(**loc))
client.request('vset /camera/0/rotation {pitch} {yaw} {roll}'.format(**rot))

# Get image
res = client.request('vget /camera/0/lit lit.png')
print('The image is saved to %s' % res)

# It is also possible to get the png directly without saving to a file
res = client.request('vget /camera/0/lit png')
im = read_png(res)
print(im.shape)

# Visualize the image we just captured
plt.imshow(im)
The image is saved to /Users/qiuwch/unrealcv/UE4Binaries/RealisticRendering/MacNoEditor/
(480, 640, 4)
<matplotlib.image.AxesImage at 0x108dd8310>

Ground truth generation

Generate ground truth from this virtual scene

In [9]:
res = client.request('vget /camera/0/object_mask png')
object_mask = read_png(res)
res = client.request('vget /camera/0/normal png')
normal = read_png(res)

# Visualize the captured ground truth
plt.imshow(object_mask)
# Open a second figure so the normal map does not overwrite the object mask.
plt.figure()
plt.imshow(normal)
<matplotlib.image.AxesImage at 0x1097db8d0>

Depth is retrieved as a numpy array. For UnrealCV < v0.3.8, the depth is saved as an exr file, but this has two issues: 1. Exr is not well supported in Linux. 2. It depends on OpenCV to read the exr file, which is hard to install.

In [10]:
res = client.request('vget /camera/0/depth npy')
depth = read_npy(res)
# Visualize the captured depth
plt.imshow(depth)
<matplotlib.image.AxesImage at 0x10905d090>

Get object information

List all the objects of this virtual scene

In [11]:
# split() without an argument drops empty entries, so an empty reply or
# repeated spaces do not turn into bogus object ids.
scene_objects = client.request('vget /objects').split()
print('Number of objects in this scene:', len(scene_objects))

# TODO: replace this with a better implementation
class Color(object):
    ''' A utility class to parse color value

    Parses a string of the form '(R=..,G=..,B=..,A=..)' and stores the four
    channels as the integer attributes R, G, B and A. The original string is
    kept in color_str and is used as the repr of the object.

    Raises ValueError when the string does not have that form or when a
    channel is not an integer.
    '''
    # Raw string so that '\(' reaches the regex engine unchanged instead of
    # being treated as an (invalid) string escape.
    regexp = re.compile(r'\(R=(.*),G=(.*),B=(.*),A=(.*)\)')
    def __init__(self, color_str):
        self.color_str = color_str
        match = self.regexp.match(color_str)
        if match is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError('Can not parse color from %r' % (color_str,))
        # Groups 1..4 hold the R, G, B and A channel values in that order.
        (self.R, self.G, self.B, self.A) = [int(match.group(i)) for i in range(1,5)]

    def __repr__(self):
        return self.color_str

# Build the lookup from object id to its labeling color, asking the server
# for the color of one object at a time.
id2color = dict()
for obj_id in scene_objects:
    color_query = 'vget /object/%s/color' % obj_id
    color = Color(client.request(color_query))
    id2color[obj_id] = color
Number of objects in this scene: 296

Parse the segmentation mask

In [12]:
def match_color(object_mask, target_color, tolerance=3):
    ''' Find the pixels of object_mask whose color is close to target_color

    object_mask: image array indexed as [row, column, channel]; only the
                 first three channels are compared.
    target_color: sequence whose first three items are the r, g, b values to look for.
    tolerance: a channel matches when it is within +/- tolerance of the target.

    Returns a boolean array with the first two dimensions of object_mask that
    is True where all three channels match, or None when no pixel matches.
    '''
    match_region = np.ones(object_mask.shape[0:2], dtype=bool)
    for c in range(3): # r,g,b
        channel = object_mask[:,:,c]
        min_val = target_color[c] - tolerance
        max_val = target_color[c] + tolerance
        # A pixel stays selected only if every channel is inside its range.
        match_region &= (channel >= min_val) & (channel <= max_val)

    if match_region.any():
        return match_region
    # No pixel has this color: report it explicitly instead of an empty mask.
    return None

# Compute one boolean mask per object that is visible in object_mask.
# This may take a while
# TODO: Need to find a faster implementation for this
id2mask = dict()
for obj_id in scene_objects:
    color = id2color[obj_id]
    mask = match_color(object_mask, [color.R, color.G, color.B], tolerance=3)
    if mask is None:
        # This object's labeling color does not appear in the mask.
        continue
    id2mask[obj_id] = mask

Clean up resources

In [19]: