In [1]:
%matplotlib inline

Generate Images

This IPython notebook demonstrates how to generate an image dataset with rich ground truth from a virtual environment.

In [2]:
import time; print(time.strftime("The last update of this file: %Y-%m-%d %H:%M:%S", time.gmtime()))
The last update of this file: 2017-10-21 21:26:41

Load some Python libraries. The dependencies for this tutorial are PIL, NumPy, and Matplotlib.

In [3]:
from __future__ import division, absolute_import, print_function
import os, sys, time, re, json
import numpy as np
import matplotlib.pyplot as plt
from io import BytesIO
from PIL import Image

imread = plt.imread
def imread8(im_file):
    ''' Read an image file as an 8-bit numpy array '''
    im = np.asarray(Image.open(im_file))
    return im

def read_png(res):
    ''' Decode a PNG binary string returned by the server into a numpy array '''
    img = Image.open(BytesIO(res))
    return np.asarray(img)

def read_npy(res):
    ''' Decode an npy binary string returned by the server into a numpy array '''
    return np.load(BytesIO(res))

Connect to the game

Load the unrealcv Python client. If it is not installed, run pip install unrealcv first.

In [4]:
from unrealcv import client
client.connect()
if not client.isconnected():
    print('UnrealCV server is not running. Run the game downloaded from http://unrealcv.github.io first.')
    sys.exit(-1)
INFO:__init__:211:Got connection confirm: 'connected to RealisticRendering'

Make sure the connection works well

In [5]:
res = client.request('vget /unrealcv/status')
# The image resolution and port are configured in the config file.
print(res)
Is Listening
Client Connected
9000
Configuration
Config file: /Users/qiuwch/unrealcv/UE4Binaries/RealisticRendering/MacNoEditor/RealisticRendering.app/Contents/UE4/Engine/Binaries/Mac/unrealcv.ini
Port: 9000
Width: 640
Height: 480
FOV: 90.000000
EnableInput: true
EnableRightEye: false

Load a camera trajectory

In [7]:
traj_file = './camera_traj.json' # Path relative to this notebook
camera_trajectory = json.load(open(traj_file)) # json was already imported above
# We will show how to record a camera trajectory in another tutorial

Render an image

In [8]:
idx = 1
loc, rot = camera_trajectory[idx]
# Set the location and rotation of the first camera
client.request('vset /camera/0/location {x} {y} {z}'.format(**loc))
client.request('vset /camera/0/rotation {pitch} {yaw} {roll}'.format(**rot))

# Get image
res = client.request('vget /camera/0/lit lit.png')
print('The image is saved to %s' % res)

# It is also possible to get the png directly without saving to a file
res = client.request('vget /camera/0/lit png')
im = read_png(res)
print(im.shape)

# Visualize the image we just captured
plt.imshow(im)
The image is saved to /Users/qiuwch/unrealcv/UE4Binaries/RealisticRendering/MacNoEditor/RealisticRendering.app/Contents/UE4/Engine/Binaries/Mac/lit.png
(480, 640, 4)
Out[8]:
<matplotlib.image.AxesImage at 0x108dd8310>
../_images/tutorials_generate_images_tutorial_12_2.png
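Note that the first request above saves lit.png on the machine running the game (see the path printed above). If the client runs on a different machine, the in-memory bytes from the second request can be written locally instead; a minimal sketch (the local filename is an arbitrary choice):

# Write the raw PNG bytes returned by 'vget /camera/0/lit png' to a local file.
# 'res' still holds the binary response from the request above.
with open('lit_local.png', 'wb') as f:
    f.write(res)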

Ground truth generation

Generate ground truth from this virtual scene

In [9]:
res = client.request('vget /camera/0/object_mask png')
object_mask = read_png(res)
res = client.request('vget /camera/0/normal png')
normal = read_png(res)

# Visualize the captured ground truth
plt.imshow(object_mask)
plt.figure()
plt.imshow(normal)
Out[9]:
<matplotlib.image.AxesImage at 0x1097db8d0>
../_images/tutorials_generate_images_tutorial_14_1.png
../_images/tutorials_generate_images_tutorial_14_2.png

Depth is retrieved as a numpy array. In UnrealCV < v0.3.8, depth was saved as an EXR file, but this had two issues: 1. EXR is not well supported on Linux; 2. reading EXR files depends on OpenCV, which is hard to install.

In [10]:
res = client.request('vget /camera/0/depth npy')
depth = read_npy(res)
plt.imshow(depth)
Out[10]:
<matplotlib.image.AxesImage at 0x10905d090>
../_images/tutorials_generate_images_tutorial_16_1.png
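Repeating these requests for every pose in the loaded trajectory yields a full dataset. A minimal sketch, assuming an output folder ./out and the helper functions defined above (the folder and file naming scheme are arbitrary choices, not part of the original tutorial):

# Sketch of a dataset-generation loop over camera_trajectory.
out_dir = './out'
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

for idx, (loc, rot) in enumerate(camera_trajectory):
    client.request('vset /camera/0/location {x} {y} {z}'.format(**loc))
    client.request('vset /camera/0/rotation {pitch} {yaw} {roll}'.format(**rot))
    # Fetch each modality in memory, then save it on the client side
    lit = read_png(client.request('vget /camera/0/lit png'))
    mask = read_png(client.request('vget /camera/0/object_mask png'))
    depth = read_npy(client.request('vget /camera/0/depth npy'))
    plt.imsave(os.path.join(out_dir, '%04d_lit.png' % idx), lit)
    plt.imsave(os.path.join(out_dir, '%04d_mask.png' % idx), mask)
    np.save(os.path.join(out_dir, '%04d_depth.npy' % idx), depth)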

Get object information

List all the objects of this virtual scene

In [11]:
scene_objects = client.request('vget /objects').split(' ')
print('Number of objects in this scene:', len(scene_objects))

# TODO: replace this with a better implementation
class Color(object):
    ''' A utility class to parse color value '''
    regexp = re.compile(r'\(R=(.*),G=(.*),B=(.*),A=(.*)\)')
    def __init__(self, color_str):
        self.color_str = color_str
        match = self.regexp.match(color_str)
        (self.R, self.G, self.B, self.A) = [int(match.group(i)) for i in range(1,5)]

    def __repr__(self):
        return self.color_str

id2color = {} # Map from object id to the labeling color
for obj_id in scene_objects:
    color = Color(client.request('vget /object/%s/color' % obj_id))
    id2color[obj_id] = color
    # print('%s : %s' % (obj_id, str(color)))
Number of objects in this scene: 296
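As the TODO above notes, the Color class is a stopgap; a simpler parse (a sketch, not part of the original tutorial) could pull the four integers out of the same string in one call:

# Hypothetical helper: parse '(R=...,G=...,B=...,A=...)' into four ints
def parse_color(color_str):
    r, g, b, a = map(int, re.findall(r'\d+', color_str))
    return (r, g, b, a)

print(parse_color('(R=127,G=191,B=127,A=255)'))  # (127, 191, 127, 255)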

Parse the segmentation mask

In [12]:
def match_color(object_mask, target_color, tolerance=3):
    ''' Return a boolean mask of pixels whose RGB is within tolerance of target_color '''
    match_region = np.ones(object_mask.shape[0:2], dtype=bool)
    for c in range(3): # r,g,b
        min_val = target_color[c] - tolerance
        max_val = target_color[c] + tolerance
        channel_region = (object_mask[:,:,c] >= min_val) & (object_mask[:,:,c] <= max_val)
        match_region &= channel_region

    if match_region.sum() != 0:
        return match_region
    else:
        return None

id2mask = {}
for obj_id in scene_objects:
    color = id2color[obj_id]
    mask = match_color(object_mask, [color.R, color.G, color.B], tolerance = 3)
    if mask is not None:
        id2mask[obj_id] = mask
# This may take a while
# TODO: Need to find a faster implementation for this; one possibility is sketched below
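One possible speedup (a sketch, under the assumption that the object mask is a lossless PNG, so the labeling colors match exactly and no tolerance is needed): pack each pixel's RGB into a single integer and compare against every object color in one vectorized pass.

def build_id2mask_fast(object_mask, id2color):
    ''' Vectorized exact-color matching (assumes no tolerance is needed) '''
    # Pack each pixel's R, G, B into one 24-bit integer
    packed = (object_mask[:, :, 0].astype(np.uint32) << 16) | \
             (object_mask[:, :, 1].astype(np.uint32) << 8) | \
             object_mask[:, :, 2].astype(np.uint32)
    id2mask = {}
    for obj_id, color in id2color.items():
        key = (color.R << 16) | (color.G << 8) | color.B
        mask = (packed == key)
        if mask.any():
            id2mask[obj_id] = mask
    return id2mask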

Clean up resources

In [19]:
client.disconnect()