A Real Self-Driving Car

Before school starts, let me sort out the plan for the self-driving car one last time. Yes, it will be a small car that is basically identical to a real self-driving car in both hardware and software. I plan to build it by converting an ordinary remote-controlled car, and the basic idea goes like this:

A regular RC car has a servo that steers the front wheels, a speed-control module, a DC motor that drives the car forward, a battery, and a wireless receiver that picks up the commands from the remote control.

To convert it, we remove the wireless receiver and put a Raspberry Pi in charge instead. The Pi analyzes the video from the camera, works out how the servo and the DC motor should be controlled to keep the car moving inside its lane, and then sends the corresponding commands to drive them.
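A minimal sketch of what "sending the commands" could look like on the Pi, assuming the steering servo and the speed controller each take a standard 50 Hz hobby PWM signal. The GPIO pins and pulse widths below are made-up placeholders for illustration, not values measured on my car:

# pwm_sketch.py - illustrative only; pins and pulse widths are assumptions
import time
import RPi.GPIO as GPIO

STEER_PIN = 18     # steering servo signal (assumed wiring)
THROTTLE_PIN = 13  # speed controller signal (assumed wiring)

GPIO.setmode(GPIO.BCM)
GPIO.setup(STEER_PIN, GPIO.OUT)
GPIO.setup(THROTTLE_PIN, GPIO.OUT)

steer = GPIO.PWM(STEER_PIN, 50)       # 50 Hz, the usual hobby-servo frequency
throttle = GPIO.PWM(THROTTLE_PIN, 50)
steer.start(7.5)       # ~1.5 ms pulse: roughly centered
throttle.start(7.5)    # ~1.5 ms pulse: roughly neutral

def set_steering(amount):
  # amount in [-1, 1]: -1 full left, +1 full right (mapping is approximate)
  steer.ChangeDutyCycle(7.5 + 2.5 * max(-1.0, min(1.0, amount)))

def set_throttle(amount):
  # amount in [-1, 1]: reverse / forward around neutral (mapping is approximate)
  throttle.ChangeDutyCycle(7.5 + 2.5 * max(-1.0, min(1.0, amount)))

# steer gently right and creep forward for one second
set_steering(0.3)
set_throttle(0.1)
time.sleep(1)
set_throttle(0.0)
GPIO.cleanup()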

See you again when summer break comes.

References

Inside a Donkeycar

Giving the Robot a Better Brain (19)

Next, the last bit of tidy-up work: we want the hexapod robot to find the bunny.

This part is fairly easy: we just keep the center of the bunny's detected bounding box aligned with the center of the camera frame.

The code:

# -*- coding: utf-8 -*-
# NCSDK API V2
from mvnc import mvncapi as mvnc
import numpy as np
import cv2
import time
import sonar
import spider
import threading

GRAPH = 'graph'
CLASSES = ('background', 'bunny', 'doll', 'doraemon', 'snoopy')
distance = 0.0
input_size = (300, 300)
np.random.seed(3)
colors = 255 * np.random.rand(len(CLASSES), 3)

def preprocess(src):
  img = cv2.resize(src, input_size)
  img = img - 127.5
  img = img / 127.5
  return img.astype(np.float32)

# get the distance
def updateDistance():
  global distance
  while True:
    try:
      distance = sonar.getDistance()
    except Exception as e:
      print('Error: %s' % e)
      distance = 0.0
    time.sleep(0.2)

th_distance = threading.Thread(target=updateDistance)
th_distance.setDaemon(True)
th_distance.start()

# discover our device
devices = mvnc.enumerate_devices()
device = mvnc.Device(devices[0])
device.open()

# load graph onto the device
graph = mvnc.Graph('graph1')
with open(GRAPH, 'rb') as f:
  graph_file = f.read()
input_fifo, output_fifo = graph.allocate_with_fifos(device, graph_file)

capture = cv2.VideoCapture(0)
_, image = capture.read()
height, width = image.shape[:2]

spider.runActionGroup('standUp', 1)

while capture.isOpened():
  stime = time.time()
  _, image = capture.read()
  image_pro = preprocess(image)
  graph.queue_inference_with_fifo_elem(input_fifo, output_fifo, image_pro, 'object1')
  output, user_obj = output_fifo.read_elem()

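  # output[0] holds the number of valid detections; each detection takes 7 floats
  # starting at index 7: class id at offset 1, confidence at offset 2, and the
  # bounding box corners (normalized to 0~1) at offsets 3 to 6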
  valid_boxes = int(output[0])
  for i in range(7, 7 * (1 + valid_boxes), 7):
    if not np.isfinite(sum(output[i + 1: i + 7])):
      continue
    clss = CLASSES[int(output[i + 1])]
    conf = output[i + 2]
    color = colors[int(output[i + 1])]
    # get the bounding box
    x1 = max(0, int(output[i + 3] * width))
    y1 = max(0, int(output[i + 4] * height))
    x2 = min(width, int(output[i + 5] * width))
    y2 = min(height, int(output[i + 6] * height))
    # draw the class label and bounding box
    label = '{}: {:.0f}%'.format(clss, conf * 100)
    image = cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
    y = y1 - 5 if y1 - 15 > 15 else y1 + 18
    image = cv2.putText(image, label, (x1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

    # follow the white rabbit: align the bounding box center with the screen center
    if clss == 'bunny':
      x_diff = (x1+x2)/2 - width/2
      y_diff = (y1+y2)/2 - height/2
      if distance > 10:
        if x_diff > 20:
          spider.runActionGroup('turn-right', 0)
        elif x_diff < -20:
          spider.runActionGroup('turn-left', 0)
        elif y_diff > 20:
          spider.runActionGroup('head_up', 0)
        elif y_diff < -20:
          spider.runActionGroup('head_down', 0)
        else:
          spider.runActionGroup('walk', 0)
      else:
        spider.runActionGroup('stop', 0)

  cv2.imshow('frame', image)
  print('FPS = {:.1f}'.format(1 / (time.time() - stime)))
  if cv2.waitKey(1) & 0xFF == ord('q'):
    break

capture.release()
cv2.destroyAllWindows()
input_fifo.destroy()
output_fifo.destroy()
graph.destroy()
device.close()
device.destroy()

Giving the Robot a Better Brain (18)

At last, we can give the hexapod robot its new brain!

1. Generate the graph that the NCS needs

$ cd MobileNet-SSD
$ mkdir models
$ cp no_bn.prototxt no_bn.caffemodel models
$ mvNCCompile models/no_bn.prototxt \
    -w models/no_bn.caffemodel \
    -s 12 -is 300 300 -o models/mobilenet_graph
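If I am reading the NCSDK documentation right, -w points at the Caffe weights, -s 12 is the number of SHAVE cores the compiled graph may use, -is 300 300 is the network input size, and -o is the file the compiled graph gets written to.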

2. Test

#coding=utf8
# NCSDK API V2
from mvnc import mvncapi as mvnc
import numpy as np
import cv2
import time

GRAPH = 'models/mobilenet_graph'
VIDEO = '/home/sean/Downloads/robot.mp4'
fourcc = cv2.VideoWriter_fourcc(*'XVID')
CLASSES = ('background', 'bunny', 'doll', 'doraemon', 'snoopy')
input_size = (300, 300)
np.random.seed(3)
colors = 255 * np.random.rand(len(CLASSES), 3)

def preprocess(src):
    img = cv2.resize(src, input_size)
    img = img - 127.5
    img = img / 127.5
    return img.astype(np.float32)

# discover our device
devices = mvnc.enumerate_devices()
device = mvnc.Device(devices[0])
device.open()

# load graph onto the device
graph = mvnc.Graph('graph1')
with open(GRAPH, 'rb') as f:
    graph_file = f.read()
input_fifo, output_fifo = graph.allocate_with_fifos(device, graph_file)

capture = cv2.VideoCapture(VIDEO)
_, image = capture.read()
height, width = image.shape[:2]
out = cv2.VideoWriter('robot.avi',fourcc, 10.0, (width,height))
while capture.isOpened():
    stime = time.time()
    ret, image = capture.read()
    if not ret:
        break
    image_pro = preprocess(image)
    graph.queue_inference_with_fifo_elem(input_fifo, output_fifo, image_pro, 'object1')
    output, user_obj = output_fifo.read_elem()
    valid_boxes = int(output[0])
    for i in range(7, 7 * (1 + valid_boxes), 7):
        if not np.isfinite(sum(output[i + 1: i + 7])):
            continue
        clss = CLASSES[int(output[i + 1])]
        conf = output[i + 2]
        color = colors[int(output[i + 1])]
        x1 = max(0, int(output[i + 3] * width))
        y1 = max(0, int(output[i + 4] * height))
        x2 = min(width, int(output[i + 5] * width))
        y2 = min(height, int(output[i + 6] * height))
        label = '{}: {:.0f}%'.format(clss, conf * 100)
        image = cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        y = y1 - 5 if y1 - 15 > 15 else y1 + 18
        image = cv2.putText(image, label, (x1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

    fps = 'FPS = {:.1f}'.format(1 / (time.time() - stime))
    image = cv2.putText(image, fps, (width-220, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0,255,0), 3)
    cv2.imshow('frame', image)
    out.write(image)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

capture.release()
cv2.destroyAllWindows()
input_fifo.destroy()
output_fifo.destroy()
graph.destroy()
device.close()
device.destroy()

Yay! From the hexapod's camera footage, we can correctly recognize the dolls now.

Giving the Robot a Better Brain (17)

Training takes a bit over two hours. Once it finishes, we can test the model on the laptop first.

1. Generate the deployment model

python merge_bn.py --model example/MobileNetSSD_deploy.prototxt --weights snapshot/mobilenet_iter_20000.caffemodel

This produces no_bn.prototxt and no_bn.caffemodel:

name: "MobileNet-SSD"
input: "data"
input_shape {
  dim: 1
  dim: 3
  dim: 300
  dim: 300
}
layer {
  name: "conv0"
  type: "Convolution"
  bottom: "data"
  top: "conv0"
...

layer {
  name: "detection_out"
  type: "DetectionOutput"
  bottom: "mbox_loc"
  bottom: "mbox_conf_flatten"
  bottom: "mbox_priorbox"
  top: "detection_out"
  include {
    phase: TEST
  }
  detection_output_param {
    num_classes: 5
    share_location: true
    background_label_id: 0
    nms_param {
      nms_threshold: 0.44999998807907104
      top_k: 100
    }
    code_type: CENTER_SIZE
    keep_top_k: 100
    confidence_threshold: 0.25
  }
}
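As an aside, here is my own rough sketch of what the batch-norm merging boils down to (illustrative only, not the actual merge_bn.py): a convolution followed by BatchNorm + Scale can be folded into a single convolution by rescaling its weights and bias once per output channel.

# bn_fold_sketch.py - illustrative only, not the real merge_bn.py
import numpy as np

def fold_bn(w, b, mean, var, gamma, beta, eps=1e-5):
  # w: conv weights (out_ch, in_ch, kh, kw), b: conv bias (out_ch,)
  # mean/var come from the BatchNorm layer, gamma/beta from the Scale layer
  scale = gamma / np.sqrt(var + eps)          # one factor per output channel
  w_folded = w * scale.reshape(-1, 1, 1, 1)   # rescale every output filter
  b_folded = (b - mean) * scale + beta
  return w_folded, b_folded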

2. Test

# inference.py
import numpy as np
import sys,os
import cv2
caffe_root = '/home/young/caffe/'
sys.path.insert(0, caffe_root + 'python')
import caffe

net_file= 'no_bn.prototxt'
caffe_model='no_bn.caffemodel'

if not os.path.exists(caffe_model):
  print(caffe_model + " does not exist")
  exit()
if not os.path.exists(net_file):
  print(net_file + " does not exist")
  exit()
net = caffe.Net(net_file,caffe_model,caffe.TEST)

CLASSES = ('background', 'bunny', 'doll', 'doraemon', 'snoopy')
np.random.seed(3)
colors = 255 * np.random.rand(len(CLASSES), 3)

def preprocess(src):
  img = cv2.resize(src, (300,300))
  img = img - 127.5
  img = img * 0.007843
  return img

def postprocess(img, out):
  h = img.shape[0]
  w = img.shape[1]
  box = out['detection_out'][0,0,:,3:7] * np.array([w, h, w, h])
  cls = out['detection_out'][0,0,:,1]
  conf = out['detection_out'][0,0,:,2]
  return (box.astype(np.int32), conf, cls)

def detect(imgfile): 
  origimg = cv2.imread(imgfile)
  img = preprocess(origimg)
  img = img.astype(np.float32)
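  # Caffe expects CHW channel order, so rearrange from HWC to CHW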
  img = img.transpose((2, 0, 1))
  net.blobs['data'].data[...] = img
  out = net.forward()
  box, conf, cls = postprocess(origimg, out)

  for i in range(len(box)):
    p1 = (box[i][0], box[i][1])
    p2 = (box[i][2], box[i][3])
    cv2.rectangle(origimg, p1, p2, (0,255,0),2)
    p3 = (max(p1[0], 15), max(p1[1]-10, 10))
    title = "%s:%.2f" % (CLASSES[int(cls[i])], conf[i])
    cv2.putText(origimg, title, p3, cv2.FONT_ITALIC, 0.8, colors[int(cls[i])], 2)
  
  cv2.imshow("Caffe SSD", origimg)
  k = cv2.waitKey(0) & 0xff 
  if k == 27 : return False
  return True

test_dir = "images"  # the directory we copied the test images into
for f in os.listdir(test_dir):
  if detect(test_dir + "/" + f) == False:
    break

Copy the test images into the images directory.

Then test on images that are not in the training set.

Giving the Robot a Better Brain (16)

Now we can start training the convolutional neural network. We are using MobileNet-SSD.

1. Download MobileNet-SSD
$ cd $CAFFE_ROOT/examples
$ git clone --depth 1 https://github.com/chuanqi305/MobileNet-SSD
2. Create links to the lmdb data
$ cd MobileNet-SSD
$ ln -s $HOME/Sean/Sean/lmdb/Sean_test_lmdb test_lmdb
$ ln -s $HOME/Sean/Sean/lmdb/Sean_train_lmdb train_lmdb
$ ln -s $HOME/Sean/labelmap.prototxt labelmap.prototxt
3. Generate the train/test/deploy prototxt files
$ ./gen_model.sh 5    # 5 is the number of classes, counting background
This generates the three prototxt files (train, test and deploy) under example/.
4. Train
Edit $CAFFE_ROOT/examples/MobileNet-SSD/train.sh:
#!/bin/sh
if ! test -f example/MobileNetSSD_train.prototxt ;then
    echo "error: example/MobileNetSSD_train.prototxt does not exist."
    echo "please use the gen_model.sh to generate your own model."
    exit 1
fi

mkdir -p snapshot
../../build/tools/caffe train -solver="solver_train.prototxt" \
-weights="mobilenet_iter_73000.caffemodel" -gpu=0
Start training:
$ ./train.sh
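The snapshots end up in the snapshot/ directory created by the script; that is where the snapshot/mobilenet_iter_20000.caffemodel used when generating the deployment model comes from.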

Giving the Robot a Better Brain (15)

Next, we need to generate the lmdb data. Create a Sean directory under caffe/data, together with create_data.sh:

$ ~/caffe/data/Sean/create_data.sh
# create_data.sh
root_dir="$HOME/caffe"
cd $root_dir
redo=1
dataset_name="Sean"
data_root_dir="$HOME/$dataset_name"
mapfile="$data_root_dir/labelmap.prototxt"
anno_type="detection"
db="lmdb"
min_dim=0
max_dim=0
width=0
height=0

extra_cmd="--encode-type=jpg --encoded"
if [ $redo ]
then
  extra_cmd="$extra_cmd --redo"
fi
for subset in test trainval
do
  python $root_dir/scripts/create_annoset.py --anno-type=$anno_type --label-map-file=$mapfile --min-dim=$min_dim --max-dim=$max_dim --resize-width=$width --resize-height=$height --check-label $extra_cmd $data_root_dir $data_root_dir/structure/$subset.txt $data_root_dir/$dataset_name/$db/$dataset_name"_"$subset"_"$db examples/$dataset_name
done

The final dataset layout:

Sean/
  |- images/            the photos
  |- labels/            the annotations
  |- structure/         the files mapping photos to annotations
  |- Sean/
       |- lmdb/
            |- Sean_test_lmdb/
            |- Sean_train_lmdb/
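A quick sanity check I like to run afterwards (just a sketch; it assumes the lmdb Python package is installed and uses the train lmdb path from the tree above, with the dataset living under /home/young/Sean):

# check_lmdb.py - optional sanity check (assumes `pip install lmdb`)
import lmdb

env = lmdb.open('/home/young/Sean/Sean/lmdb/Sean_train_lmdb', readonly=True)
with env.begin() as txn:
  print('entries:', txn.stat()['entries'])  # should equal the number of training images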

Giving the Robot a Better Brain (14)

Now we need to do some preprocessing on the dataset.

Sean/
  |- images/            the photos
  |- labels/            the annotations
  |- structure/         the files mapping photos to annotations

1. Create the label file labelmap.prototxt in the Sean dataset directory

item {
  name: "none_of_the_above"
  label: 0
  display_name: "background"
}
item {
  name: "bunny"
  label: 1
  display_name: "bunny"
}
item {
  name: "doll"
  label: 2
  display_name: "doll"
}
item {
  name: "doraemon"
  label: 3
  display_name: "doraemon"
}
item {
  name: "snoopy"
  label: 4
  display_name: "snoopy"
}
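Note that the label numbers here line up with the CLASSES tuple used by the test and robot scripts: ('background', 'bunny', 'doll', 'doraemon', 'snoopy').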

2. Generate trainval.txt and test.txt in the structure directory

import os
from os.path import basename

def createMapTxt(baseDirDataSet, target):
    buffer = ''
    baseDir = baseDirDataSet+'/images/'+target
    for filename in os.listdir(baseDir):
        filenameOnly, file_extension = os.path.splitext(filename)
        s = 'images/'+filenameOnly+'.jpg'+' '+'labels/'+filenameOnly+'.xml\n'
        print (repr(s))
        img_file, anno = s.strip("\n").split(" ")
        print(repr(img_file), repr(anno))
        buffer+=s  
    with open(baseDirDataSet+'/structure/'+target+'.txt', 'w') as file:
        file.write(buffer)
    print('Done')

createMapTxt('/home/young/Sean', 'trainval')
createMapTxt('/home/young/Sean', 'test')

The contents of trainval.txt look roughly like this:

images/Bunny(8).jpg labels/Bunny(8).xml
images/Bunny(78).jpg labels/Bunny(78).xml
images/doraemon(148).jpg labels/doraemon(148).xml
images/Snoopy(108).jpg labels/Snoopy(108).xml

Giving the Robot a Better Brain (13)

Next comes the most arduous and tedious part of the work: marking where the targets are in each photo.

1. Create the dataset directory

Sean/
  |- images/            the photos
  |- labels/            the annotations
  |- structure/         the files mapping photos to annotations

Copy the photos we just prepared into the images directory.
2. Resize the images
The photos we take are usually 3~4 MB each, while the images a convolutional neural network works on are typically around 300×300 pixels.
300×300?! That small? Can the neural network really make anything out? Dad told me the size is mainly limited by two factors:
1. GPU memory. We usually push a batch of 64 photos onto the GPU for training, so the feature-map memory is roughly 4 × batch_size × num_feature_maps × height × width bytes. For 300×300 images, a batch of 64 and 64 feature maps, that is about 1.4 GB of GPU memory. For high-resolution 4000×4000 photos it would be about 245 GB, which no GPU can provide (a quick back-of-the-envelope check follows the list).
2. The receptive field of the convolutional network. With very large images, the network would have to be unimaginably deep.
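Just to convince myself of those two numbers, a quick check using the same formula Dad quoted (the GB figures above are really GiB):

# memory_check.py - back-of-the-envelope feature-map memory, 4 bytes per value
gib = 1024 ** 3
print(4 * 64 * 64 * 300 * 300 / gib)      # ~1.37 GiB for 300x300 images
print(4 * 64 * 64 * 4000 * 4000 / gib)    # ~244 GiB for 4000x4000 images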
To keep the training time reasonable, we need to resize the images first.
# resize.py
from PIL import Image
import os
from os.path import basename

def resizeImages(baseDir):
    basewidth = 600
    for filename in os.listdir(baseDir):
        filenameOnly, file_extension = os.path.splitext(filename)
        if (file_extension in [".jpg", '.png']):
            filepath = baseDir + os.sep + filename
            img = Image.open(filepath)
            wpercent = (basewidth/float(img.size[0]))
            hsize = int((float(img.size[1])*float(wpercent)))
            img = img.resize((basewidth,hsize), Image.ANTIALIAS)
            img.save(filepath)
            print (filenameOnly, "Done")
    print('Done')
baseDir = "/home/Sean/Sean/images"
resizeImages(baseDir)
3. Annotate the images
We use labelImg to annotate the images (https://github.com/tzutalin/labelImg). The images are in the images directory and the annotations go into the labels directory. My goodness, this ended up taking me three whole days…
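(labelImg writes one Pascal VOC style .xml file per image; those are the labels/*.xml files that the trainval.txt/test.txt mapping and the lmdb generation read later on.)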