
[Semantic Segmentation Series] Related Work in Semantic Segmentation -- The ENet Network


ENet: A Deep Neural Network Architecture for Real-Time Semantic Segmentation

Paszke, A., Chaurasia, A., Kim, S., & Culurciello, E. (2016). ENet: A Deep Neural Network Architecture for Real-Time Semantic Segmentation. arXiv:1606.02147.


  # Initial block of the model:
  #         Input
  #        /     \
  #       /       \
  # maxpool2d   conv2d-3x3
  #       \       /  
  #        \     /
  #      concatenate


  # Upsampling bottleneck:
  #     Bottleneck Input
  #        /        \
  #       /          \
  # conv2d-1x1     convTrans2d-1x1
  #      |             | PReLU
  #      |         convTrans2d-3x3
  #      |             | PReLU
  #      |         convTrans2d-1x1
  #      |             |
  # maxunpool2d    Regularizer
  #       \           /  
  #        \         /
  #      Summing + PReLU
  #
  #  Params: 
  #  projection_ratio - ratio between input and output channels
  #  relu - if True: ReLU is used as the activation function; otherwise PReLU is used



  # Regular|Dilated|Downsampling bottlenecks:
  #
  #     Bottleneck Input
  #        /        \
  #       /          \
  # maxpooling2d   conv2d-1x1
  #      |             | PReLU
  #      |         conv2d-3x3
  #      |             | PReLU
  #      |         conv2d-1x1
  #      |             |
  #  Padding2d     Regularizer
  #       \           /  
  #        \         /
  #      Summing + PReLU
  #
  # Params: 
  #  dilation (bool) - if True: creating dilation bottleneck
  #  down_flag (bool) - if True: creating downsampling bottleneck
  #  projection_ratio - ratio between input and output channels
  #  relu - if True: ReLU is used as the activation function; otherwise PReLU is used
  #  p - dropout ratio



  # Asymmetric bottleneck:
  #
  #     Bottleneck Input
  #        /        \
  #       /          \
  #      |         conv2d-1x1
  #      |             | PReLU
  #      |         conv2d-1x5
  #      |             |
  #      |         conv2d-5x1
  #      |             | PReLU
  #      |         conv2d-1x1
  #      |             |
  #  Padding2d     Regularizer
  #       \           /  
  #        \         /
  #      Summing + PReLU
  #
  # Params:    
  #  projection_ratio - ratio between input and output channels

Paper Architecture

Reference implementation: https://github.com/srihari-humbarwadi/ENet-A-Deep-Neural-Network-Architecture-for-Real-Time-Semantic-Segmentation/blob/master/batch_training.py

(Figure: ENet architecture, from the paper.)

Code

The block diagrams above follow a PyTorch-style reference; in the Keras code below (from the repository linked above), the decoder's max-unpooling is approximated by a 1x1 convolution followed by UpSampling2D.

from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Activation, Add, BatchNormalization, Conv2D,
                                     Conv2DTranspose, Input, MaxPooling2D, Permute,
                                     PReLU, ReLU, SpatialDropout2D, UpSampling2D,
                                     ZeroPadding2D, concatenate)
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.models import Model


def initial_block(tensor):
    # Parallel 3x3/stride-2 conv (13 filters) and 2x2 max-pool branches are
    # concatenated: 13 conv channels + 3 pooled input channels = 16 channels.
    conv = Conv2D(filters=13, kernel_size=(3, 3), strides=(2, 2), padding='same',
                  name='initial_block_conv', kernel_initializer='he_normal')(tensor)
    pool = MaxPooling2D(pool_size=(2, 2), name='initial_block_pool')(tensor)
    concat = concatenate([conv, pool], axis=-1, name='initial_block_concat')
    return concat
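A quick shape check (an illustrative snippet, not part of the original post): for an RGB input, the initial block halves the spatial resolution and outputs 13 + 3 = 16 channels.

inp = Input(shape=(512, 512, 3))
print(K.int_shape(initial_block(inp)))  # (None, 256, 256, 16)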
def bottleneck_encoder(tensor, nfilters, downsampling=False, dilated=False, asymmetric=False, normal=False, drate=0.1,
                       name=''):
    y = tensor
    skip = tensor
    stride = 1
    ksize = 1
    if downsampling:
        stride = 2
        ksize = 2
        skip = MaxPooling2D(pool_size=(2, 2), name=f'max_pool_{name}')(skip)
        # ZeroPadding2D only pads spatial axes, so to zero-pad the skip branch
        # along the channel axis (up to nfilters), the channel axis is swapped
        # into a spatial position, padded, then swapped back.
        skip = Permute((1, 3, 2), name=f'permute_1_{name}')(skip)  # (B, H, W, C) -> (B, H, C, W)
        ch_pad = nfilters - K.int_shape(tensor)[-1]
        skip = ZeroPadding2D(padding=((0, 0), (0, ch_pad)), name=f'zeropadding_{name}')(skip)
        skip = Permute((1, 3, 2), name=f'permute_2_{name}')(skip)  # (B, H, C, W) -> (B, H, W, C)

    y = Conv2D(filters=nfilters // 4, kernel_size=(ksize, ksize), kernel_initializer='he_normal',
               strides=(stride, stride), padding='same', use_bias=False, name=f'1x1_conv_{name}')(y)
    y = BatchNormalization(momentum=0.1, name=f'bn_1x1_{name}')(y)
    y = PReLU(shared_axes=[1, 2], name=f'prelu_1x1_{name}')(y)

    if normal:
        y = Conv2D(filters=nfilters // 4, kernel_size=(3, 3), kernel_initializer='he_normal', padding='same',
                   name=f'3x3_conv_{name}')(y)
    elif asymmetric:
        y = Conv2D(filters=nfilters // 4, kernel_size=(5, 1), kernel_initializer='he_normal', padding='same',
                   use_bias=False, name=f'5x1_conv_{name}')(y)
        y = Conv2D(filters=nfilters // 4, kernel_size=(1, 5), kernel_initializer='he_normal', padding='same',
                   name=f'1x5_conv_{name}')(y)
    elif dilated:
        # `dilated` doubles as the flag and as the integer dilation rate (2, 4, 8, 16)
        y = Conv2D(filters=nfilters // 4, kernel_size=(3, 3), kernel_initializer='he_normal',
                   dilation_rate=(dilated, dilated), padding='same', name=f'dilated_conv_{name}')(y)
    y = BatchNormalization(momentum=0.1, name=f'bn_main_{name}')(y)
    y = PReLU(shared_axes=[1, 2], name=f'prelu_{name}')(y)

    y = Conv2D(filters=nfilters, kernel_size=(1, 1), kernel_initializer='he_normal', use_bias=False,
               name=f'final_1x1_{name}')(y)
    y = BatchNormalization(momentum=0.1, name=f'bn_final_{name}')(y)
    y = SpatialDropout2D(rate=drate, name=f'spatial_dropout_final_{name}')(y)

    y = Add(name=f'add_{name}')([y, skip])
    y = PReLU(shared_axes=[1, 2], name=f'prelu_out_{name}')(y)

    return y
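As an illustrative sanity check (not part of the original code), a downsampling bottleneck halves the spatial size while the zero-padded skip branch raises the channel count to nfilters:

feat = Input(shape=(256, 256, 16))
print(K.int_shape(bottleneck_encoder(feat, 64, downsampling=True, normal=True, name='demo')))
# (None, 128, 128, 64)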

def bottleneck_decoder(tensor, nfilters, upsampling=False, normal=False, name=''):
    y = tensor
    skip = tensor
    if upsampling:
        skip = Conv2D(filters=nfilters, kernel_size=(1, 1), kernel_initializer='he_normal', strides=(1, 1),
                      padding='same', use_bias=False, name=f'1x1_conv_skip_{name}')(skip)
        skip = UpSampling2D(size=(2, 2), name=f'upsample_skip_{name}')(skip)

    y = Conv2D(filters=nfilters // 4, kernel_size=(1, 1), kernel_initializer='he_normal', strides=(1, 1),
               padding='same', use_bias=False, name=f'1x1_conv_{name}')(y)
    y = BatchNormalization(momentum=0.1, name=f'bn_1x1_{name}')(y)
    y = PReLU(shared_axes=[1, 2], name=f'prelu_1x1_{name}')(y)

    if upsampling:
        y = Conv2DTranspose(filters=nfilters // 4, kernel_size=(3, 3), kernel_initializer='he_normal', strides=(2, 2),
                            padding='same', name=f'3x3_deconv_{name}')(y)
    elif normal:
        y = Conv2D(filters=nfilters // 4, kernel_size=(3, 3), strides=(1, 1), kernel_initializer='he_normal',
                   padding='same', name=f'3x3_conv_{name}')(y)
    y = BatchNormalization(momentum=0.1, name=f'bn_main_{name}')(y)
    y = PReLU(shared_axes=[1, 2], name=f'prelu_{name}')(y)

    y = Conv2D(filters=nfilters, kernel_size=(1, 1), kernel_initializer='he_normal', use_bias=False,
               name=f'final_1x1_{name}')(y)
    y = BatchNormalization(momentum=0.1, name=f'bn_final_{name}')(y)

    y = Add(name=f'add_{name}')([y, skip])
    y = ReLU(name=f'relu_out_{name}')(y)

    return y
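Correspondingly, an upsampling decoder bottleneck doubles the spatial size. Again an illustrative check rather than code from the original post:

feat = Input(shape=(64, 64, 128))
print(K.int_shape(bottleneck_decoder(feat, 64, upsampling=True, name='demo_up')))
# (None, 128, 128, 64)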
def ENET(input_shape=(None, None, 3), nclasses=11):
    print('. . . . .Building ENet. . . . .')
    img_input = Input(input_shape)

    x = initial_block(img_input)

    x = bottleneck_encoder(x, 64, downsampling=True, normal=True, name='1.0', drate=0.01)
    for i in range(1, 5):
        x = bottleneck_encoder(x, 64, normal=True, name=f'1.{i}', drate=0.01)

    x = bottleneck_encoder(x, 128, downsampling=True, normal=True, name='2.0')
    x = bottleneck_encoder(x, 128, normal=True, name='2.1')
    x = bottleneck_encoder(x, 128, dilated=2, name='2.2')
    x = bottleneck_encoder(x, 128, asymmetric=True, name='2.3')
    x = bottleneck_encoder(x, 128, dilated=4, name='2.4')
    x = bottleneck_encoder(x, 128, normal=True, name='2.5')
    x = bottleneck_encoder(x, 128, dilated=8, name='2.6')
    x = bottleneck_encoder(x, 128, asymmetric=True, name='2.7')
    x = bottleneck_encoder(x, 128, dilated=16, name='2.8')

    x = bottleneck_encoder(x, 128, normal=True, name='3.0')
    x = bottleneck_encoder(x, 128, dilated=2, name='3.1')
    x = bottleneck_encoder(x, 128, asymmetric=True, name='3.2')
    x = bottleneck_encoder(x, 128, dilated=4, name='3.3')
    x = bottleneck_encoder(x, 128, normal=True, name='3.4')
    x = bottleneck_encoder(x, 128, dilated=8, name='3.5')
    x = bottleneck_encoder(x, 128, asymmetric=True, name='3.6')
    x = bottleneck_encoder(x, 128, dilated=16, name='3.7')

    x = bottleneck_decoder(x, 64, upsampling=True, name='4.0')
    x = bottleneck_decoder(x, 64, normal=True, name='4.1')
    x = bottleneck_decoder(x, 64, normal=True, name='4.2')

    x = bottleneck_decoder(x, 16, upsampling=True, name='5.0')
    x = bottleneck_decoder(x, 16, normal=True, name='5.1')

    img_output = Conv2DTranspose(nclasses, kernel_size=(2, 2), strides=(2, 2), kernel_initializer='he_normal',
                                 padding='same', name='image_output')(x)
    img_output = Activation('softmax')(img_output)

    model = Model(inputs=img_input, outputs=img_output, name='ENET')
    print('. . . . .Build Completed. . . . .')
    return model

The loss is defined as binary cross-entropy plus a weighted Dice loss; the Dice coefficient is 2·|A∩B| / (|A| + |B|), smoothed here by adding 1 to both numerator and denominator:

def dice_coeff(y_true, y_pred):
    smooth = 1.  # smoothing term avoids division by zero on empty masks
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    score = (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
    return score

def dice_loss(y_true, y_pred):
    loss = 1 - dice_coeff(y_true, y_pred)
    return loss

def total_loss(y_true, y_pred):
    loss = binary_crossentropy(y_true, y_pred) + (3*dice_loss(y_true, y_pred))
    return loss
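A minimal training setup is sketched below; the optimizer choice is an assumption, not something specified in the post. Note that the post pairs binary_crossentropy with an 11-class softmax output; categorical_crossentropy would be the more conventional choice there.

model = ENET(input_shape=(512, 512, 3), nclasses=11)
model.compile(optimizer='adam', loss=total_loss, metrics=[dice_coeff])  # optimizer is an assumption
model.summary()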

Source: https://blog.csdn.net/Mind_programmonkey/article/details/120843946