diff --git a/accuracy.py b/accuracy.py index efe43d4..a2619d2 100644 --- a/accuracy.py +++ b/accuracy.py @@ -2,11 +2,11 @@ import scipy.misc import glob - +#path to the ground truth images data1 = sorted(glob.glob(r'/media/lc/vge_lc/DL_DATE_BUILDING/WHU/cropped image tiles and raster labels/test/gt/*.png')) # data1 = sorted(glob.glob(r'/home/lc/Jupyter_projects/resatt/Urban 3D Challenge Data/d_test/gt/*.png')) # data1=sorted(glob.glob('/media/lc/vge_lc/spacenet/shanghai_vegas_test_result/test_label/*.png')) -# data1=sorted(glob.glob('/media/lc/vge_lc/urban3/uba512_test/test_gt/*.png')) +#path to the predicted image which value belong to 0 or 255 for visualization data2 = sorted(glob.glob('./test_result_temp/*.png')) @@ -69,18 +69,17 @@ def cal_iou(): b = np.sum(l2, axis=0) IoU = a * 1.0 / b print('iou:{}'.format(IoU)) - mean_iu = np.sum(IoU[:2]) / 2 - print('mean_iu:{}'.format(mean_iu)) + # mean_iu = np.sum(IoU[:2]) / 2 + # print('mean_iu:{}'.format(mean_iu)) precision = np.sum(TP, axis=0) / (np.sum(TP, axis=0) + np.sum(FP, axis=0)) print('--precision:{}'.format(precision)) recall = np.sum(TP, axis=0) / (np.sum(TP, axis=0) + np.sum(FN, axis=0)) print('--recall:{}'.format(recall)) F_score = 2 * (precision * recall) / (precision + recall) - print('F_score:{}'.format(F_score)) + print('F1_score:{}'.format(F_score)) mean_ap = np.sum(l3) * 1.0 / np.sum(l4) print('mean_ap:{}'.format(mean_ap)) - return IoU, mean_iu, mean_ap - + c, d, mean_ap = cal_iou() diff --git a/checkpoint/mapnet_model_whu.pb b/checkpoint/mapnet_model_whu.pb new file mode 100644 index 0000000..e8156bd Binary files /dev/null and b/checkpoint/mapnet_model_whu.pb differ diff --git a/dataset/test/img/7000027.png b/dataset/test/img/7000027.png deleted file mode 100644 index 56d6888..0000000 Binary files a/dataset/test/img/7000027.png and /dev/null differ diff --git a/dataset/test/img/7000028.png b/dataset/test/img/7000028.png deleted file mode 100644 index f4b8a76..0000000 Binary files a/dataset/test/img/7000028.png and /dev/null differ diff --git a/dataset/test/img/7000029.png b/dataset/test/img/7000029.png deleted file mode 100644 index 24c0bf6..0000000 Binary files a/dataset/test/img/7000029.png and /dev/null differ diff --git a/dataset/test/img/7000030.png b/dataset/test/img/7000030.png deleted file mode 100644 index 1178aba..0000000 Binary files a/dataset/test/img/7000030.png and /dev/null differ diff --git a/dataset/test/img/7000031.png b/dataset/test/img/7000031.png deleted file mode 100644 index a95c8e7..0000000 Binary files a/dataset/test/img/7000031.png and /dev/null differ diff --git a/dataset/test/lab/7000027.png b/dataset/test/lab/7000027.png deleted file mode 100644 index 3c38a9b..0000000 Binary files a/dataset/test/lab/7000027.png and /dev/null differ diff --git a/dataset/test/lab/7000028.png b/dataset/test/lab/7000028.png deleted file mode 100644 index b216fa7..0000000 Binary files a/dataset/test/lab/7000028.png and /dev/null differ diff --git a/dataset/test/lab/7000029.png b/dataset/test/lab/7000029.png deleted file mode 100644 index fc78c6b..0000000 Binary files a/dataset/test/lab/7000029.png and /dev/null differ diff --git a/dataset/test/lab/7000030.png b/dataset/test/lab/7000030.png deleted file mode 100644 index 58b8450..0000000 Binary files a/dataset/test/lab/7000030.png and /dev/null differ diff --git a/dataset/test/lab/7000031.png b/dataset/test/lab/7000031.png deleted file mode 100644 index 0191a06..0000000 Binary files a/dataset/test/lab/7000031.png and /dev/null differ diff --git a/dataset/train/img/7000000.png b/dataset/train/img/7000000.png deleted file mode 100644 index 8d23b20..0000000 Binary files a/dataset/train/img/7000000.png and /dev/null differ diff --git a/dataset/train/img/7000001.png b/dataset/train/img/7000001.png deleted file mode 100644 index 02c0a2a..0000000 Binary files a/dataset/train/img/7000001.png and /dev/null differ diff --git a/dataset/train/img/7000002.png b/dataset/train/img/7000002.png deleted file mode 100644 index 48efa41..0000000 Binary files a/dataset/train/img/7000002.png and /dev/null differ diff --git a/dataset/train/img/7000003.png b/dataset/train/img/7000003.png deleted file mode 100644 index 75b856c..0000000 Binary files a/dataset/train/img/7000003.png and /dev/null differ diff --git a/dataset/train/img/7000006.png b/dataset/train/img/7000006.png deleted file mode 100644 index 6561034..0000000 Binary files a/dataset/train/img/7000006.png and /dev/null differ diff --git a/dataset/train/img/7000007.png b/dataset/train/img/7000007.png deleted file mode 100644 index 33d6922..0000000 Binary files a/dataset/train/img/7000007.png and /dev/null differ diff --git a/dataset/train/lab/7000000.png b/dataset/train/lab/7000000.png deleted file mode 100644 index d5b955f..0000000 Binary files a/dataset/train/lab/7000000.png and /dev/null differ diff --git a/dataset/train/lab/7000001.png b/dataset/train/lab/7000001.png deleted file mode 100644 index 8d56529..0000000 Binary files a/dataset/train/lab/7000001.png and /dev/null differ diff --git a/dataset/train/lab/7000002.png b/dataset/train/lab/7000002.png deleted file mode 100644 index d11dba7..0000000 Binary files a/dataset/train/lab/7000002.png and /dev/null differ diff --git a/dataset/train/lab/7000003.png b/dataset/train/lab/7000003.png deleted file mode 100644 index 77cd765..0000000 Binary files a/dataset/train/lab/7000003.png and /dev/null differ diff --git a/dataset/train/lab/7000006.png b/dataset/train/lab/7000006.png deleted file mode 100644 index 47837da..0000000 Binary files a/dataset/train/lab/7000006.png and /dev/null differ diff --git a/dataset/train/lab/7000007.png b/dataset/train/lab/7000007.png deleted file mode 100644 index 96f5700..0000000 Binary files a/dataset/train/lab/7000007.png and /dev/null differ diff --git a/image/SpaceNet_result.png b/image/SpaceNet_result.png new file mode 100644 index 0000000..ea8d6a2 Binary files /dev/null and b/image/SpaceNet_result.png differ diff --git a/image/Urban_3D_result.png b/image/Urban_3D_result.png new file mode 100644 index 0000000..f8d6d82 Binary files /dev/null and b/image/Urban_3D_result.png differ diff --git a/image/result.png b/image/whu_result.png similarity index 100% rename from image/result.png rename to image/whu_result.png diff --git a/load_data.py b/load_data.py index c287f99..172153f 100644 --- a/load_data.py +++ b/load_data.py @@ -21,33 +21,13 @@ def load_batch(x, y): def prepare_data(): - # whu 512*512 4736 - # img = np.array(sorted( - # glob.glob(r'/media/lc/vge_lc/DL_DATE_BUILDING/WHU/cropped image tiles and raster labels/train/image/*.png'))) - # label = np.array(sorted( - # glob.glob(r'/media/lc/vge_lc/DL_DATE_BUILDING/WHU/cropped image tiles and raster labels/train/gt/*.png'))) - # - # test_img = np.array(sorted( - # glob.glob(r'/media/lc/vge_lc/DL_DATE_BUILDING/WHU/cropped image tiles and raster labels/test/image/*.png'))) - # test_label = np.array(sorted( - # glob.glob(r'/media/lc/vge_lc/DL_DATE_BUILDING/WHU/cropped image tiles and raster labels/test/gt/*.png'))) - img = np.array(sorted(glob.glob(r'./dataset/train/img/*.png'))) - label = np.array(sorted(glob.glob(r'./dataset/train/lab/*.png'))) - test_img = np.array(sorted(glob.glob(r'./dataset/test/img/*.png'))) - test_label = np.array(sorted(glob.glob(r'./dataset/test/lab/*.png'))) + train_img = np.array(sorted(glob.glob(r'./dataset/train/img/*.png'))) + train_label = np.array(sorted(glob.glob(r'./dataset/train/lab/*.png'))) + valid_img = np.array(sorted(glob.glob(r'./dataset/valid/img/*.png'))) + valid_label = np.array(sorted(glob.glob(r'./dataset/valid/lab/*.png'))) - - # img = np.array(sorted(glob.glob(r'/media/lc/vge_lc/spacenet/train_rgb_image/*.png'))) - # label = np.array(sorted(glob.glob(r'/media/lc/vge_lc/spacenet/train_label_image/*.png'))) - # test_img=sorted(glob.glob(r'/media/lc/vge_lc/spacenet/shanghai_vegas_test_result/test_image/*.png')) - # test_label=sorted(glob.glob('/media/lc/vge_lc/spacenet/shanghai_vegas_test_result/test_label/*.png')) - - # img = np.array(sorted(glob.glob(r'/home/lc/Jupyter_projects/resatt/Urban 3D Challenge Data/d_train/img/*.png'))) - # label = np.array(sorted(glob.glob(r'/home/lc/Jupyter_projects/resatt/Urban 3D Challenge Data/d_train/gt/*.png'))) - # test_img = np.array(sorted(glob.glob(r'/home/lc/Jupyter_projects/resatt/Urban 3D Challenge Data/d_test/img/*.png'))) - # test_label = np.array(sorted(glob.glob(r'/home/lc/Jupyter_projects/resatt/Urban 3D Challenge Data/d_test/gt/*.png'))) - return img, label, test_img, test_label + return train_img, train_label, valid_img, valid_label def data_augmentation(image, label): diff --git a/model/__pycache__/mapnet.cpython-36.pyc b/model/__pycache__/mapnet.cpython-36.pyc deleted file mode 100644 index 556122a..0000000 Binary files a/model/__pycache__/mapnet.cpython-36.pyc and /dev/null differ diff --git a/model/mapnet.py b/model/mapnet.py index 40b9542..612371a 100644 --- a/model/mapnet.py +++ b/model/mapnet.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + import tensorflow as tf -# from keras.layers import UpSampling2D +from keras.layers import UpSampling2D def conv2d(input,filters,kernel_size=3,stride=1,padding='SAME'): @@ -9,7 +11,7 @@ def conv2d(input,filters,kernel_size=3,stride=1,padding='SAME'): def bn(input,is_training=True): - return tf.layers.batch_normalization(input,momentum=0.99,epsilon=1e-3,training=is_training) + return tf.layers.batch_normalization(input,momentum=0.1,epsilon=1e-5,training=is_training) def bottleneck(x, size,is_training,downsampe=False): @@ -29,12 +31,12 @@ def bottleneck(x, size,is_training,downsampe=False): residual = tf.nn.relu(residual) residual = conv2d(residual, size * 4, 1, padding='VALID') out = tf.add(out,residual) + return out def resblock(x, size,is_training): residual = x - out = bn(x, is_training) out = tf.nn.relu(out) out = conv2d(out, size, 3) @@ -47,28 +49,35 @@ def resblock(x, size,is_training): def stage0(x,is_training): - x = bottleneck(x, 64,is_training, downsampe=True) + x = bottleneck(x, 64,is_training,downsampe=True) x = bottleneck(x, 64,is_training) x = bottleneck(x, 64,is_training) x = bottleneck(x, 64,is_training) + return x -def translayer(x, in_channels, out_channels,is_training): +def transition_layer(x, in_channels, out_channels,is_training): num_in = len(in_channels) num_out = len(out_channels) out = [] + for i in range(num_out): if i < num_in: + residual = bn(x[i], is_training) residual = tf.nn.relu(residual) residual = conv2d(residual, out_channels[i], 3) + out.append(residual) else: + residual = bn(x[-1], is_training) residual = tf.nn.relu(residual) residual = conv2d(residual, out_channels[i], 3, stride=2) + out.append(residual) + return out @@ -84,20 +93,24 @@ def convb(x, block_num, channels,is_training): def featfuse(x, channels, is_training, multi_scale_output=True): out = [] + for i in range(len(channels) if multi_scale_output else 1): residual = x[i] + for j in range(len(channels)): if j > i: if multi_scale_output == False: y = bn(x[j], is_training) y = tf.nn.relu(y) y = conv2d(y, channels[j], 1, padding='VALID') - out.append(tf.keras.layers.UpSampling2D(size=2 ** (j - i))(y)) + + out.append(UpSampling2D(size=2 ** (j - i))(y)) else: y = bn(x[j], is_training) y = tf.nn.relu(y) y = conv2d(y, channels[i], 1, padding='VALID') - y = tf.keras.layers.UpSampling2D(size=2 ** (j - i))(y) + + y = UpSampling2D(size=2 ** (j - i))(y) residual = tf.add(residual, y) elif j < i: @@ -106,23 +119,28 @@ def featfuse(x, channels, is_training, multi_scale_output=True): if k == i - j - 1: y = bn(y, is_training) y = tf.nn.relu(y) + # y = conv2d(y, channels[i], 3, stride=2) y = conv2d(y, channels[i], 1) y = tf.layers.max_pooling2d(y, 2, 2) else: y = bn(y, is_training) y = tf.nn.relu(y) + # y = conv2d(y, channels[j], 3, stride=2) y = conv2d(y, channels[j], 1) y = tf.layers.max_pooling2d(y, 2, 2) residual = tf.add(residual, y) + # residual = tf.nn.relu(residual) out.append(residual) + return out -def convblock(x, channels,is_training, multi_scale_output=True): +def convblock(x, channels,is_training,multi_scale_output=True): residual = convb(x, 4, channels,is_training) - out = featfuse(residual, channels,is_training, multi_scale_output=multi_scale_output) + out = featfuse(residual, channels,is_training, + multi_scale_output=multi_scale_output) return out @@ -133,33 +151,11 @@ def stage(x, num_modules, channels, is_training,multi_scale_output=True): out = convblock(out, channels,is_training, multi_scale_output=False) else: out = convblock(out, channels,is_training) - return out - - -def pyramid_pooling_block(input, bin_sizes): - pool_list = [] - h = input.shape[1] - c = input.shape[-1] - for bin_size in bin_sizes: - pool1 = tf.layers.average_pooling2d(input, (h // bin_size, h // bin_size), (h // bin_size, h // bin_size)) - pool1 = conv2d(pool1, int(c)//4, 1) - pool1 = tf.image.resize_bilinear(pool1, (h, h)) - pool_list.append(pool1) - pool = tf.concat(pool_list, axis=3) - return tf.add(input, pool) - -def spatial_pooling(input): - h,w=input.shape[1],input.shape[2] - p1=tf.image.resize_bilinear(tf.layers.max_pooling2d(input,2,2),(h,w)) - p2 = tf.image.resize_bilinear(tf.layers.max_pooling2d(input, 3, 3), (h, w)) - p3=tf.image.resize_bilinear(tf.layers.max_pooling2d(input,5,5),(h,w)) - p4 = tf.image.resize_bilinear(tf.layers.max_pooling2d(input, 6, 6), (h, w)) - p=tf.concat([p1,p2,p3,p4,input],axis=-1) - return p + return out -def channel_squeeze(input,filters,name=" "): +def channel_squeeze(input,filters,ratio,name=" "): with tf.name_scope(name): squeeze=tf.reduce_mean(input,axis=[1,2]) with tf.name_scope(name+"fc1"): @@ -172,53 +168,75 @@ def channel_squeeze(input,filters,name=" "): return input*result -def mapnet(input, is_training=True): - channels_s2 = [64, 128] - channels_s3 = [64, 128, 256] - num_modules_s2 = 2 - num_modules_s3 = 3 +def spatial_pooling(input): + h,w=input.shape[1],input.shape[2] + p1=tf.image.resize_bilinear(tf.layers.max_pooling2d(input,2,2),(h,w)) + p2 = tf.image.resize_bilinear(tf.layers.max_pooling2d(input, 3, 3), (h, w)) + p3=tf.image.resize_bilinear(tf.layers.max_pooling2d(input,5,5),(h,w)) + p4 = tf.image.resize_bilinear(tf.layers.max_pooling2d(input, 6, 6), (h, w)) + p=tf.concat([p1,p2,p3,p4,input],axis=-1) + return p + + +def mapnet(input,is_training=True): + channels_2 = [64, 128] + channels_3 = [64, 128, 256] + num_modules_2 = 2 + num_modules_3 = 3 - conv_1 = conv2d(input, 64, stride=2) + # input=tf.image.per_image_standardization(input) + conv_1 = conv2d(input, 64) conv_1 = bn(conv_1, is_training) conv_1 = tf.nn.relu(conv_1) conv_2 = conv2d(conv_1, 64) + conv_2=tf.layers.max_pooling2d(conv_2, 2, 2) conv_2 = bn(conv_2, is_training) conv_2 = tf.nn.relu(conv_2) conv_3 = conv2d(conv_2, 64) conv_3 = bn(conv_3, is_training) conv_3 = tf.nn.relu(conv_3) - conv_4 = tf.layers.max_pooling2d(conv_3, 2, 2) + conv_4 = conv2d(conv_3, 64) + conv_4 = bn(conv_4, is_training) + conv_4 = tf.nn.relu(conv_4) + x = tf.layers.max_pooling2d(conv_4, 2, 2) + + la1 = stage0(x,is_training) + tr1 = transition_layer([la1], [256], channels_2,is_training) + + st2 = stage(tr1, num_modules_2, channels_2,is_training) + tr2 = transition_layer(st2, channels_2, channels_3,is_training) - stage1 = stage0(conv_4,is_training) - trans1 = translayer([stage1], [256], channels_s2,is_training) - stage2 = stage(trans1, num_modules_s2, channels_s2,is_training) - trans2 = translayer(stage2, channels_s2, channels_s3,is_training) - stage3 = stage(trans2, num_modules_s3, channels_s3,is_training,multi_scale_output=False) + st3 = stage(tr2, num_modules_3, channels_3,is_training,multi_scale_output=False) - stg3=tf.concat(stage3,axis=-1) - squeeze=channel_squeeze(stg3, stg3.shape[-1], name="squeeze") + st=tf.concat(st3,axis=-1) + + + print(st3) + + seqeese=channel_squeeze(st, st.shape[-1], 1, name="se_leyaer") + + st0=tf.concat([st3[0],st3[1]],axis=-1) + st0=spatial_pooling(st0) + new_feature = tf.concat([st0, seqeese], axis=-1) - spatial=tf.concat([stage3[0],stage3[1]],axis=-1) - # spatial=pyramid_pooling_block(spatial, [1, 2, 4, 8]) - spatial=spatial_pooling(spatial) - new_feature = tf.concat([spatial, squeeze], axis=-1) new_feature = bn(new_feature, is_training) new_feature = tf.nn.relu(new_feature) result=conv2d(new_feature, 128, 1, padding='SAME') - - up1=tf.image.resize_bilinear(result,size=(stage3[0].shape[1]*2,stage3[0].shape[2]*2)) + up1=tf.image.resize_bilinear(result,size=(st3[0].shape[1]*2,st3[0].shape[2]*2)) + print(up1) up1 = bn(up1, is_training) up1 = tf.nn.relu(up1) up1 = conv2d(up1, 64, 3) up2 = tf.image.resize_bilinear(up1, size=(up1.shape[1]*2, up1.shape[2]*2)) + print(up2) up2 = bn(up2, is_training) up2 = tf.nn.relu(up2) up2 = conv2d(up2, 32, 3) up2 = bn(up2, is_training) up2 = tf.nn.relu(up2) - final = conv2d(up2, 1, 1, padding='valid') + final = conv2d(up2, 1, 1, padding='VALID') return final \ No newline at end of file diff --git a/predict.py b/predict.py new file mode 100644 index 0000000..42bdc2e --- /dev/null +++ b/predict.py @@ -0,0 +1,52 @@ +import scipy +import sys +import os +import numpy as np +import tensorflow as tf + +batch_size = 1 +if not tf.test.is_built_with_cuda(): + os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" + os.environ["CUDA_VISIBLE_DEVICES"]="-1" + + +# SpaceNet +# test_img=sorted(glob.glob(r'/media/lc/vge_lc/spacenet/shanghai_vegas_test_result/test_image/*.png')) +# Urban +# test_img = np.array(sorted(glob.glob(r'/home/lc/Jupyter_projects/resatt/Urban 3D Challenge Data/d_test/img/*.png'))) + +def predict(test_img_path,pb_path,save_path): + name=os.listdir(test_img_path) + + with tf.Graph().as_default(): + output_graph_def = tf.GraphDef() + with open(pb_path, "rb") as f: + output_graph_def.ParseFromString(f.read()) + tf.import_graph_def(output_graph_def, name="") + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + input_image_tensor = sess.graph.get_tensor_by_name("Placeholder:0") + output_tensor_name = sess.graph.get_tensor_by_name("conv2d_149/Conv2D:0") + + for j in range(0, len(name)): + x_batch = os.path.join(test_img_path,name[j]) + i = os.path.basename(x_batch) + x_batch = scipy.misc.imread(x_batch) / 255.0 + x_batch = np.expand_dims(x_batch, axis=0) + predict = sess.run(output_tensor_name, feed_dict={input_image_tensor: x_batch}) + predict[predict < 0.5] = 0 + predict[predict >= 0.5] = 1 + result = np.squeeze(predict) + i = i.split('.')[0] + scipy.misc.imsave(save_path+'/{}.png'.format(i), result) + + +# test_img_path=sys.argv[0] +# pb_path =sys.argv[1] +# save_path=sys.argv[2] + +test_img_path="/media/lc/vge_lc/DL_DATE_BUILDING/WHU/cropped image tiles and raster labels/test/image" +pb_path='/home/lc/Jupyter_projects/resatt/MAPNet/checkpoint_ori/mapnet_model_whu.pb' +save_path='./test_result_temp' +predict(test_img_path,pb_path,save_path) + diff --git a/readme.md b/readme.md index 8f87713..be00154 100644 --- a/readme.md +++ b/readme.md @@ -1,48 +1,96 @@ # **MAP-Net: Multi Attending Path Neural Network for Building Footprint Extraction from Remote Sensing Imagery** -## The manuscript -Accurately and efficiently extracting building footprints from a wide range of remote sensed imagery remains a challenge due to their complex structure, variety of scales and diverse appearances. Existing convolutional neural network (CNN)-based building extraction methods are complained that they cannot detect the tiny buildings because the spatial information of CNN feature maps are lost during repeated pooling operations of the CNN, and the large buildings still have inaccurate segmentation edges. Moreover, features extracted by a CNN are always partial which restricted by the size of the respective field, and large-scale buildings with low texture are always discontinuous and holey when extracted. This paper proposes a novel multi attending path neural network (MAP-Net) for accurately extracting multiscale building footprints and precise boundaries. MAP-Net learns spatial localization-preserved multiscale features through a multi-parallel path in which each stage is gradually generated to extract high-level semantic features with fixed resolution. Then, an attention module adaptively squeezes channel-wise features from each path for optimization, and a pyramid spatial pooling module captures global dependency for refining discontinuous building footprints. Experimental results show that MAP-Net outperforms state-of-the-art (SOTA) algorithms in boundary localization accuracy as well as continuity of large buildings. Specifically, our method achieved 0.68\%, 1.74\%, 1.46\% precision, and 1.50\%, 1.53\%, 0.82\% IoU score improvement without increasing computational complexity compared with the latest HRNetv2 on the Urban 3D, Deep Globe and WHU datasets, respectively. +## Related resources + +### [[Paper]]( https://arxiv.org/abs/1910.12060 ) + +### The manuscript + +Accurately and efficiently extracting building footprints from a wide range of remote sensed imagery remains a challenge due to their complex structure, variety of scales and diverse appearances. Existing convolutional neural network (CNN)-based building extraction methods are complained that they cannot detect the tiny buildings because the spatial information of CNN feature maps are lost during repeated pooling operations of the CNN, and the large buildings still have inaccurate segmentation edges. Moreover, features extracted by a CNN are always partial which restricted by the size of the respective field, and large-scale buildings with low texture are always discontinuous and holey when extracted. This paper proposes a novel multi attending path neural network (MAP-Net) for accurately extracting multiscale building footprints and precise boundaries. MAP-Net learns spatial localization-preserved multiscale features through a multi-parallel path in which each stage is gradually generated to extract high-level semantic features with fixed resolution. Then, an attention module adaptively squeezes channel-wise features from each path for optimization, and a pyramid spatial pooling module captures global dependency for refining discontinuous building footprints. Experimental results show that MAP-Net outperforms state-of-the-art (SOTA) algorithms in boundary localization accuracy as well as continuity of large buildings. Specifically, our method achieved 0.68%, 1.74%, 1.46% precision, and 1.50%, 1.53%, 0.82% IoU score improvement without increasing computational complexity compared with the latest HRNetv2 on the Urban 3D, Deep Globe and WHU datasets, respectively. + + The manuscript can be visited via https://arxiv.org/abs/1910.12060 -## Datasets -* [Whu](http://study.rsgis.whu.edu.cn/pages/download/building_dataset.html) -* [Urban](https://spacenetchallenge.github.io/datasets/Urban_3D_Challenge_summary.html) -* [Space Net](https://spacenetchallenge.github.io/datasets/spacenetBuildings-V2summary.html) + +### Datasets: + +* [WHU Building Dataset](http://study.rsgis.whu.edu.cn/pages/download/building_dataset.html) +* [The USSOCOM Urban 3D Challenge](https://spacenetchallenge.github.io/datasets/Urban_3D_Challenge_summary.html) +* [The SpaceNet Buildings Dataset](https://spacenetchallenge.github.io/datasets/spacenetBuildings-V2summary.html) + + + + + + ## The Code ### Requirements: -* tensorflow -* keras -* numpy -* scipy + +* tensorflow = 1.13.1 + +* python = 3.7.2 + +* opencv-python = 4.0.0 + +* keras = 2.2.4 + +* numpy = 1.16.2 + +* scipy = 1.2.1 + + ### Usage: -* Clone the repository: ```git clone https://github.com/lehaifeng/MAPNet.git``` - * Modify the related training and validation dataset paths in load_data.py; - * Hyper-parameters configuration and training are implemented in train.py; +* Training the model: + + * Clone the repository: ```git clone https://github.com/lehaifeng/MAPNet.git``` + + * Modify the related training and validation dataset paths in *load_data.py*; + + ~~~python + #train_img:path to training images + #train_label:train labels with values belongs to 0(background) or 1(building) + #valid_img:path to validation images + #valid_label:labels corresponding to validation images + ~~~ + + * Hyper-parameters configuration and training are implemented in *train.py*; + * The tensorflow implementation of MAP-Net and other related networks are in the model folder; - * test.py load the trained model and predict the test dataset, and accuracy.py evaluate the pixel-level IoU, precision, recall and F1_score metric. + + * *test.py* load the trained model and predict the test dataset, and *accuracy.py* evaluate the pixel-level IoU, precision, recall and F1_score metric. + + + +* Description about how to test trained model on WHU datasets: + + * Download the trained model file of our proposed MAPNet in the checkpoint folder on WHU datasets. + * Modify the *test_img_path*, *pb_path*, and *save_path* according to your file directory in *predict.py* before run it. + * Modify the *data1* and *data2* to *ground truth dir* and *save_dir* in *accuracy.py* to calculate the accuracy. + + * Comments: We have trained the model on train sets which include 4736 cropped image tiles and test model on val and test sets together. ## MAP-Net Structure of MAP-Net -​*Structure of the proposed MAP-Net, which composed of three modules (A) Detail preserved multipath feature extract network; (B) Attention based features adaptive Squeeze and global spatial pooling enhancement module; (C) Up sampling and building footprint extraction module. Conv block is composed of series residual modules to extract features and shared with each path. Gen block generates new parallel path to extract richer semantic features on the basic of Conv block.* +​ *Structure of the proposed MAP-Net, which composed of three modules (A) Detail preserved multipath feature extract network; (B) Attention based features adaptive Squeeze and global spatial pooling enhancement module; (C) Up sampling and building footprint extraction module. Conv block is composed of series residual modules to extract features and shared with each path. Gen block generates new parallel path to extract richer semantic features on the basic of Conv block.* ## Result - + -​*Example of results with the UNet, PSPNet, ResNet101, HRNetv2 and our proposed method respectively on the WHU dataset. (a) Original image. (b) UNet. (c) PSPNet. (d) ResNet101. (e) HRNetv2. (f) Ours. (g) Ground truth.* +​ *Example of results with the UNet, PSPNet, ResNet101, HRNetv2 and our proposed method respectively on the WHU dataset. (a) Original image. (b) UNet. (c) PSPNet. (d) ResNet101. (e) HRNetv2. (f) Ours. (g) Ground truth.* More results can be find in the image folder. ## Citation -If this repo is useful in your research, please kindly consider citing our paper as follow. + ``` Bibtex @article{zhu2019mapnet, diff --git a/test.py b/test.py index 5b1ce85..b2bedd4 100644 --- a/test.py +++ b/test.py @@ -13,7 +13,7 @@ batch_size = 1 img = tf.placeholder(tf.float32, [batch_size, 512, 512, 3]) -# WHU +# test images path to WHU datasets test_img = sorted( glob.glob(r'/media/lc/vge_lc/DL_DATE_BUILDING/WHU/cropped image tiles and raster labels/test/image/*.png')) # SpaceNet @@ -26,8 +26,9 @@ saver = tf.train.Saver(tf.global_variables()) -def save(): +def predict(): tf.global_variables_initializer().run() + #load trained model from checkpoint_dir checkpoint_dir = './checkpoint/' ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: @@ -47,9 +48,10 @@ def save(): predict[predict >= 0.5] = 1 result = np.squeeze(predict) i = i.split('.')[0] + #save the predicted image to ./test_result_temp with the same name of test images scipy.misc.imsave('./test_result_temp/{}.png'.format(i), result) with tf.Session() as sess: - save() + predict()