Commit 7c0c5bd: fatal bugs fix

HiKapok committed Mar 14, 2018 · 1 parent fbc51f4 · commit 7c0c5bd
Showing 8 changed files with 623 additions and 63 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -1,11 +1,11 @@
  # X-Detector
  X-Detector is a collection of several object detection algorithms, some of which have not appeared in any academic paper.

- Up to now, this repository contains code of the re-implement of [Light-Head R-CNN](https://arxiv.org/abs/1711.07264) and the debugging process is still going on. While several other detectors(named X-Det now) are also included, the main idea behind X-Det is to introduce explicit attention mechanisms between feature map channels, so I would like to change its name to "ABC (**A**ttention **B**etween **C**hannels)" later when the performance get to 0.7+mAP on PASCAL VOC 2007 Test Dataset (now only ~0.56mAP was achieved).
+ Up to now, this repository contains code of the re-implementation of [Light-Head R-CNN](https://arxiv.org/abs/1711.07264) and the training process is still going on. While several other detectors (named X-Det for now) are also included, the main idea behind X-Det is to introduce explicit attention mechanisms between feature map channels, so I would like to rename it "ABC (**A**ttention **B**etween **C**hannels)" once performance reaches 0.7+ mAP on the PASCAL VOC 2007 test set (only ~0.56 mAP has been achieved so far).

  The pre-trained weights of the backbone networks can be found at [Resnet-50 backbone](https://github.com/tensorflow/models/tree/master/official/resnet) and [Xception backbone](https://github.com/HiKapok/Xception_Tensorflow). The latest version of PsRoIAlign is [here](https://github.com/HiKapok/PSROIAlign).

- You can use part of these codes for your research purpose, but the ideas like the current implement of X-Det is not allowed to copy without permissions. While the codes for Light-Head R-CNN can be used for your research without any permission but following [Apache License 2.0](https://github.com/HiKapok/X-Detector/blob/master/LICENSE).
+ You may use parts of this code for research purposes, but ideas such as the current implementation of X-Det may not be copied without permission. The code for Light-Head R-CNN may be used for research without permission, subject to the [Apache License 2.0](https://github.com/HiKapok/X-Detector/blob/master/LICENSE).

  Here are some demo result images of X-Det V2; debugging is still in progress to improve performance:
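The "attention between channels" idea is only described in prose above, and the README asks that the X-Det implementation itself not be copied. Purely as a generic illustration of the concept, here is a squeeze-and-excitation style channel gate; this is an assumed sketch, not the X-Det design:

```python
import tensorflow as tf

def channel_attention(feature_map, reduction=4):
    """Generic channel-gating sketch over an NHWC feature map.

    Illustration only: the actual X-Det mechanism is not shown in this commit.
    """
    channels = feature_map.get_shape().as_list()[-1]
    # Squeeze: global average pool to one descriptor per channel.
    squeezed = tf.reduce_mean(feature_map, axis=[1, 2])
    # Excite: bottleneck MLP producing a weight in (0, 1) per channel.
    hidden = tf.layers.dense(squeezed, channels // reduction, activation=tf.nn.relu)
    gates = tf.layers.dense(hidden, channels, activation=tf.nn.sigmoid)
    # Reweight each channel of the feature map by its learned gate.
    return feature_map * tf.reshape(gates, [-1, 1, 1, channels])
```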
499 changes: 499 additions & 0 deletions light_head_rfcn_eval.py

Large diffs are not rendered by default.

61 changes: 36 additions & 25 deletions light_head_rfcn_train.py
@@ -33,7 +33,7 @@
 from preprocessing import preprocessing_factory
 from preprocessing import anchor_manipulator
 
-
+#--run_on_cloud=False --data_format=channels_last --batch_size=1 --log_every_n_steps=1
 # hardware related configuration
 tf.app.flags.DEFINE_integer(
     'num_readers', 16,
@@ -79,7 +79,7 @@
     'train_epochs', None,
     'The number of epochs to use for training.')
 tf.app.flags.DEFINE_integer(
-    'batch_size', 4,
+    'batch_size', 8,
     'Batch size for training and evaluation.')
 tf.app.flags.DEFINE_boolean(
     'using_ohem', True, 'Wether to use OHEM.')
@@ -90,7 +90,7 @@
     'roi_one_image', 64,
     'Batch size of RoIs for training in the second stage.')
 tf.app.flags.DEFINE_string(
-    'data_format', 'channels_last', # 'channels_first' or 'channels_last'
+    'data_format', 'channels_first', # 'channels_first' or 'channels_last'
     'A flag to override the data format used in the model. channels_first '
     'provides a performance boost on GPU but is not always compatible '
     'with CPU. If left unspecified, the data format will be chosen '
@@ -123,7 +123,7 @@
     'rpn_neg_threshold', 0.3, 'Matching threshold for the negtive examples in the loss function for rpn.')
 # optimizer related configuration
 tf.app.flags.DEFINE_float(
-    'weight_decay', 0.0005, 'The weight decay on the model weights.')
+    'weight_decay', 0.0003, 'The weight decay on the model weights.')
 tf.app.flags.DEFINE_float(
     'momentum', 0.9,
     'The momentum for the MomentumOptimizer and RMSPropOptimizer.')
@@ -161,10 +161,10 @@
     'ignore_missing_vars', True,
     'When restoring a checkpoint would ignore missing variables.')
 tf.app.flags.DEFINE_boolean(
-    'run_on_cloud', False,
+    'run_on_cloud', True,
     'Wether we will train on cloud (pre-trained model will be placed in the "data_dir/cloud_checkpoint_path").')
 tf.app.flags.DEFINE_string(
-    'cloud_checkpoint_path', 'xception_model/xception_model',
+    'cloud_checkpoint_path', 'xception_model/xception_model.ckpt',
     'The path to a checkpoint from which to fine-tune.')
 #CUDA_VISIBLE_DEVICES
 FLAGS = tf.app.flags.FLAGS
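Taken together with the commented command line added near the top of the file, these defaults make cloud training the normal path, while local debugging is presumably launched along the lines of `python light_head_rfcn_train.py --run_on_cloud=False --data_format=channels_last --batch_size=1 --log_every_n_steps=1`.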
@@ -232,7 +232,7 @@ def input_fn():
         #print(list_from_batch[-4], list_from_batch[-3])
         return list_from_batch[-1], {'targets': list_from_batch[:-1],
                 'rpn_decode_fn': lambda pred : anchor_encoder_decoder.decode_all_anchors([pred], squeeze_inner=True)[0],
-                'head_decode_fn': lambda pred : anchor_encoder_decoder.ext_decode_rois,
+                'head_decode_fn': lambda rois, pred : anchor_encoder_decoder.ext_decode_rois(rois, pred, head_prior_scaling=[1., 1., 1., 1.]),
                 'rpn_encode_fn': lambda rois : anchor_encoder_decoder.ext_encode_rois(rois, list_from_batch[-4], list_from_batch[-3], FLAGS.roi_one_image, FLAGS.fg_ratio, 0.1, head_prior_scaling=[1., 1., 1., 1.]),
                 'num_anchors_list': num_anchors_list}
     return input_fn
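The `head_decode_fn` change above looks like one of the "fatal bugs" of the commit title: the old lambda took `pred` but evaluated to the decoder itself without ever calling it. A minimal, self-contained illustration of the difference (the decoder body here is an invented stand-in, just to make the snippet runnable):

```python
def ext_decode_rois(rois, pred, head_prior_scaling):
    # Stand-in for anchor_encoder_decoder.ext_decode_rois.
    return rois

# Before the fix: the argument is ignored and a function object comes back,
# so downstream code never receives decoded boxes.
buggy_decode_fn = lambda pred: ext_decode_rois
print(callable(buggy_decode_fn("predictions")))  # True: no decoding happened

# After the fix: RoIs and predictions are actually forwarded to the decoder.
fixed_decode_fn = lambda rois, pred: ext_decode_rois(
    rois, pred, head_prior_scaling=[1., 1., 1., 1.])
print(fixed_decode_fn([[0., 0., 1., 1.]], "predictions"))  # decoded boxes
```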
@@ -257,7 +257,7 @@ def modified_smooth_l1(bbox_pred, bbox_targets, bbox_inside_weights = 1., bbox_o
 
     return outside_mul
 
-def xdet_model_fn(features, labels, mode, params):
+def lighr_head_model_fn(features, labels, mode, params):
     """Our model_fn for ResNet to be used with our Estimator."""
     num_anchors_list = labels['num_anchors_list']
     num_feature_layers = len(num_anchors_list)
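Only the tail of `modified_smooth_l1` is visible in this hunk. For context, the usual Fast R-CNN style formulation that such a helper implements is sketched below; this is an assumption from the name and signature, not a copy of the repository's hidden lines:

```python
import tensorflow as tf

def smooth_l1_sketch(bbox_pred, bbox_targets, sigma=1.0):
    # smooth_l1(x) = 0.5 * (sigma * x)^2   if |x| < 1 / sigma^2
    #              = |x| - 0.5 / sigma^2   otherwise
    sigma2 = sigma * sigma
    diff = bbox_pred - bbox_targets
    abs_diff = tf.abs(diff)
    is_small = tf.cast(tf.less(abs_diff, 1.0 / sigma2), tf.float32)
    return (is_small * 0.5 * sigma2 * tf.square(diff)
            + (1.0 - is_small) * (abs_diff - 0.5 / sigma2))
```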
@@ -336,48 +336,58 @@ def downsample_impl(now_count, need_count):
         return tf.gather(cls_pred, final_keep_indices), tf.gather(location_pred, final_keep_indices), tf.cast(tf.gather(tf.clip_by_value(glabels, 0, params['num_classes']), final_keep_indices) > 0, tf.int64), tf.gather(gscores, final_keep_indices), tf.gather(gtargets, final_keep_indices)
 
     cls_pred, location_pred, glabels, gscores, gtargets = select_samples(cls_pred, location_pred, glabels, gscores, gtargets)
 
     # Calculate loss, which includes softmax cross entropy and L2 regularization.
     rpn_cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=glabels, logits=cls_pred)
 
     # Create a tensor named cross_entropy for logging purposes.
-    tf.identity(rpn_cross_entropy, name='rpn_cross_entropy_loss')
+    rpn_cross_entropy = tf.identity(rpn_cross_entropy, name='rpn_cross_entropy_loss')
     tf.summary.scalar('rpn_cross_entropy_loss', rpn_cross_entropy)
 
     total_positive_mask = (glabels > 0)
     gtargets = tf.boolean_mask(gtargets, tf.stop_gradient(total_positive_mask))
     location_pred = tf.boolean_mask(location_pred, tf.stop_gradient(total_positive_mask))
     #gtargets = tf.Print(gtargets, [gtargets], message='gtargets:', summarize=100)
 
     rpn_l1_distance = modified_smooth_l1(location_pred, gtargets, sigma=1.)
     rpn_loc_loss = tf.reduce_mean(tf.reduce_sum(rpn_l1_distance, axis=-1))
     rpn_loc_loss = tf.identity(rpn_loc_loss, name='rpn_location_loss')
     tf.summary.scalar('rpn_location_loss', rpn_loc_loss)
     tf.losses.add_loss(rpn_loc_loss)
-    #print(rpn_loc_loss)
-
-    proposals_bboxes, proposals_targets, proposals_labels, proposals_scores = xception_body.get_proposals(rpn_object_score, rpn_bboxes_pred, labels['rpn_encode_fn'], params['rpn_pre_nms_top_n'], params['rpn_post_nms_top_n'], params['nms_threshold'], params['rpn_min_size'], params['data_format'])
+    rpn_loss = tf.identity(rpn_loc_loss + rpn_cross_entropy, name='rpn_loss')
+    tf.summary.scalar('rpn_loss', rpn_loss)
+    #print(rpn_loc_loss)
+
+    proposals_bboxes, proposals_targets, proposals_labels, proposals_scores = xception_body.get_proposals(rpn_object_score, rpn_bboxes_pred, labels['rpn_encode_fn'], params['rpn_pre_nms_top_n'], params['rpn_post_nms_top_n'], params['nms_threshold'], params['rpn_min_size'], (mode == tf.estimator.ModeKeys.TRAIN), params['data_format'])
+    #proposals_targets = tf.Print(proposals_targets, [proposals_targets], message='proposals_targets0:')
     def head_loss_func(cls_score, bboxes_reg, select_indices, proposals_targets, proposals_labels):
         if select_indices is not None:
             proposals_targets = tf.gather(proposals_targets, select_indices, axis=1)
             proposals_labels = tf.gather(proposals_labels, select_indices, axis=1)
         # Calculate loss, which includes softmax cross entropy and L2 regularization.
         head_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=proposals_labels, logits=cls_score)
 
         total_positive_mask = tf.cast((proposals_labels > 0), tf.float32)
         # proposals_targets = tf.boolean_mask(proposals_targets, tf.stop_gradient(total_positive_mask))
         # bboxes_reg = tf.boolean_mask(bboxes_reg, tf.stop_gradient(total_positive_mask))
         head_loc_loss = modified_smooth_l1(bboxes_reg, proposals_targets, sigma=1.)
-        head_loc_loss = tf.reduce_sum(head_loc_loss, axis=-1)
+        head_loc_loss = tf.reduce_sum(head_loc_loss, axis=-1) * total_positive_mask
         if (params['using_ohem'] and (select_indices is not None)) or (not params['using_ohem']):
             head_cross_entropy_loss = tf.reduce_mean(head_cross_entropy)
             head_cross_entropy_loss = tf.identity(head_cross_entropy_loss, name='head_cross_entropy_loss')
             tf.summary.scalar('head_cross_entropy_loss', head_cross_entropy_loss)
 
-            head_location_loss = tf.reduce_mean(head_loc_loss)
+            head_location_loss = tf.reduce_mean(head_loc_loss)/params['fg_ratio']
             head_location_loss = tf.identity(head_location_loss, name='head_location_loss')
             tf.summary.scalar('head_location_loss', head_location_loss)
 
         # print(head_cross_entropy_loss)
         # print(head_location_loss)
 
-        return head_cross_entropy + head_loc_loss
+        return head_cross_entropy + head_loc_loss/params['fg_ratio']
 
-    head_loss = xception_body.get_head(large_sep_feature, ps_roi_align, 7, 7, lambda cls, bbox, indices : head_loss_func(cls, bbox, indices, proposals_targets, proposals_labels), proposals_bboxes, proposals_targets, proposals_labels, proposals_scores, params['num_classes'], (mode == tf.estimator.ModeKeys.TRAIN), params['using_ohem'], params['ohem_roi_one_image'], params['data_format'], 'final_head')
+    head_loss = xception_body.get_head(large_sep_feature, lambda input_, bboxes_, grid_width_, grid_height_ : ps_roi_align(input_, bboxes_, grid_width_, grid_height_, pool_method), 7, 7, lambda cls, bbox, indices : head_loss_func(cls, bbox, indices, proposals_targets, proposals_labels), proposals_bboxes, params['num_classes'], (mode == tf.estimator.ModeKeys.TRAIN), params['using_ohem'], params['ohem_roi_one_image'], params['data_format'], 'final_head')
 
     # Create a tensor named cross_entropy for logging purposes.
-    tf.identity(head_loss, name='head_loss')
+    head_loss = tf.identity(head_loss, name='head_loss')
     tf.summary.scalar('head_loss', head_loss)
 
     tf.losses.add_loss(head_loss)
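Two of the head-loss edits above work together: multiplying the per-RoI localization loss by `total_positive_mask` zeroes out background RoIs, and dividing the mean by `fg_ratio` rescales it into (approximately) a mean over positives only. A tiny numeric sketch of that normalization, with invented values:

```python
# Suppose 64 RoIs per image and fg_ratio = 0.25, i.e. about 16 positives.
roi_count, fg_ratio = 64, 0.25
per_roi_loc_loss = [1.0] * 16 + [0.0] * 48  # background losses zeroed by the mask

mean_over_all = sum(per_roi_loc_loss) / roi_count  # 0.25, diluted by background
print(mean_over_all / fg_ratio)                    # 1.0, the mean over the 16 positives
```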
@@ -387,7 +397,7 @@ def head_loss_func(cls_score, bboxes_reg, select_indices, proposals_targets, pro
 
     # Add weight decay to the loss. We exclude the batch norm variables because
     # doing so leads to a small improvement in accuracy.
-    loss = rpn_cross_entropy + rpn_loc_loss + head_loss + params['weight_decay'] * tf.add_n(
+    loss = 10 * rpn_cross_entropy + rpn_loc_loss + head_loss + params['weight_decay'] * tf.add_n(
         [tf.nn.l2_loss(v) for v in tf.trainable_variables()
         if 'batch_normalization' not in v.name])
     total_loss = tf.identity(loss, name='total_loss')
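`head_loss_func` only receives `select_indices` when OHEM is enabled; the selection itself lives in `xception_body.get_head`, which this diff does not show. A generic sketch of what online hard example mining typically does (assumed behavior, not the repository's exact code):

```python
import tensorflow as tf

def ohem_select(per_roi_loss, ohem_roi_one_image):
    # Keep only the hardest RoIs: those with the largest current loss.
    k = tf.minimum(ohem_roi_one_image, tf.shape(per_roi_loss)[0])
    _, hard_indices = tf.nn.top_k(per_roi_loss, k=k)
    # The head losses are then gathered at these indices and averaged,
    # so easy background RoIs stop dominating the gradient.
    return hard_indices
```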
@@ -442,7 +452,7 @@ def main(_):
         session_config=config)
 
     xdetector = tf.estimator.Estimator(
-        model_fn=xdet_model_fn, model_dir=FLAGS.model_dir, config=run_config,
+        model_fn=lighr_head_model_fn, model_dir=FLAGS.model_dir, config=run_config,
         params={
             'resnet_size': FLAGS.resnet_size,
             'data_format': FLAGS.data_format,
@@ -480,6 +490,7 @@ def main(_):
             'learning_rate': 'learning_rate',
             'rpn_cross_entropy_loss': 'xception_lighthead/rpn_cross_entropy_loss',
             'rpn_location_loss': 'xception_lighthead/rpn_location_loss',
+            'rpn_loss': 'xception_lighthead/rpn_loss',
             'head_loss': 'xception_lighthead/head_loss',
             'head_cross_entropy_loss': 'xception_lighthead/final_head/head_cross_entropy_loss',
             'head_location_loss': 'xception_lighthead/final_head/head_location_loss',
@@ -488,11 +499,11 @@ def main(_):
 
     logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=FLAGS.log_every_n_steps)
 
-    debug_hook = tf_debug.LocalCLIDebugHook(thread_name_filter="MainThread$")
-    debug_hook.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
-
     print('Starting a training cycle.')
-    #xdetector.train(input_fn=input_pipeline(), hooks=[debug_hook])
-
+    # debug_hook = tf_debug.LocalCLIDebugHook(thread_name_filter="MainThread$")
+    # debug_hook.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
+    # xdetector.train(input_fn=input_pipeline(), hooks=[debug_hook])
     xdetector.train(input_fn=input_pipeline(), hooks=[logging_hook])
 
 if __name__ == '__main__':