Skip to content

Commit

Permalink
Use cc_binary rather than cc_library to reduce size of native library…
Browse files Browse the repository at this point in the history
… in APK from 5.5 MB to 3.2 MB (compressed).

Change: 113369407
  • Loading branch information
A. Unique TensorFlower authored and Vijay Vasudevan committed Jan 30, 2016
1 parent faf747a commit 8a59748
Show file tree
Hide file tree
Showing 60 changed files with 2,576 additions and 438 deletions.
61 changes: 51 additions & 10 deletions configure
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#!/bin/bash

if [ "$TF_UNOFFICIAL_SETTING" == "1" ]; then
echo -e "\nWARNING: You are configuring unofficial settings in TensorFlow. Because some external libraries are not backward compatible, these settings are largely untested and unsupported. \n" 1>&2
fi

## Set up python-related environment settings
while true; do
fromuser=""
Expand Down Expand Up @@ -44,32 +48,55 @@ fi

# Find out where the CUDA toolkit is installed
while true; do
  # Configure the Cuda SDK version to use. Only prompt for a non-default
  # version when the user opted into unofficial settings.
  default_cuda_version="7.0"
  if [ "$TF_UNOFFICIAL_SETTING" == "1" ]; then
    if [ -z "$TF_CUDA_VERSION" ]; then
      read -p "Please specify the Cuda SDK version you want to use. [Default is $default_cuda_version]: " TF_CUDA_VERSION
    fi
  fi
  if [ -z "$TF_CUDA_VERSION" ]; then
    TF_CUDA_VERSION=$default_cuda_version
  fi

  # Ask for the toolkit location unless it is already in the environment.
  fromuser=""
  if [ -z "$CUDA_TOOLKIT_PATH" ]; then
    default_cuda_path=/usr/local/cuda
    read -p "Please specify the location where CUDA $TF_CUDA_VERSION toolkit is installed. Refer to README.md for more details. [Default is $default_cuda_path]: " CUDA_TOOLKIT_PATH
    fromuser="1"
    if [ -z "$CUDA_TOOLKIT_PATH" ]; then
      CUDA_TOOLKIT_PATH=$default_cuda_path
    fi
  fi
  # Accept the path only if the versioned runtime library actually exists.
  if [ -e "$CUDA_TOOLKIT_PATH/lib64/libcudart.so.$TF_CUDA_VERSION" ]; then
    break
  fi
  echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/lib64/libcudart.so.$TF_CUDA_VERSION cannot be found"
  # If the bad value came from the environment (not the prompt), there is
  # nothing to retry with — fail hard instead of looping forever.
  if [ -z "$fromuser" ]; then
    exit 1
  fi
  TF_CUDA_VERSION=""
  CUDA_TOOLKIT_PATH=""
  # Retry
done

# Find out where the cuDNN library is installed
while true; do
  # Configure the Cudnn version to use. Only prompt for a non-default
  # version when the user opted into unofficial settings.
  default_cudnn_version="6.5"
  if [ "$TF_UNOFFICIAL_SETTING" == "1" ]; then
    if [ -z "$TF_CUDNN_VERSION" ]; then
      read -p "Please specify the Cudnn version you want to use. [Default is $default_cudnn_version]: " TF_CUDNN_VERSION
    fi
  fi
  if [ -z "$TF_CUDNN_VERSION" ]; then
    TF_CUDNN_VERSION=$default_cudnn_version
  fi

  # Ask for the cuDNN location unless it is already in the environment;
  # default to the CUDA toolkit directory chosen above.
  fromuser=""
  if [ -z "$CUDNN_INSTALL_PATH" ]; then
    default_cudnn_path=${CUDA_TOOLKIT_PATH}
    read -p "Please specify the location where cuDNN $TF_CUDNN_VERSION library is installed. Refer to README.md for more details. [Default is $default_cudnn_path]: " CUDNN_INSTALL_PATH
    fromuser="1"
    if [ -z "$CUDNN_INSTALL_PATH" ]; then
      CUDNN_INSTALL_PATH=$default_cudnn_path
    fi
    # NOTE(review): the scraped diff elided a couple of lines here; the value
    # returned from "read" is used unexpanded (so "~" would not work).
    # Going through one more level of expansion to handle that.
    CUDNN_INSTALL_PATH=$(bash -c "readlink -f $CUDNN_INSTALL_PATH")
  fi
  # The library may live directly in the install path or under lib64/.
  if [ -e "$CUDNN_INSTALL_PATH/libcudnn.so.${TF_CUDNN_VERSION}" -o -e "$CUDNN_INSTALL_PATH/lib64/libcudnn.so.${TF_CUDNN_VERSION}" ]; then
    break
  fi
  echo "Invalid path to cuDNN ${TF_CUDNN_VERSION} toolkit. Neither of the following two files can be found:"
  echo "$CUDNN_INSTALL_PATH/lib64/libcudnn.so.${TF_CUDNN_VERSION}"
  echo "$CUDNN_INSTALL_PATH/libcudnn.so.${TF_CUDNN_VERSION}"
  # Non-interactive values cannot be corrected by looping; abort.
  if [ -z "$fromuser" ]; then
    exit 1
  fi
  TF_CUDNN_VERSION=""
  CUDNN_INSTALL_PATH=""
  # Retry
done

# Persist the chosen CUDA/cuDNN settings for the build system to read.
cat > third_party/gpus/cuda/cuda.config <<EOF
# CUDA_TOOLKIT_PATH refers to the CUDA toolkit. Tensorflow requires Cuda $TF_CUDA_VERSION
# at the moment.
CUDA_TOOLKIT_PATH="$CUDA_TOOLKIT_PATH"
# CUDNN_INSTALL_PATH refers to the cuDNN toolkit. The cuDNN header and library
# files can be either in this directory, or under include/ and lib64/
# directories separately.
CUDNN_INSTALL_PATH="$CUDNN_INSTALL_PATH"
# The Cuda SDK version that should be used in this build
TF_CUDA_VERSION=$TF_CUDA_VERSION
# The Cudnn version that should be used in this build
TF_CUDNN_VERSION=$TF_CUDNN_VERSION
EOF

function UnofficialSetting() {
echo -e "\nWARNING: You are configuring unofficial settings in TensorFlow. Because some external libraries are not backward compatible, these settings are largely untested and unsupported. \n" 1>&2
# Configure the Cuda toolkit version to work with.
perl -pi -e "s,CUDA_VERSION = '[0-9\.]*',CUDA_VERSION = '$TF_CUDA_VERSION',s" tensorflow/core/platform/default/build_config.bzl
perl -pi -e "s,(GetCudaVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDA_VERSION\",s" tensorflow/stream_executor/dso_loader.cc

# Configure the Cudnn version to work with.
perl -pi -e "s,CUDNN_VERSION = '[0-9\.]*',CUDNN_VERSION = '$TF_CUDNN_VERSION',s" tensorflow/core/platform/default/build_config.bzl
perl -pi -e "s,(GetCudnnVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDNN_VERSION\",s" tensorflow/stream_executor/dso_loader.cc

# Configure the compute capabilities that TensorFlow builds for.
# Since Cuda toolkit is not backward-compatible, this is not guaranteed to work.
Expand Down
1 change: 1 addition & 0 deletions tensorflow/core/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ tf_cuda_library(
"graph/graph_constructor.h",
"graph/graph_def_builder.h",
"graph/node_builder.h",
"graph/validate.h",
"public/session.h",
"public/session_options.h",
"public/tensor_c_api.h",
Expand Down
92 changes: 80 additions & 12 deletions tensorflow/core/client/tensor_c_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -316,16 +316,16 @@ Status LoadLibrary(const char* library_filename, void** result,

} // namespace tensorflow

extern "C" {

void TF_Run(TF_Session* s,
// Input tensors
const char** c_input_names, TF_Tensor** c_inputs, int ninputs,
// Output tensors
const char** c_output_tensor_names, TF_Tensor** c_outputs,
int noutputs,
// Target nodes
const char** c_target_node_names, int ntargets, TF_Status* status) {
void TF_Run_Helper(TF_Session* s, const char* handle,
// Input tensors
const char** c_input_names, TF_Tensor** c_inputs,
int ninputs,
// Output tensors
const char** c_output_tensor_names, TF_Tensor** c_outputs,
int noutputs,
// Target nodes
const char** c_target_node_names, int ntargets,
TF_Status* status) {
status->status = Status::OK();
for (int i = 0; i < noutputs; i++) {
c_outputs[i] = NULL;
Expand Down Expand Up @@ -365,8 +365,13 @@ void TF_Run(TF_Session* s,
for (int i = 0; i < ntargets; i++) {
target_node_names[i] = c_target_node_names[i];
}
Status result =
s->session->Run(inputs, output_tensor_names, target_node_names, &outputs);
Status result;
if (handle == nullptr) {
result = s->session->Run(inputs, output_tensor_names, target_node_names,
&outputs);
} else {
result = s->session->PRun(handle, inputs, output_tensor_names, &outputs);
}
if (!result.ok()) {
status->status = result;
return;
Expand All @@ -392,6 +397,69 @@ void TF_Run(TF_Session* s,
}
}

extern "C" {

// Runs a full step of the session's graph: feeds `c_inputs` under
// `c_input_names`, fetches `c_output_tensor_names` into `c_outputs`, and
// runs `c_target_node_names`. Thin wrapper that delegates to TF_Run_Helper;
// a null handle selects the ordinary (non-partial) Session::Run path there.
void TF_Run(TF_Session* s,
            // Input tensors
            const char** c_input_names, TF_Tensor** c_inputs, int ninputs,
            // Output tensors
            const char** c_output_tensor_names, TF_Tensor** c_outputs,
            int noutputs,
            // Target nodes
            const char** c_target_node_names, int ntargets, TF_Status* status) {
  TF_Run_Helper(s, /*handle=*/nullptr, c_input_names, c_inputs, ninputs,
                c_output_tensor_names, c_outputs, noutputs,
                c_target_node_names, ntargets, status);
}

// Sets up a partial run: registers the feed names, fetch names and target
// nodes that subsequent TF_PRun calls may use (via Session::PRunSetup).
//
// On success, *handle receives a heap-allocated, NUL-terminated C string
// identifying the partial run; ownership passes to the caller (allocated
// with new[]). On failure, status->status carries the error and *handle is
// set to nullptr. (Fix: previously *handle was left uninitialized on the
// error path, so a caller that read it before checking status saw garbage.)
void TF_PRunSetup(TF_Session* s,
                  // Input names
                  const char** c_input_names, int ninputs,
                  // Output names
                  const char** c_output_tensor_names, int noutputs,
                  // Target nodes
                  const char** c_target_node_names, int ntargets, char** handle,
                  TF_Status* status) {
  status->status = Status::OK();

  // Copy the C string arrays into string vectors for the Session API.
  std::vector<tensorflow::string> input_names(ninputs);
  std::vector<tensorflow::string> output_tensor_names(noutputs);
  std::vector<tensorflow::string> target_node_names(ntargets);
  for (int i = 0; i < ninputs; i++) {
    input_names[i] = c_input_names[i];
  }
  for (int i = 0; i < noutputs; i++) {
    output_tensor_names[i] = c_output_tensor_names[i];
  }
  for (int i = 0; i < ntargets; i++) {
    target_node_names[i] = c_target_node_names[i];
  }
  tensorflow::string new_handle;
  Status result = s->session->PRunSetup(input_names, output_tensor_names,
                                        target_node_names, &new_handle);
  if (result.ok()) {
    // size() + 1 copies the trailing NUL so *handle is a valid C string.
    *handle = new char[new_handle.size() + 1];
    memcpy(*handle, new_handle.c_str(), new_handle.size() + 1);
  } else {
    *handle = nullptr;  // Well-defined output even on failure.
    status->status = result;
  }
}

// Executes one step of a partial run previously prepared by TF_PRunSetup.
// `handle` is the identifier TF_PRunSetup produced; the remaining arguments
// mirror TF_Run. Thin wrapper over TF_Run_Helper, which takes the
// Session::PRun path whenever a non-null handle is supplied.
void TF_PRun(TF_Session* s, const char* handle,
             // Input tensors
             const char** c_input_names, TF_Tensor** c_inputs, int ninputs,
             // Output tensors
             const char** c_output_tensor_names, TF_Tensor** c_outputs,
             int noutputs,
             // Target nodes
             const char** c_target_node_names, int ntargets,
             TF_Status* status) {
  TF_Run_Helper(s, handle, c_input_names, c_inputs, ninputs,
                c_output_tensor_names, c_outputs, noutputs,
                c_target_node_names, ntargets, status);
}

// Returns a pointer to the buffer's underlying bytes; no ownership change.
const void* TF_BufferData(TF_Buffer* buffer) {
  return buffer->data;
}

// Returns the number of bytes held by the buffer.
size_t TF_BufferLength(TF_Buffer* buffer) {
  return buffer->length;
}
Expand Down
97 changes: 48 additions & 49 deletions tensorflow/core/common_runtime/copy_tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,58 +53,57 @@ void CopyTensor::ViaDMA(const string& edge_name,
StatusCallback done) {
initialization_done = true;
port::Tracing::ScopedAnnotation annotation(edge_name);
VLOG(1) << "CopyViaDMA " << edge_name;
const size_t total_bytes = input->TotalBytes();

// Note that 0-size tensors have no backing buffer.
if (total_bytes > 0) {
const DeviceType src_device_type(src_alloc_attr.on_host()
? DEVICE_CPU
: src->attributes().device_type());
const DeviceType dst_device_type(dst_alloc_attr.on_host()
? DEVICE_CPU
: dst->attributes().device_type());
const bool non_cpu_src = src_device_type != DeviceType(DEVICE_CPU);
const bool non_cpu_dst = dst_device_type != DeviceType(DEVICE_CPU);

if (non_cpu_src) {
if (non_cpu_dst) {
// Device to device copy. Look through registry for an appropriate
// CopyFunction.
std::vector<RegistrationInfo>* registry = MutableRegistry();
for (const RegistrationInfo& ri : *registry) {
if (ri.sender_device_type == src_device_type &&
ri.receiver_device_type == dst_device_type) {
ri.copy_function(send_dev_context, recv_dev_context, src, dst,
src_alloc_attr, dst_alloc_attr, input, output,
done);
return;
}
}

// TODO(josh11b): If no CopyFunction is found, we currently fail
// but we could copy between devices via CPU.
done(errors::Unimplemented(
"No function registered to copy from devices of type ",
src_device_type.type(), " to devices of type ",
dst_device_type.type()));
} else {
// Device to host copy.
return send_dev_context->CopyDeviceTensorToCPU(input, edge_name, src,
output, done);
VLOG(1) << "Copy " << edge_name;

const DeviceType src_device_type(
src_alloc_attr.on_host() ? DEVICE_CPU : src->attributes().device_type());
const DeviceType dst_device_type(
dst_alloc_attr.on_host() ? DEVICE_CPU : dst->attributes().device_type());
const bool non_cpu_src = src_device_type != DeviceType(DEVICE_CPU);
const bool non_cpu_dst = dst_device_type != DeviceType(DEVICE_CPU);

// E.g., gpu -> gpu
if (non_cpu_src && non_cpu_dst) {
// Device to device copy. Look through registry for an appropriate
// CopyFunction.
std::vector<RegistrationInfo>* registry = MutableRegistry();
for (const RegistrationInfo& ri : *registry) {
if (ri.sender_device_type == src_device_type &&
ri.receiver_device_type == dst_device_type) {
ri.copy_function(send_dev_context, recv_dev_context, src, dst,
src_alloc_attr, dst_alloc_attr, input, output, done);
return;
}
} else if (non_cpu_dst) {
// Host to Device copy.
// Note that this is already an async copy.
recv_dev_context->CopyCPUTensorToDevice(input, dst, output, done);
} else {
*output = *input;
done(Status::OK());
}
} else {
// buffer is empty
done(Status::OK());

// TODO(josh11b): If no CopyFunction is found, we currently fail
// but we could copy between devices via CPU.
done(errors::Unimplemented(
"No function registered to copy from devices of type ",
src_device_type.type(), " to devices of type ",
dst_device_type.type()));
return;
}

// E.g., gpu -> cpu
if (non_cpu_src && !non_cpu_dst) {
// Device to host copy.
send_dev_context->CopyDeviceTensorToCPU(input, edge_name, src, output,
done);
return;
}

// E.g., cpu -> gpu
if (!non_cpu_src && non_cpu_dst) {
// Host to Device copy.
recv_dev_context->CopyCPUTensorToDevice(input, dst, output, done);
return;
}

// cpu -> cpu
CHECK(!non_cpu_src && !non_cpu_dst);
*output = *input;
done(Status::OK());
}

// static
Expand Down
Loading

0 comments on commit 8a59748

Please sign in to comment.