Addition of documentation and clippy fixes
guillaume-be committed Oct 4, 2020
1 parent 6dde324 commit 1b3711e
Showing 17 changed files with 588 additions and 102 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -53,7 +53,7 @@ all-tests = []
features = ["doc-only"]

[dependencies]
rust_tokenizers = {version = "~5.0.1", path = "E:/Coding/backup-rust/rust-tokenizers/main"}
rust_tokenizers = "~5.0.1"
tch = "~0.2.0"
serde_json = "1.0.56"
serde = { version = "1.0.114", features = ["derive"] }
10 changes: 5 additions & 5 deletions benches/translation_benchmark.rs
@@ -85,11 +85,11 @@ fn bench_squad(c: &mut Criterion) {
"They found that certain wavelengths of light, which are usually absorbed by water, weakened when the planet was in the way, indicating not only does K2-18b have an atmosphere, but the atmosphere contains water in vapour form.",
"The team from UCL then analyzed the Montreal team's data using their own software and confirmed their conclusion.",
"This was not the first time scientists have found signs of water on an exoplanet, but previous discoveries were made on planets with high temperatures or other pronounced differences from Earth.",
// "This is the first potentially habitable planet where the temperature is right and where we now know there is water,\" said UCL astronomer Angelos Tsiaras.",
// "It's the best candidate for habitability right now.\" \"It's a good sign\", said Ryan Cloutier of the Harvard–Smithsonian Center for Astrophysics, who was not one of either study's authors.",
// "Overall,\" he continued, \"the presence of water in its atmosphere certainly improves the prospect of K2-18b being a potentially habitable planet, but further observations will be required to say for sure. \"",
// "K2-18b was first identified in 2015 by the Kepler space telescope.",
// "It is about 110 light-years from Earth and larger but less dense.",
"This is the first potentially habitable planet where the temperature is right and where we now know there is water,\" said UCL astronomer Angelos Tsiaras.",
"It's the best candidate for habitability right now.\" \"It's a good sign\", said Ryan Cloutier of the Harvard–Smithsonian Center for Astrophysics, who was not one of either study's authors.",
"Overall,\" he continued, \"the presence of water in its atmosphere certainly improves the prospect of K2-18b being a potentially habitable planet, but further observations will be required to say for sure. \"",
"K2-18b was first identified in 2015 by the Kepler space telescope.",
"It is about 110 light-years from Earth and larger but less dense.",
];
// (New sample credits: [WikiNews](https://en.wikinews.org/wiki/Astronomers_find_water_vapour_in_atmosphere_of_exoplanet_K2-18b))
c.bench_function("Translation forward pass", |b| {
2 changes: 1 addition & 1 deletion src/albert/albert_model.rs
@@ -229,7 +229,7 @@ impl AlbertModel {
},
};

let mask = mask.unwrap_or(Tensor::ones(&input_shape, (Kind::Int64, device)));
let mask = mask.unwrap_or_else(|| Tensor::ones(&input_shape, (Kind::Int64, device)));

let extended_attention_mask = mask.unsqueeze(1).unsqueeze(2);
let extended_attention_mask: Tensor =
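
Note on the change above: clippy's `or_fun_call` lint flags `unwrap_or(Tensor::ones(...))` because the default tensor is built even when the mask is already `Some`; `unwrap_or_else` takes a closure and only allocates on the `None` path. A minimal, self-contained sketch of the pattern (the `expensive_default` helper is illustrative, not part of the crate):

fn expensive_default() -> Vec<i64> {
    // stand-in for building a large default tensor
    vec![0; 1_000_000]
}

fn main() {
    let mask: Option<Vec<i64>> = Some(vec![1, 1, 1]);
    // eager: the default is constructed even though `mask` is `Some`
    let _eager = mask.clone().unwrap_or(expensive_default());
    // lazy: the closure runs only when `mask` is `None`
    let _lazy = mask.unwrap_or_else(expensive_default);
}
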
4 changes: 2 additions & 2 deletions src/bart/encoder.rs
@@ -16,7 +16,7 @@ use crate::bart::embeddings::{
EmbeddingOption, LearnedPositionalEmbedding, SinusoidalPositionalEmbedding,
};
use crate::bart::BartConfig;
use crate::common::activations::Activation;
use crate::common::activations::{Activation, TensorFunction};
use crate::common::dropout::Dropout;
use std::borrow::{Borrow, BorrowMut};
use tch::kind::Kind::Bool;
@@ -27,7 +27,7 @@ pub struct EncoderLayer {
self_attention_layer_norm: nn::LayerNorm,
dropout: Dropout,
activation_dropout: Dropout,
activation: Box<dyn Fn(&Tensor) -> Tensor>,
activation: TensorFunction,
fc1: nn::Linear,
fc2: nn::Linear,
final_layer_norm: nn::LayerNorm,
3 changes: 2 additions & 1 deletion src/bert/attention.rs
@@ -12,6 +12,7 @@
// limitations under the License.

use crate::bert::bert_model::BertConfig;
use crate::common::activations::TensorFunction;
use crate::common::dropout::Dropout;
use std::borrow::Borrow;
use tch::kind::Kind::Float;
@@ -222,7 +223,7 @@ impl BertAttention {

pub struct BertIntermediate {
lin: nn::Linear,
activation: Box<dyn Fn(&Tensor) -> Tensor>,
activation: TensorFunction,
}

impl BertIntermediate {
2 changes: 1 addition & 1 deletion src/bert/bert_model.rs
@@ -255,7 +255,7 @@ impl<T: BertEmbedding> BertModel<T> {
},
};

let mask = mask.unwrap_or(Tensor::ones(&input_shape, (Kind::Int64, device)));
let mask = mask.unwrap_or_else(|| Tensor::ones(&input_shape, (Kind::Int64, device)));

let extended_attention_mask = match mask.dim() {
3 => mask.unsqueeze(1),
13 changes: 6 additions & 7 deletions src/bert/embeddings.rs
@@ -197,16 +197,15 @@ impl BertEmbedding for BertEmbeddings {

let seq_length = input_embeddings.as_ref().size()[1].to_owned();

let position_ids = position_ids.unwrap_or(
let position_ids = position_ids.unwrap_or_else(|| {
Tensor::arange(seq_length, (Kind::Int64, input_embeddings.device()))
.unsqueeze(0)
.expand(&input_shape, true),
);
.expand(&input_shape, true)
});

let token_type_ids = token_type_ids.unwrap_or(Tensor::zeros(
&input_shape,
(Kind::Int64, input_embeddings.device()),
));
let token_type_ids = token_type_ids.unwrap_or_else(|| {
Tensor::zeros(&input_shape, (Kind::Int64, input_embeddings.device()))
});

let position_embeddings = position_ids.apply(&self.position_embeddings);
let token_type_embeddings = token_type_ids.apply(&self.token_type_embeddings);
4 changes: 3 additions & 1 deletion src/common/activations.rs
@@ -26,6 +26,8 @@ pub fn _tanh(x: &Tensor) -> Tensor {
x.tanh()
}

pub type TensorFunction = Box<fn(&Tensor) -> Tensor>;

#[allow(non_camel_case_types)]
#[derive(Clone, Debug, Serialize, Deserialize, Copy)]
/// # Activation function used in the attention layer and masked language model head
@@ -45,7 +47,7 @@ pub enum Activation {
}

impl Activation {
pub fn get_function(&self) -> Box<fn(&Tensor) -> Tensor> {
pub fn get_function(&self) -> TensorFunction {
Box::new(match self {
Activation::gelu => _gelu,
Activation::relu => _relu,
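
The new `TensorFunction` alias boxes a plain function pointer (`fn`), so structs holding an activation avoid a `dyn Fn` trait object and stay easy to clone, while the alias keeps the field declarations in the other files short. A rough sketch of the same idea outside of tch (the names `UnaryFn`, `Layer`, and `square` are illustrative, not part of rust-bert):

// a boxed function pointer, analogous to `TensorFunction` but over f64
type UnaryFn = Box<fn(f64) -> f64>;

fn square(x: f64) -> f64 {
    x * x
}

struct Layer {
    activation: UnaryFn,
}

fn main() {
    let layer = Layer {
        activation: Box::new(square),
    };
    // the call works through the box because `fn` pointers implement `Fn`
    println!("{}", (layer.activation)(3.0)); // prints 9
}
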
6 changes: 4 additions & 2 deletions src/common/summary.rs
@@ -10,7 +10,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::common::activations::{Activation, _gelu, _gelu_new, _mish, _relu, _swish, _tanh};
use crate::common::activations::{
Activation, TensorFunction, _gelu, _gelu_new, _mish, _relu, _swish, _tanh,
};
use crate::common::dropout::Dropout;
use crate::xlnet::XLNetConfig;
use crate::RustBertError;
@@ -66,7 +68,7 @@ impl From<&XLNetConfig> for SummaryConfig {
pub struct SequenceSummary {
summary: Option<nn::Linear>,
summary_type: SummaryType,
activation: Option<Box<fn(&Tensor) -> Tensor>>,
activation: Option<TensorFunction>,
first_dropout: Option<Dropout>,
last_dropout: Option<Dropout>,
}
3 changes: 2 additions & 1 deletion src/distilbert/transformer.rs
@@ -10,6 +10,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::common::activations::TensorFunction;
use crate::common::dropout::Dropout;
use crate::distilbert::attention::MultiHeadSelfAttention;
use crate::distilbert::distilbert_model::DistilBertConfig;
@@ -21,7 +22,7 @@ pub struct FeedForwardNetwork {
lin1: nn::Linear,
lin2: nn::Linear,
dropout: Dropout,
activation: Box<dyn Fn(&Tensor) -> Tensor>,
activation: TensorFunction,
}

impl FeedForwardNetwork {
4 changes: 2 additions & 2 deletions src/electra/electra_model.rs
@@ -150,7 +150,7 @@ impl ElectraModel {
None
};
let bert_config = BertConfig {
hidden_act: config.hidden_act.clone(),
hidden_act: config.hidden_act,
attention_probs_dropout_prob: config.attention_probs_dropout_prob,
hidden_dropout_prob: config.hidden_dropout_prob,
hidden_size: config.hidden_size,
@@ -254,7 +254,7 @@ impl ElectraModel {
},
};

let mask = mask.unwrap_or(Tensor::ones(&input_shape, (Kind::Int64, device)));
let mask = mask.unwrap_or_else(|| Tensor::ones(&input_shape, (Kind::Int64, device)));

let extended_attention_mask = match mask.dim() {
3 => mask.unsqueeze(1),
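
The `hidden_act: config.hidden_act` change drops a redundant `.clone()`: `Activation` derives `Copy` (see the `#[derive(..., Copy)]` in the activations.rs hunk above), so clippy's `clone_on_copy` lint considers the call unnecessary. A small sketch of that lint (the `Act` enum is illustrative):

#[derive(Clone, Copy, Debug)]
enum Act {
    Gelu,
    Relu,
}

fn main() {
    let a = Act::Gelu;
    let b = a;                 // a plain copy; no `.clone()` needed
    let c = Act::Relu.clone(); // compiles, but clippy's `clone_on_copy` flags it as redundant
    println!("{:?} {:?} {:?}", a, b, c);
}
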
6 changes: 4 additions & 2 deletions src/gpt2/transformer.rs
@@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::common::activations::{Activation, _gelu_new, _mish, _relu, _swish, _tanh};
use crate::common::activations::{
Activation, TensorFunction, _gelu_new, _mish, _relu, _swish, _tanh,
};
use crate::common::dropout::Dropout;
use crate::gpt2::attention::{Attention, GPTConv1D};
use crate::gpt2::gpt2_model::Gpt2Config;
@@ -22,7 +24,7 @@ use tch::{nn, Tensor};
pub struct MLP {
c_fc: GPTConv1D,
c_proj: GPTConv1D,
activation: Box<dyn Fn(&Tensor) -> Tensor>,
activation: TensorFunction,
dropout: Dropout,
}

7 changes: 3 additions & 4 deletions src/roberta/embeddings.rs
@@ -206,10 +206,9 @@ impl BertEmbedding for RobertaEmbeddings {
},
};

let token_type_ids = token_type_ids.unwrap_or(Tensor::zeros(
&input_shape,
(Kind::Int64, input_embeddings.device()),
));
let token_type_ids = token_type_ids.unwrap_or_else(|| {
Tensor::zeros(&input_shape, (Kind::Int64, input_embeddings.device()))
});

let position_embeddings = position_ids.apply(&self.position_embeddings);
let token_type_embeddings = token_type_ids.apply(&self.token_type_embeddings);
54 changes: 27 additions & 27 deletions src/xlnet/attention.rs
@@ -47,11 +47,11 @@ pub struct XLNetRelativeAttention {
hidden_size: i64,
dropout: Dropout,
output_attentions: bool,
q: Tensor,
k: Tensor,
v: Tensor,
o: Tensor,
r: Tensor,
query: Tensor,
key: Tensor,
value: Tensor,
output: Tensor,
pos: Tensor,
r_r_bias: Tensor,
r_s_bias: Tensor,
r_w_bias: Tensor,
@@ -72,31 +72,31 @@ impl XLNetRelativeAttention {
);
let p = p.borrow();

let q = p.var(
let query = p.var(
"q",
&[config.d_model, config.n_head, config.d_head],
Init::KaimingUniform,
);

let k = p.var(
let key = p.var(
"k",
&[config.d_model, config.n_head, config.d_head],
Init::KaimingUniform,
);

let v = p.var(
let value = p.var(
"v",
&[config.d_model, config.n_head, config.d_head],
Init::KaimingUniform,
);

let o = p.var(
let output = p.var(
"o",
&[config.d_model, config.n_head, config.d_head],
Init::KaimingUniform,
);

let r = p.var(
let pos = p.var(
"r",
&[config.d_model, config.n_head, config.d_head],
Init::KaimingUniform,
@@ -140,11 +140,11 @@ impl XLNetRelativeAttention {
hidden_size: config.d_model,
dropout,
output_attentions,
q,
k,
v,
o,
r,
query,
key,
value,
output,
pos,
r_r_bias,
r_s_bias,
r_w_bias,
@@ -216,7 +216,7 @@ impl XLNetRelativeAttention {
residual: bool,
train: bool,
) -> Tensor {
let mut attention_out = Tensor::einsum("ibnd,hnd->ibh", &[attention_vector, &self.o])
let mut attention_out = Tensor::einsum("ibnd,hnd->ibh", &[attention_vector, &self.output])
.apply_t(&self.dropout, train);
if residual {
attention_out = attention_out + h;
@@ -236,7 +236,7 @@ impl XLNetRelativeAttention {
target_mapping: Option<&Tensor>,
train: bool,
) -> (Tensor, Option<Tensor>, Option<Tensor>, Option<Tensor>) {
if g.is_some() {
if let Some(g) = g {
let cat_value = if let Some(mems) = &layer_state {
if mems.prev_content.size().len() > 1 {
Some(Tensor::cat(&[&mems.prev_content, h], 0))
@@ -251,10 +251,10 @@
None => h,
};

let q_head_h = Tensor::einsum("ibh,hnd->ibnd", &[h, &self.q]);
let k_head_h = Tensor::einsum("ibh,hnd->ibnd", &[cat, &self.k]);
let v_head_h = Tensor::einsum("ibh,hnd->ibnd", &[cat, &self.v]);
let k_head_r = Tensor::einsum("ibh,hnd->ibnd", &[r, &self.r]);
let q_head_h = Tensor::einsum("ibh,hnd->ibnd", &[h, &self.query]);
let k_head_h = Tensor::einsum("ibh,hnd->ibnd", &[cat, &self.key]);
let v_head_h = Tensor::einsum("ibh,hnd->ibnd", &[cat, &self.value]);
let k_head_r = Tensor::einsum("ibh,hnd->ibnd", &[r, &self.pos]);

let (attention_vec_h, attention_probas_h) = self.rel_attention_core(
&q_head_h,
@@ -267,7 +267,7 @@
);

let output_h = self.post_attention(h, &attention_vec_h, true, train);
let q_head_g = Tensor::einsum("ibh,hnd->ibnd", &[g.unwrap(), &self.q]);
let q_head_g = Tensor::einsum("ibh,hnd->ibnd", &[g, &self.query]);

let (attention_vec_g, attention_probas_g) = match target_mapping {
Some(target_mapping) => {
@@ -296,7 +296,7 @@
),
};

let output_g = self.post_attention(g.unwrap(), &attention_vec_g, true, train);
let output_g = self.post_attention(g, &attention_vec_g, true, train);
(
output_h,
Some(output_g),
@@ -318,10 +318,10 @@
None => h,
};

let q_head_h = Tensor::einsum("ibh,hnd->ibnd", &[h, &self.q]);
let k_head_h = Tensor::einsum("ibh,hnd->ibnd", &[cat, &self.k]);
let v_head_h = Tensor::einsum("ibh,hnd->ibnd", &[cat, &self.v]);
let k_head_r = Tensor::einsum("ibh,hnd->ibnd", &[r, &self.r]);
let q_head_h = Tensor::einsum("ibh,hnd->ibnd", &[h, &self.query]);
let k_head_h = Tensor::einsum("ibh,hnd->ibnd", &[cat, &self.key]);
let v_head_h = Tensor::einsum("ibh,hnd->ibnd", &[cat, &self.value]);
let k_head_r = Tensor::einsum("ibh,hnd->ibnd", &[r, &self.pos]);

let (attention_vec, attention_probas) = self.rel_attention_core(
&q_head_h,
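
Two patterns recur in this file: the single-letter weight fields are renamed to descriptive names (`query`, `key`, `value`, `output`, `pos`) while the underlying variable-store keys ("q", "k", ...) stay the same, and `if g.is_some()` followed by `g.unwrap()` calls is rewritten as `if let Some(g) = g`, which binds the inner value once. A minimal sketch of the `if let` rewrite (the `describe` function is illustrative, not the crate's API):

fn describe(g: Option<&str>) -> String {
    // before: `if g.is_some() { ... g.unwrap() ... }` checks, then unwraps separately
    // after: `if let` pattern-matches and binds the inner value in one step
    if let Some(g) = g {
        format!("query stream: {}", g)
    } else {
        String::from("no query stream")
    }
}

fn main() {
    assert_eq!(describe(Some("g")), "query stream: g");
    assert_eq!(describe(None), "no query stream");
}
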
3 changes: 2 additions & 1 deletion src/xlnet/encoder.rs
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::common::activations::TensorFunction;
use crate::common::dropout::Dropout;
use crate::xlnet::attention::{LayerState, XLNetRelativeAttention};
use crate::xlnet::XLNetConfig;
@@ -23,7 +24,7 @@ pub struct XLNetFeedForward {
layer_2: nn::Linear,
layer_norm: nn::LayerNorm,
dropout: Dropout,
activation: Box<dyn Fn(&Tensor) -> Tensor>,
activation: TensorFunction,
}

impl XLNetFeedForward {
