Skip to content

Commit

Permalink
with contextlib.suppress(ImportError) weakens imports (pantsbuild#1…
Browse files Browse the repository at this point in the history
…9293)

fixes pantsbuild#17691 

also:
- fixes the case of nested weakenings
- bumps tree-sitter-python to an unreleased version (last release was a
year ago)
- rebuilds the generation of tree-sitter node types to support multiple
symbols for a name
  • Loading branch information
lilatomic authored Aug 23, 2023
1 parent 3f4ed94 commit 3257d75
Show file tree
Hide file tree
Showing 7 changed files with 345 additions and 36 deletions.
6 changes: 3 additions & 3 deletions src/rust/engine/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/rust/engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ tower-layer = "0.3"
tower-service = "0.3"
tree-sitter = "0.20.10"
tree-sitter-javascript = "0.20.0"
tree-sitter-python = "0.20.2"
tree-sitter-python = "0.20.4"
uname = "0.1.1"
url = "2.4"
uuid = "1.1.2"
Expand Down
2 changes: 1 addition & 1 deletion src/rust/engine/dep_inference/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
version = "0.0.1"
version = "0.0.2"
edition = "2021"
name = "dep_inference"
authors = ["Pants Build <[email protected]>"]
Expand Down
44 changes: 38 additions & 6 deletions src/rust/engine/dep_inference/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,18 @@
#![allow(clippy::mutex_atomic)]

use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::{collections::HashSet, io::Write, path::Path};
use std::{env, fs};
use walkdir::WalkDir;

/// The tree-sitter interfaces don't have nice constants that allow us to reference their magic numbers by name.
/// We generate those constants here.
/// Tree-sitter grammars don't have to give symbols unique names (in `ts_symbol_names`),
/// and there can be multiple symbols mapped to the same name.
/// For example, they might map both `block` and `_match_block` to "block" because one of those in internal
/// For most names, there will only be 1 symbol; for those, we create a const u16 for convenience.
/// For the names with multiple symbols, we generate a const array (hashmaps would have been nice, but I couldn't figure out how to make them const)
fn gen_constants_file(language: &tree_sitter::Language, out_dir: &Path) {
let mut file = std::fs::File::create(out_dir.join("constants.rs")).unwrap();

Expand All @@ -47,21 +55,45 @@ impl KindID {
)
.unwrap();

let mut kinds_seen = HashSet::new();
let mut kinds_to_ids: HashMap<String, HashSet<u16>> = HashMap::new();

// Collect the mapping of name->symbols
for id in (0_u16..(language.node_kind_count() as u16))
.chain([language.id_for_node_kind("ERROR", true)].iter().cloned())
{
if language.node_kind_is_named(id) {
let kind = language.node_kind_for_id(id).unwrap().to_uppercase();
if kinds_seen.insert(kind.clone()) {
file
.write_all(format!(" pub const {kind}: u16 = {id};\n").as_bytes())
.unwrap();
}
kinds_to_ids
.entry(kind)
.or_insert_with(HashSet::new)
.insert(id);
}
}

// Codegen for each name->symbol mapping
for (kind, ids) in kinds_to_ids {
let text = match ids.len() {
1 => {
let single = ids.iter().next().unwrap();
format!(" pub const {kind}: u16 = {single};\n")
}
_ => {
let items: String = ids
.iter()
.map(|id| id.to_string())
.collect::<Vec<String>>()
.join(", ");
format!(
" pub const {}: [u16; {}] = [{}];\n",
kind,
ids.len(),
items
)
}
};
file.write_all(text.as_bytes()).unwrap();
}

file.write_all(b"}\n").unwrap();
}

Expand Down
2 changes: 1 addition & 1 deletion src/rust/engine/dep_inference/src/javascript/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ impl Visitor for ImportCollector<'_> {
fn visit_expression_statement(&mut self, node: Node) -> ChildBehavior {
if node.children(&mut node.walk()).any(|child| {
let id = child.kind_id();
id == KindID::CALL_EXPRESSION || id == KindID::AWAIT_EXPRESSION
KindID::CALL_EXPRESSION.contains(&id) || id == KindID::AWAIT_EXPRESSION
}) {
return self.propagate_pragma(node);
}
Expand Down
100 changes: 88 additions & 12 deletions src/rust/engine/dep_inference/src/python/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,16 +234,6 @@ impl Visitor for ImportCollector<'_> {
ChildBehavior::Ignore
}

// @TODO: If we wanted to be most correct, this should use a stack. But realistically, that's
// kinda complicated:
// try:
// try:
// import weak1
// except Whatever:
// ...
// import weak2
// except ImportError:
// ...
fn visit_try_statement(&mut self, node: tree_sitter::Node) -> ChildBehavior {
let mut should_weaken = false;
let mut cursor = node.walk();
Expand All @@ -269,15 +259,49 @@ impl Visitor for ImportCollector<'_> {
}

for child in children.iter() {
if child.kind_id() == KindID::BLOCK {
let previous_weaken = self.weaken_imports;
if KindID::BLOCK.contains(&child.kind_id()) {
self.weaken_imports = should_weaken;
}
self.walk(&mut child.walk());
self.weaken_imports = false;
self.weaken_imports = previous_weaken;
}
ChildBehavior::Ignore
}

fn visit_with_statement(&mut self, node: tree_sitter::Node) -> ChildBehavior {
let with_clause = node.named_child(0).unwrap();

let are_suppressing_importerror = with_clause
.named_children(&mut with_clause.walk())
.any(|x| self.suppressing_importerror(x));

// remember to visit the withitems themselves
// for ex detecting imports in `with open("/foo/bar") as f`
for child in with_clause.named_children(&mut with_clause.walk()) {
self.walk(&mut child.walk());
}

let body_node = node.child_by_field_name("body").unwrap();
let body: Vec<_> = body_node.named_children(&mut body_node.walk()).collect();

if are_suppressing_importerror {
let previous_weaken = self.weaken_imports;
self.weaken_imports = true;

for child in body {
self.walk(&mut child.walk());
}
self.weaken_imports = previous_weaken;
} else {
for child in body {
self.walk(&mut child.walk());
}
}

ChildBehavior::Ignore
}

fn visit_call(&mut self, node: tree_sitter::Node) -> ChildBehavior {
let funcname = node.named_child(0).unwrap();
if self.code_at(funcname.range()) != "__import__" {
Expand Down Expand Up @@ -308,5 +332,57 @@ impl Visitor for ImportCollector<'_> {
}
}

impl ImportCollector<'_> {
fn suppressing_importerror(&mut self, with_node: tree_sitter::Node) -> bool {
if with_node.kind_id() == KindID::WITH_ITEM {
let node = with_node.child_by_field_name("value").unwrap(); // synthetic

let call_maybe_of_suppress = if node.kind_id() == KindID::CALL {
Some(node) // if we have a call directly `with suppress(ImportError):`
} else if KindID::AS_PATTERN.contains(&node.kind_id()) {
node.named_child(0).and_then(|n| match n.kind_id() {
KindID::CALL => Some(n),
_ => None,
}) // if we have a call with an `as` item `with suppress(ImportError) as e:`
} else {
None
};

if call_maybe_of_suppress.is_none() {
return false;
}

let function_name_expr = call_maybe_of_suppress
.unwrap()
.child_by_field_name("function")
.unwrap();
let is_supress = match function_name_expr.kind_id() {
KindID::ATTRIBUTE => function_name_expr
.child_by_field_name("attribute")
.map(|identifier| self.code_at(identifier.range()) == "suppress")
.unwrap_or(false),
KindID::IDENTIFIER => self.code_at(function_name_expr.range()) == "suppress",
_ => false,
};
if !is_supress {
return false;
}
let cur = &mut node.walk();

let has_importerror = call_maybe_of_suppress
.unwrap()
.child_by_field_name("arguments")
.map(|x| {
x.named_children(cur)
.any(|arg| self.code_at(arg.range()) == "ImportError")
})
.unwrap_or(false);
is_supress && has_importerror
} else {
false
}
}
}

#[cfg(test)]
mod tests;
Loading

0 comments on commit 3257d75

Please sign in to comment.