-
Notifications
You must be signed in to change notification settings - Fork 68
/
Copy pathtest_gene_names.py
72 lines (60 loc) · 2.07 KB
/
test_gene_names.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
Test all methods which return collections of gene names that aren't converting
from some other type of name or ID.
"""
from __future__ import absolute_import, print_function
from pyensembl import genome_for_reference_name
from .common import run_multiple_genomes
# GRCh38 genome shared by the tests below that query this reference directly
# instead of receiving a genome as an argument.
grch38 = genome_for_reference_name("GRCh38")

# Gene names that test_all_gene_names expects to find in the result of
# gene_names() for every genome it is given.
KNOWN_GENE_NAMES = [
    "TP53",
    "ERBB2",
    "SMAD4",
    "CTAG1A",
    "HLA-A",
]
@run_multiple_genomes()
def test_all_gene_names(genome):
    """
    Check that every entry of KNOWN_GENE_NAMES (TP53, ERBB2, SMAD4, ...)
    appears in the collection returned by `genome.gene_names()`.

    The `genome` argument is presumably supplied by the
    `run_multiple_genomes` decorator from `.common` -- confirm there.
    """
    all_names = genome.gene_names()
    # Debug output: shows which collection type gene_names() returned.
    print(type(all_names))
    for expected_name in KNOWN_GENE_NAMES:
        assert expected_name in all_names, "Missing gene name %s from %s" % (
            expected_name,
            genome,
        )
def test_gene_names_at_locus_grch38_hla_a():
    """
    The only gene name GRCh38 reports at chr6:29,945,884 should be HLA-A.

    That position is taken from the HLA-A gene summary on Ensembl:
        http://useast.ensembl.org/Homo_sapiens/Gene/
        Summary?db=core;g=ENSG00000206503;r=6:29941260-29945884
    """
    contig, position = 6, 29945884
    expected = ["HLA-A"]
    observed = grch38.gene_names_at_locus(contig, position)
    assert observed == expected, "Expected gene name HLA-A, got: %s" % (
        observed,
    )
@run_multiple_genomes()
def test_gene_names_on_contig(genome):
    """
    Check per-contig gene name listings: TP53 must be among the names
    returned for contig 17 and SMAD4 among those returned for contig 18.

    On failure the message shows the genome, the first four names that were
    returned and the total number of names.
    """
    chr17_failure = (
        "No TP53 in gene names on chr17 of %s, gene names: %s ... (%d)"
    )
    chr18_failure = (
        "No SMAD4 in gene names on chr18 of %s, gene names: %s ... (%d)"
    )

    names_on_17 = genome.gene_names(17)
    assert "TP53" in names_on_17, chr17_failure % (
        genome,
        list(names_on_17[:4]),
        len(names_on_17),
    )

    names_on_18 = genome.gene_names(18)
    assert "SMAD4" in names_on_18, chr18_failure % (
        genome,
        list(names_on_18[:4]),
        len(names_on_18),
    )
def test_gene_name_of_HLA_gene_id():
    """
    Round-trip check on GRCh38: every gene ID returned for the name "HLA-A"
    must map back to exactly one distinct gene name, and that name must be
    "HLA-A".
    """
    hla_a_gene_ids = grch38.gene_ids_of_gene_name("HLA-A")
    # Collapse duplicates so the assertions only see distinct names.
    distinct_names = list(
        {grch38.gene_name_of_gene_id(gene_id) for gene_id in hla_a_gene_ids}
    )
    n_distinct = len(distinct_names)
    assert n_distinct == 1, (n_distinct, distinct_names)
    only_name = distinct_names[0]
    assert only_name == "HLA-A", only_name