Skip to content

Commit

Permalink
New syntax - update this commit message
Browse files Browse the repository at this point in the history
  • Loading branch information
muffato committed Sep 10, 2020
1 parent 2c17270 commit 6d76e36
Show file tree
Hide file tree
Showing 11 changed files with 36 additions and 34 deletions.
2 changes: 1 addition & 1 deletion conf/agora-iterativerobust.ini
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ halfinsert (3)
refine (all)
extend +onlySingletons (all)
halfinsert (all)
groups _
groups

[/iterative-robust] publish

2 changes: 1 addition & 1 deletion conf/agora-multirobust.ini
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ denovo (3) ! =A0
refine (all) ! A0
extend +onlySingletons (all)
halfinsert (all)
groups _
groups

[/multi-robust] publish

2 changes: 1 addition & 1 deletion conf/agora-robust.ini
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ denovo (1)
refine (all)
extend +onlySingletons (all)
halfinsert (all)
groups _
groups

[/robust] publish

2 changes: 1 addition & 1 deletion conf/agora.ini
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ all = conservedPairs
# If none given, AGORA will use the same list as the previous step, starting with the entire species-tree

denovo +searchLoops (all)
groups _
groups

[/standard] publish

10 changes: 2 additions & 8 deletions doc/HowTo.md
Original file line number Diff line number Diff line change
Expand Up @@ -337,14 +337,11 @@ and integration into an adjacency graph) but on the previous CARs, which allows
finding higher-level adjacencies.
The result is a set of CARs made of CARs, that are much longer than in the previous steps.

⚠ **Warning**: The underscore `_` is a required parameter. It tells AGORA
to consider all extant species under A0 for this step.

```bash
mkdir -p example/results/integrDiags/denovo-all.groups/
src/buildSynteny.integr-groups.py \
example/data/Species.nwk \
A0 _ \
A0 \
-IN.ancDiags=example/results/integrDiags/denovo-all/diags.%s.list.bz2 \
-OUT.ancDiags=example/results/integrDiags/denovo-all.groups/diags.%s.list.bz2 \
-LOG.ancGraph=example/results/integrDiags/denovo-all.groups/graph.%s.log.bz2 \
Expand Down Expand Up @@ -556,14 +553,11 @@ src/buildSynteny.integr-halfinsert.py \
As in non-robust mode, this step does pairwise comparisons and a graph linearisation
of the CARs themselves, which allows finding higher-level adjacencies.

⚠ **Warning**: Here as well, the underscore `_` must be given.
It tells AGORA to consider all extant species.

```bash
mkdir -p example/results/integrDiags/denovo-size-1.0-1.0.refine-all.extend-all.halfinsert-all.groups
src/buildSynteny.integr-groups.py \
example/data/Species.nwk \
A0 _ \
A0 \
-IN.ancDiags=example/results/integrDiags/denovo-size-1.0-1.0.refine-all.extend-all.halfinsert-all/diags.%s.list.bz2 \
-OUT.ancDiags=example/results/integrDiags/denovo-size-1.0-1.0.refine-all.extend-all.halfinsert-all.groups/diags.%s.list.bz2 \
-LOG.ancGraph=example/results/integrDiags/denovo-size-1.0-1.0.refine-all.extend-all.halfinsert-all.groups/graph.%s.log.bz2 \
Expand Down
3 changes: 2 additions & 1 deletion src/agora.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ def partition(s, delim):
phylTree = utils.myPhylTree.PhylogeneticTree(files["speciesTree"])


workflow = utils.myAgoraWorkflow.AgoraWorkflow(phylTree.root, scriptDir, files)
# TODO: add options in config file to change the target ancestors / species
workflow = utils.myAgoraWorkflow.AgoraWorkflow(phylTree.root, phylTree.root, scriptDir, files)

# Ancestral genes lists Section
################################
Expand Down
8 changes: 5 additions & 3 deletions src/agora1.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

arguments = utils.myTools.checkArgs(
[("speciesTree", file), ("geneTrees", file), ("genes", str)],
[("target", str, ""),
[("target", str, ""), ("extantSpeciesFilter", str, ""),
("workingDir", str, "."), ("nbThreads", int, multiprocessing.cpu_count())],
__doc__)

Expand All @@ -42,12 +42,14 @@
# Check that the syntax is correct
if arguments["target"]:
phylTree.getTargetsAnc(arguments["target"])
if arguments["extantSpeciesFilter"]:
phylTree.getTargetsSpec(arguments["extantSpeciesFilter"])

workflow = utils.myAgoraWorkflow.AgoraWorkflow(arguments["target"] or phylTree.root, scriptDir, files)
workflow = utils.myAgoraWorkflow.AgoraWorkflow(arguments["target"] or phylTree.root, arguments["extantSpeciesFilter"], scriptDir, files)
workflow.addAncGenesGenerationAnalysis()
workflow.addPairwiseAnalysis(workflow.allAncGenesDirName)
workflow.addIntegrationAnalysis("denovo", ['+searchLoops'], workflow.allAncGenesDirName)
workflow.addIntegrationAnalysis("groups", ['_'], None)
workflow.addIntegrationAnalysis("groups", [], None)
workflow.addIntegrationAnalysis("publish", [], None, taskName="//")

# Launching tasks in multiple threads
Expand Down
8 changes: 5 additions & 3 deletions src/agora2.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

arguments = utils.myTools.checkArgs(
[("speciesTree", file), ("geneTrees", file), ("genes", str)],
[("minSize", float, 1.0), ("maxSize", float, 1.0), ("target", str, ""),
[("minSize", float, 1.0), ("maxSize", float, 1.0), ("target", str, ""), ("extantSpeciesFilter", str, ""),
("workingDir", str, "."), ("nbThreads", int, multiprocessing.cpu_count())],
__doc__)

Expand All @@ -45,8 +45,10 @@
# Check that the syntax is correct
if arguments["target"]:
phylTree.getTargetsAnc(arguments["target"])
if arguments["extantSpeciesFilter"]:
phylTree.getTargetsSpec(arguments["extantSpeciesFilter"])

workflow = utils.myAgoraWorkflow.AgoraWorkflow(arguments["target"] or phylTree.root, scriptDir, files)
workflow = utils.myAgoraWorkflow.AgoraWorkflow(arguments["target"] or phylTree.root, arguments["extantSpeciesFilter"], scriptDir, files)
workflow.addAncGenesGenerationAnalysis()
workflow.addAncGenesFilterAnalysis(ancGenesDirName, "size", [str(arguments['minSize']), str(arguments['maxSize'])], dirnameTemplate)
workflow.addPairwiseAnalysis(workflow.allAncGenesDirName)
Expand All @@ -55,7 +57,7 @@
workflow.addIntegrationAnalysis("refine", [], workflow.allAncGenesDirName)
workflow.addIntegrationAnalysis("extend", ["+onlySingletons"], workflow.allAncGenesDirName)
workflow.addIntegrationAnalysis("halfinsert", [], workflow.allAncGenesDirName)
workflow.addIntegrationAnalysis("groups", ['_'], None)
workflow.addIntegrationAnalysis("groups", [], None)
workflow.addIntegrationAnalysis("publish", [], None, taskName="//")

# Launching tasks in multiple threads
Expand Down
12 changes: 4 additions & 8 deletions src/buildSynteny.integr-groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@

# Arguments
arguments = utils.myTools.checkArgs( \
[("phylTree.conf", file), ("target", str), ("usedSpecies", str)], \
[("phylTree.conf", file), ("target", str)], \
[("minimalWeight", int, 1), ("anchorSize", int, 2), ("minChromLength", int, 2), \
("nbThreads", int, 0),
("extantSpeciesFilter", str, ""), \
("IN.ancDiags", str, ""), \
("LOG.ancGraph", str, "groups_log/%s.log.bz2"),
("OUT.ancDiags", str, "anc/diags.%s.list.bz2"), \
Expand Down Expand Up @@ -186,21 +187,16 @@ def toString(x, rev=False):
# Load species tree - target ancestral genome and the extant species used to assemble blocks
phylTree = utils.myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

targets = phylTree.getTargetsAnc(arguments["target"])
listSpecies = phylTree.getTargetsSpec(arguments["target"] if arguments["usedSpecies"] == "_" else arguments["usedSpecies"])
(listSpecies, targets, accessoryAncestors) = phylTree.getTargetsForPairwise(arguments["target"], arguments["extantSpeciesFilter"])

dicGenomes = {}
for e in listSpecies:
dicGenomes[e] = utils.myGenomes.Genome(arguments["genesFiles"] % phylTree.fileName[e], withDict=False)

genesAnc = {}
for anc in targets:
for anc in targets.union(accessoryAncestors):
genesAnc[anc] = utils.myGenomes.Genome(arguments["ancGenesFiles"] % phylTree.fileName[anc])

for anc in [phylTree.dicParents[e][a] for (e, a) in itertools.product(listSpecies, targets)]:
if anc not in genesAnc:
genesAnc[anc] = utils.myGenomes.Genome(arguments["ancGenesFiles"] % phylTree.fileName[anc])

toStudy = collections.defaultdict(list)
for (e1, e2) in itertools.combinations(listSpecies, 2):
for anc in targets.intersection(phylTree.dicLinks[e1][e2][1:-1]):
Expand Down
15 changes: 10 additions & 5 deletions src/buildSynteny.pairwise-conservedPairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
# Arguments
arguments = utils.myTools.checkArgs(
[("phylTree.conf",file), ("target",str)], \
[("genesFiles",str,""), ("ancGenesFiles",str,""), ("OUT.pairwise",str,"")],
[("extantSpeciesFilter",str,""), ("genesFiles",str,""), ("ancGenesFiles",str,""), ("OUT.pairwise",str,"")],
__doc__
)

Expand All @@ -36,8 +36,7 @@
# Species to use
################

listSpecies = phylTree.getTargetsSpec(arguments["target"])
listAncestors = set(phylTree.dicParents[e1][e2] for (e1,e2) in itertools.combinations(listSpecies, 2))
(listSpecies, listAncestors, accessoryAncestors) = phylTree.getTargetsForPairwise(arguments["target"], arguments["extantSpeciesFilter"])

def revPair((g1, g2)):
return ((g2[0],-g2[1]),(g1[0],-g1[1]))
Expand All @@ -46,7 +45,7 @@ def revPair((g1, g2)):
dicModAnc = collections.defaultdict(list)

genesAnc = {}
for anc in listAncestors:
for anc in listAncestors.union(accessoryAncestors):
genesAnc[anc] = utils.myGenomes.Genome(arguments["ancGenesFiles"] % phylTree.fileName[anc])

print >> sys.stderr, "time for loading", time.time() - start
Expand All @@ -59,7 +58,7 @@ def revPair((g1, g2)):
anc = esp
while anc in phylTree.parent:
(par,_) = phylTree.parent[anc]
if par in listAncestors:
if par in genesAnc:
lanc.append((par, genesAnc[par].dicGenes, dicAncMod[par][anc]))
anc = par

Expand Down Expand Up @@ -167,6 +166,12 @@ def getTargets(listAnc, lmodPair):

# Results files.
for (anc, pairs) in details.iteritems():

# Accessory ancestor (required to compare against outgroups)
if anc not in listAncestors:
print >> sys.stderr, "Skipping", anc, "(not a target)"
continue

print >> sys.stderr, len(pairs), "conserved pairs for", anc

# -1 is the outgroup species, 1,2,3... are the descendants
Expand Down
6 changes: 4 additions & 2 deletions src/utils/myAgoraWorkflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,9 @@ class AgoraWorkflow:
inputParams = ["speciesTree", "geneTrees", "genes"]


def __init__(self, defaultRoot, scriptDir, files):
def __init__(self, defaultRoot, defaultExtantSpeciesFilter, scriptDir, files):
self.defaultRoot = defaultRoot
self.defaultExtantSpeciesFilter = ["-extantSpeciesFilter=" + defaultExtantSpeciesFilter] if defaultExtantSpeciesFilter else []
self.tasklist = TaskList()
self.scriptDir = scriptDir
self.files = files
Expand Down Expand Up @@ -243,7 +244,7 @@ def addPairwiseAnalysis(self, taskName, methodName="conservedPairs", params=[],
"-ancGenesFiles=" + self.files["ancGenesData"] % {"filt": taskName, "name": "%s"},
"-genesFiles=" + self.files["genes"] % {"name": "%s"},
"-OUT.pairwise=" + self.files["pairwiseOutput"] % {"filt": taskName, "name": "%s"}
] + params,
] + self.defaultExtantSpeciesFilter + params,
os.devnull,
self.files["pairwiseLog"] % {"filt": taskName},
launch,
Expand Down Expand Up @@ -318,6 +319,7 @@ def addIntegrationAnalysis(self, methodName, params, pairwiseName, taskName=None

if methodName == "groups":
args.append("-genesFiles=" + self.files["genes"] % {"name": "%s"})
args.extend(self.defaultExtantSpeciesFilter)

if methodName not in ["copy", "publish"]:
args.append("-LOG.ancGraph=" + self.files["integrOutput"] % {"method": newMethod, "name": "%s"})
Expand Down

0 comments on commit 6d76e36

Please sign in to comment.