Skip to content

Commit

Permalink
Merge pull request #3721 from RosettaCommons/klimaj/output_scorefile_…
Browse files Browse the repository at this point in the history
…json

Adding ability to PyJobDistributor class to output scorefile in JSON format

Old repository SHA1: a70c22642f00c68b38110ad2dc02c782f637aaaa
  • Loading branch information
klimaj authored Jan 2, 2019
1 parent 86fe18f commit 7fe964e
Showing 1 changed file with 73 additions and 38 deletions.
111 changes: 73 additions & 38 deletions source/src/python/PyRosetta/src/pyrosetta/toolbox/py_jobdistributor.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,68 @@
# :noTabs=true:
#
# (c) Copyright Rosetta Commons Member Institutions.
# (c) This file is part of the Rosetta software suite and is made available under license.
# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
# (c) For more information, see http://www.rosettacommons.org.
# (c) Questions about this can be addressed to University of Washington CoMotion, email: [email protected].

import json
import os
import random
import pyrosetta
import random

def output_scorefile(pose, pdb_name, current_name, scorefilepath, \
scorefxn, nstruct, native_pose=None, additional_decoy_info=None):

def output_scorefile(pose, pdb_name, current_name, scorefilepath, scorefxn, nstruct,
native_pose=None, additional_decoy_info=None, json_format=False):
"""
Moved from PyJobDistributor (Jared Adolf-Bryfogle)
Creates a scorefile if none exists, or appends the current one.
Calculates and writes CA_rmsd if native pose is given,
as well as any additional decoy info
"""
if not os.path.exists(scorefilepath):
with open(scorefilepath, 'w') as f:
f.write("pdb name: " + pdb_name + " nstruct: " +
str(nstruct) + '\n')
total_score = scorefxn(pose) # Scores pose and calculates total score.

if json_format:
entries = {}
entries.update(
{
"pdb_name": str(pdb_name),
"filename": str(current_name),
"nstruct": int(nstruct)
}
)
entries.update(dict(pose.scores))
if native_pose:
entries["rmsd"] = float(pyrosetta.rosetta.core.scoring.CA_rmsd(native_pose, pose))
if additional_decoy_info:
entries["additional_decoy_info"] = str(additional_decoy_info)

score = scorefxn(pose) # Calculates total score.
score_line = pose.energies().total_energies().weighted_string_of(scorefxn.weights())
output_line = "filename: " + current_name + " total_score: " + str(round(score, 2))
with open(scorefilepath, "a") as f:
json.dump(entries, f)
f.write("\n")

# Calculates rmsd if native pose is defined.
if native_pose:
rmsd = pyrosetta.rosetta.core.scoring.CA_rmsd(native_pose, pose)
output_line = output_line + " rmsd: " + str(round(rmsd, 2))
else:
if not os.path.exists(scorefilepath):
with open(scorefilepath, "w") as f:
f.write("pdb name: " + pdb_name + " nstruct: " + str(nstruct) + "\n")

with open(scorefilepath, 'a') as f:
if additional_decoy_info:
f.write(output_line + ' ' + score_line + ' '+additional_decoy_info + '\n')
else:
f.write(output_line + ' ' + score_line + '\n')
score_line = pose.energies().total_energies().weighted_string_of(scorefxn.weights())
output_line = "filename: " + current_name + " total_score: " + str(round(total_score, 3))

# Calculates rmsd if native pose is defined.
if native_pose:
rmsd = pyrosetta.rosetta.core.scoring.CA_rmsd(native_pose, pose)
output_line = output_line + " rmsd: " + str(round(rmsd, 3))

with open(scorefilepath, "a") as f:
if additional_decoy_info:
f.write(output_line + " " + score_line + " " + additional_decoy_info + "\n")
else:
f.write(output_line + " " + score_line + "\n")


class PyJobDistributor:

def __init__(self, pdb_name, nstruct, scorefxn, compress=False):
self.pdb_name = pdb_name
self.nstruct = nstruct
Expand All @@ -41,32 +72,35 @@ def __init__(self, pdb_name, nstruct, scorefxn, compress=False):
self.current_name = None # Current decoy name
self.scorefxn = scorefxn # Used for final score calculation
self.native_pose = None # Used for rmsd calculation
self.additional_decoy_info = None # Used for any additional decoy information you want stored

self.sequence = list(range(nstruct)); random.shuffle(self.sequence)
self.start_decoy() # Initializes the job distributor
self.additional_decoy_info = None # Used for any additional decoy information you want stored
self.json_format = False # Used for JSON formatted scorefile

self.sequence = list(range(nstruct))
random.shuffle(self.sequence)
self.start_decoy() # Initializes the job distributor

@property
def job_complete(self): return len(self.sequence) == 0
def job_complete(self):
return len(self.sequence) == 0

def start_decoy(self):
while(self.sequence):
self.current_id = self.sequence[0]
while self.sequence:

self.current_name = self.pdb_name + '_' + str(self.current_id) + ('.pdb.gz' if self.compress else '.pdb')
self.current_in_progress_name = self.current_name + '.in_progress'
self.current_id = self.sequence[0]
self.current_name = self.pdb_name + "_" + str(self.current_id) + (".pdb.gz" if self.compress else ".pdb")
self.current_in_progress_name = self.current_name + ".in_progress"

if (not os.path.isfile(self.current_name)) and (not os.path.isfile(self.current_in_progress_name)):
with open(self.current_in_progress_name, 'w') as f: f.write("This decoy is in progress.")
print( 'Working on decoy: {}'.format(self.current_name) )
if (not os.path.isfile(self.current_name)) and (not os.path.isfile(self.current_in_progress_name)):
with open(self.current_in_progress_name, "w") as f:
f.write("This decoy is in progress.")
print("Working on decoy: {}".format(self.current_name))
break


def output_decoy(self, pose):
if os.path.isfile(self.current_name): # decoy is already exist, probably written to other process -> moving to next decoy if any
if os.path.isfile(self.current_name): # decoy already exists, probably written to other process -> moving to next decoy if any

if os.path.isfile(self.current_in_progress_name): os.remove(self.current_in_progress_name)
if os.path.isfile(self.current_in_progress_name):
os.remove(self.current_in_progress_name)

if not self.job_complete:
self.start_decoy()
Expand All @@ -78,20 +112,21 @@ def output_decoy(self, pose):
pose.dump_pdb(s)

z = pyrosetta.rosetta.utility.io.ozstream(self.current_name)
z.write(s.str(), len(s.str() ) )
z.write(s.str(), len(s.str()))
del z

else:
pose.dump_pdb(self.current_name)

score_tag = '.fasc' if pose.is_fullatom() else '.sc'
score_tag = ".fasc" if pose.is_fullatom() else ".sc"

scorefile = self.pdb_name + score_tag
output_scorefile(pose, self.pdb_name, self.current_name, scorefile, self.scorefxn,
self.nstruct, self.native_pose, self.additional_decoy_info)
self.nstruct, self.native_pose, self.additional_decoy_info, self.json_format)

self.sequence.remove(self.current_id)

if os.path.isfile(self.current_in_progress_name): os.remove(self.current_in_progress_name)
if os.path.isfile(self.current_in_progress_name):
os.remove(self.current_in_progress_name)

self.start_decoy()

0 comments on commit 7fe964e

Please sign in to comment.