Skip to content

Commit

Permalink
Reload charge etc from file (duartegroup#305)
Browse files Browse the repository at this point in the history
* reload charge etc

* fix unused

* solvent_name property

* remove debug print

* Fix formatting [skip actions]
  • Loading branch information
t-young31 authored Oct 9, 2023
1 parent d29a528 commit d1b502e
Show file tree
Hide file tree
Showing 7 changed files with 162 additions and 55 deletions.
74 changes: 49 additions & 25 deletions autode/input_output.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os

from typing import Collection, Sequence, Optional, TYPE_CHECKING
from typing import Collection, Sequence, Optional, Any, TYPE_CHECKING

from autode.atoms import Atom, Atoms
from autode.exceptions import XYZfileDidNotExist
Expand All @@ -21,43 +21,47 @@ def xyz_file_to_atoms(filename: str) -> Atoms:
Arguments:
filename: .xyz filename
Raises:
(autode.exceptions.XYZfileWrongFormat): If the file is the wrong format
Returns:
(autode.atoms.Atoms): Atoms
"""
logger.info(f"Getting atoms from {filename}")

_check_xyz_file_exists(filename)
atoms = Atoms()
n_atoms = 0

with open(filename, "r") as xyz_file:

try:
# First item in an xyz file is the number of atoms
n_atoms = int(xyz_file.readline().split()[0])
for i, line in enumerate(open(filename, "r")):

except (IndexError, ValueError):
raise XYZfileWrongFormat("Number of atoms not found")
if i == 0: # First line in an xyz file is the number of atoms
n_atoms = _n_atoms_from_first_xyz_line(line)
continue
elif i == 1: # Second line of an xyz file is the tittle line
continue
elif i == n_atoms + 2:
break

# XYZ lines should be the following 2 + n_atoms lines
xyz_lines = xyz_file.readlines()[1 : n_atoms + 1]
try:
atom_label, x, y, z = line.split()[:4]
atoms.append(Atom(atomic_symbol=atom_label, x=x, y=y, z=z))

for i, line in enumerate(xyz_lines):
except (IndexError, TypeError, ValueError):
raise XYZfileWrongFormat(
f"Coordinate line {i} ({line}) not the correct format"
)

try:
atom_label, x, y, z = line.split()[:4]
atoms.append(Atom(atomic_symbol=atom_label, x=x, y=y, z=z))
if len(atoms) != n_atoms:
raise XYZfileWrongFormat(
f"Number of atoms declared ({n_atoms}) "
f"not equal to the number of atoms found "
f"{len(atoms)}"
)

except (IndexError, TypeError, ValueError):
raise XYZfileWrongFormat(
f"Coordinate line {i} ({line}) " f"not the correct format"
)
if n_atoms == 0:
raise XYZfileWrongFormat(f"XYZ file ({filename}) had no atoms!")

if len(atoms) != n_atoms:
raise XYZfileWrongFormat(
f"Number of atoms declared ({n_atoms}) "
f"not equal to the number of atoms found "
f"{len(atoms)}"
)
return atoms


Expand Down Expand Up @@ -127,13 +131,26 @@ def xyz_file_to_molecules(filename: str) -> Sequence["Molecule"]:

_set_attr_from_title_line(molecule, "charge", title_line)
_set_attr_from_title_line(molecule, "mult", title_line)
_set_attr_from_title_line(molecule, "energy", title_line, "E")
_set_attr_from_title_line(
molecule, "energy", title_line, key_in_line="E"
)

molecules.append(molecule)

return molecules


def attrs_from_xyz_title_line(filename: str) -> StringDict:
title_line = ""

for i, line in enumerate(open(filename, "r")):
if i == 1:
title_line = line.strip()
break

return StringDict(title_line)


def _check_xyz_file_exists(filename: str) -> None:

if not os.path.exists(filename):
Expand All @@ -160,3 +177,10 @@ def _set_attr_from_title_line(
logger.warning(f"Failed to set the species {attr} from xyz file")

return None


def _n_atoms_from_first_xyz_line(line: str) -> int:
try:
return int(line.strip())
except (IndexError, ValueError):
raise XYZfileWrongFormat("Number of atoms not found")
10 changes: 6 additions & 4 deletions autode/path/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def product_idx(self, product: "Species") -> Optional[int]:

return None

def products_made(self, product) -> bool:
def products_made(self, product: "Species") -> bool:
"""Are the products are made on the surface?
-----------------------------------------------------------------------
Expand All @@ -127,7 +127,7 @@ def products_made(self, product) -> bool:
"""
return self.product_idx(product) is not None

def is_saddle(self, idx) -> bool:
def is_saddle(self, idx: int) -> bool:
"""Is an index a saddle point"""
if idx == 0 or idx == len(self) - 1:
logger.warning("Cannot be saddle point, index was at the end")
Expand All @@ -143,7 +143,9 @@ def is_saddle(self, idx) -> bool:
energy = self[idx].energy
return self[idx - 1].energy < energy and self[idx + 1].energy < energy

def plot_energies(self, save, name, color, xlabel) -> None:
def plot_energies(
self, save: bool, name: str, color: str, xlabel: str
) -> None:
"""Plot this path"""
if len(self) == 0 or any(item.energy is None for item in self):
logger.error("Could not plot a surface, an energy was None")
Expand All @@ -164,7 +166,7 @@ def plot_energies(self, save, name, color, xlabel) -> None:

return None

def print_geometries(self, name) -> None:
def print_geometries(self, name: str) -> None:
"""Print an xyz trajectory of the geometries in the path"""

open(f"{name}.xyz", "w").close() # Empty the file
Expand Down
66 changes: 42 additions & 24 deletions autode/species/molecule.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import re
import rdkit
from pathlib import Path
from typing import Optional, List
from typing import Optional, List, Any
from rdkit.Chem import AllChem

from autode.log.methods import methods
from autode.input_output import xyz_file_to_atoms
from autode.input_output import xyz_file_to_atoms, attrs_from_xyz_title_line
from autode.conformers.conformer import Conformer
from autode.conformers.conf_gen import get_simanl_conformer
from autode.conformers.conformers import atoms_from_rdkit_mol
Expand All @@ -26,8 +26,8 @@ def __init__(
smiles: Optional[str] = None,
atoms: Optional[List["Atom"]] = None,
solvent_name: Optional[str] = None,
charge: int = 0,
mult: int = 1,
charge: Optional[int] = None,
mult: Optional[int] = None,
**kwargs,
):
"""
Expand All @@ -44,31 +44,42 @@ def __init__(
solvent_name: Solvent that the molecule is immersed in
charge: Charge on the molecule. Default = 0
charge: Charge on the molecule. If unspecified defaults to, if
present, that defined in a xyz file or 0
mult: Spin multiplicity on the molecule. Default = 1
mult: Spin multiplicity on the molecule. If unspecified defaults to, if
present, that defined in a xyz file or 1
Keyword Arguments:
name: Name of the molecule. Overrides arg if arg is not a .xyz
filename
"""
logger.info(f"Generating a Molecule object for {arg}")
name = arg if "name" not in kwargs else kwargs["name"]

super().__init__(name, atoms, charge, mult, solvent_name)

if arg.endswith(".xyz"):
self._init_xyz_file(xyz_filename=arg)

super().__init__(
name=arg if "name" not in kwargs else kwargs["name"],
atoms=atoms,
charge=charge if charge is not None else 0,
mult=mult if mult is not None else 1,
solvent_name=solvent_name,
)
self.smiles = smiles
self.rdkit_mol_obj = None
self.rdkit_conf_gen_is_fine = True

if _is_xyz_filename(arg):
self._init_xyz_file(
xyz_filename=arg,
charge=charge,
mult=mult,
solvent_name=solvent_name,
)

if smiles is not None:
self._init_smiles(smiles)
assert not _is_xyz_filename(arg), "Can't be both SMILES and file"
self._init_smiles(smiles, charge=charge)

# If the name is unassigned use a more interpretable chemical formula
if name == "molecule" and self.atoms is not None:
if self.name == "molecule" and self.atoms is not None:
self.name = self.formula

def __repr__(self):
Expand All @@ -78,7 +89,7 @@ def __eq__(self, other):
"""Equality of two molecules is only dependent on the identity"""
return super().__eq__(other)

def _init_smiles(self, smiles: str):
def _init_smiles(self, smiles: str, charge: Optional[int]):
"""Initialise a molecule from a SMILES string using RDKit if it's
purely organic.
Expand All @@ -87,18 +98,17 @@ def _init_smiles(self, smiles: str):
smiles (str):
"""
at_strings = re.findall(r"\[.*?]", smiles)
init_charge = self._charge

if any(metal in string for metal in metals for string in at_strings):
init_smiles(self, smiles)

else:
init_organic_smiles(self, smiles)

if not _is_default_charge(init_charge) and init_charge != self._charge:
if charge is not None and charge != self._charge:
raise ValueError(
"SMILES charge was not the same as the "
f"defined value. {self._charge}{init_charge}"
f"defined value. {self._charge}{charge}"
)

logger.info(
Expand All @@ -108,20 +118,28 @@ def _init_smiles(self, smiles: str):
)
return None

def _init_xyz_file(self, xyz_filename: str):
def _init_xyz_file(self, xyz_filename: str, **override_attrs: Any):
"""
Initialise a molecule from a .xyz file
-----------------------------------------------------------------------
Arguments:
xyz_filename (str):
charge (int | None):
mult (int | None):
Raises:
(ValueError)
"""
logger.info("Generating species from .xyz file")
logger.info("Generating a species from .xyz file")

self.atoms = xyz_file_to_atoms(xyz_filename)
title_line_attrs = attrs_from_xyz_title_line(xyz_filename)
logger.info(f"Found ({title_line_attrs}) in title line")

for attr in ("charge", "mult", "solvent_name"):
if override_attrs[attr] is None and attr in title_line_attrs:
setattr(self, attr, title_line_attrs[attr])

if (
sum(atom.atomic_number for atom in self.atoms) % 2 != 0
Expand All @@ -135,7 +153,7 @@ def _init_xyz_file(self, xyz_filename: str):
)

# Override the default name with something more descriptive
if self.name == "molecule" or self.name.endswith(".xyz"):
if self.name == "molecule" or _is_xyz_filename(self.name):
self.name = Path(self.name).stem

make_graph(self)
Expand Down Expand Up @@ -253,5 +271,5 @@ class Product(Molecule):
"""Product molecule"""


def _is_default_charge(value) -> bool:
return value == 0
def _is_xyz_filename(value: str) -> bool:
return isinstance(value, str) and value.endswith(".xyz")
32 changes: 31 additions & 1 deletion autode/species/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,13 @@ def print_xyz_file(

# Default generated title line
if title_line is None:
title_line = f"Generated by autodE on: {date.today()}. "
title_line = (
f"Generated by autodE on: {date.today()}. "
f"charge = {self.charge} "
f"mult = {self.mult} "
)
if self.solvent is not None and self.solvent.is_implicit:
title_line += f"solvent_name = {self.solvent.name} "
if self.energy is not None:
title_line += f"E = {self.energy:.6f} Ha"

Expand Down Expand Up @@ -1579,5 +1585,29 @@ def has_identical_composition_as(self, species: "Species") -> bool:
"""Does this species have the same chemical identity as another?"""
return self.sorted_atomic_symbols == species.sorted_atomic_symbols

@property
def solvent_name(self) -> Optional[str]:
"""
Name of the solvent or None
Returns:
(str | None):
"""
return None if self._solvent is None else self._solvent.name

@solvent_name.setter
def solvent_name(self, value: Optional[str]) -> None:
"""
Set the solvent of this species given an optional name. Setting this to None
removes the solvent
Arguments:
value (str | None): Name of the solvent to use
"""
if value is None:
self._solvent = None
else:
self._solvent = get_solvent(value, kind="implicit")

# --- Method aliases ---
symmetry_number = sn
4 changes: 4 additions & 0 deletions autode/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,10 @@ def __getitem__(self, item: str) -> Any:
f"using delimiter *{self._delim}*"
) from e

def __contains__(self, item: str) -> bool:
split_string = self._string.split(f"{item}{self._delim}")
return len(split_string) == 2

def get(self, item: str, default: Any) -> Any:
"""Get an item or return a default"""

Expand Down
4 changes: 3 additions & 1 deletion doc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ Functionality improvements
**************************
- Adds the thermochemistry method from A. Otlyotov, Y. Minenkov in https://doi.org/10.1002/jcc.27129
- Adds the improved Elastic Image Pair (i-EIP) method for double-ended transition state search

- Adds a :code:`autode.Species.solvent_name` property and setter for setting solvents from a string
- Enables reloading molecules from xyz files with their previously defined charge/multiplicity/solvent

Bug Fixes
*********
- Fixes Hessian extraction in some G16 output files
- Fixes large step sizes in DHSGS
- Fixes the ability to define both a SMILES string and a xyz file without raising an exception


1.4.0
Expand Down
Loading

0 comments on commit d1b502e

Please sign in to comment.