Reload charge etc from file (duartegroup#305)

* reload charge etc * fix unused * solvent_name property * remove debug print * Fix formatting [skip actions]
aleksy-k · Oct 9, 2023 · d1b502e · d1b502e
1 parent d29a528
commit d1b502e
Show file tree

Hide file tree

Showing 7 changed files with 162 additions and 55 deletions.
diff --git a/autode/input_output.py b/autode/input_output.py
@@ -1,6 +1,6 @@
 import os
 
-from typing import Collection, Sequence, Optional, TYPE_CHECKING
+from typing import Collection, Sequence, Optional, Any, TYPE_CHECKING
 
 from autode.atoms import Atom, Atoms
 from autode.exceptions import XYZfileDidNotExist
@@ -21,43 +21,47 @@ def xyz_file_to_atoms(filename: str) -> Atoms:
     Arguments:
         filename: .xyz filename
 
+    Raises:
+        (autode.exceptions.XYZfileWrongFormat): If the file is the wrong format
+
     Returns:
         (autode.atoms.Atoms): Atoms
     """
     logger.info(f"Getting atoms from {filename}")
 
     _check_xyz_file_exists(filename)
     atoms = Atoms()
+    n_atoms = 0
 
-    with open(filename, "r") as xyz_file:
-
-        try:
-            # First item in an xyz file is the number of atoms
-            n_atoms = int(xyz_file.readline().split()[0])
+    for i, line in enumerate(open(filename, "r")):
 
-        except (IndexError, ValueError):
-            raise XYZfileWrongFormat("Number of atoms not found")
+        if i == 0:  # First line in an xyz file is the number of atoms
+            n_atoms = _n_atoms_from_first_xyz_line(line)
+            continue
+        elif i == 1:  # Second line of an xyz file is the tittle line
+            continue
+        elif i == n_atoms + 2:
+            break
 
-        # XYZ lines should be the following 2 + n_atoms lines
-        xyz_lines = xyz_file.readlines()[1 : n_atoms + 1]
+        try:
+            atom_label, x, y, z = line.split()[:4]
+            atoms.append(Atom(atomic_symbol=atom_label, x=x, y=y, z=z))
 
-        for i, line in enumerate(xyz_lines):
+        except (IndexError, TypeError, ValueError):
+            raise XYZfileWrongFormat(
+                f"Coordinate line {i} ({line}) not the correct format"
+            )
 
-            try:
-                atom_label, x, y, z = line.split()[:4]
-                atoms.append(Atom(atomic_symbol=atom_label, x=x, y=y, z=z))
+    if len(atoms) != n_atoms:
+        raise XYZfileWrongFormat(
+            f"Number of atoms declared ({n_atoms}) "
+            f"not equal to the number of atoms found "
+            f"{len(atoms)}"
+        )
 
-            except (IndexError, TypeError, ValueError):
-                raise XYZfileWrongFormat(
-                    f"Coordinate line {i} ({line}) " f"not the correct format"
-                )
+    if n_atoms == 0:
+        raise XYZfileWrongFormat(f"XYZ file ({filename}) had no atoms!")
 
-        if len(atoms) != n_atoms:
-            raise XYZfileWrongFormat(
-                f"Number of atoms declared ({n_atoms}) "
-                f"not equal to the number of atoms found "
-                f"{len(atoms)}"
-            )
     return atoms
 
 
@@ -127,13 +131,26 @@ def xyz_file_to_molecules(filename: str) -> Sequence["Molecule"]:
 
         _set_attr_from_title_line(molecule, "charge", title_line)
         _set_attr_from_title_line(molecule, "mult", title_line)
-        _set_attr_from_title_line(molecule, "energy", title_line, "E")
+        _set_attr_from_title_line(
+            molecule, "energy", title_line, key_in_line="E"
+        )
 
         molecules.append(molecule)
 
     return molecules
 
 
+def attrs_from_xyz_title_line(filename: str) -> StringDict:
+    title_line = ""
+
+    for i, line in enumerate(open(filename, "r")):
+        if i == 1:
+            title_line = line.strip()
+            break
+
+    return StringDict(title_line)
+
+
 def _check_xyz_file_exists(filename: str) -> None:
 
     if not os.path.exists(filename):
@@ -160,3 +177,10 @@ def _set_attr_from_title_line(
         logger.warning(f"Failed to set the species {attr} from xyz file")
 
     return None
+
+
+def _n_atoms_from_first_xyz_line(line: str) -> int:
+    try:
+        return int(line.strip())
+    except (IndexError, ValueError):
+        raise XYZfileWrongFormat("Number of atoms not found")
diff --git a/autode/path/path.py b/autode/path/path.py
@@ -115,7 +115,7 @@ def product_idx(self, product: "Species") -> Optional[int]:
 
         return None
 
-    def products_made(self, product) -> bool:
+    def products_made(self, product: "Species") -> bool:
         """Are the products are made on the surface?
 
         -----------------------------------------------------------------------
@@ -127,7 +127,7 @@ def products_made(self, product) -> bool:
         """
         return self.product_idx(product) is not None
 
-    def is_saddle(self, idx) -> bool:
+    def is_saddle(self, idx: int) -> bool:
         """Is an index a saddle point"""
         if idx == 0 or idx == len(self) - 1:
             logger.warning("Cannot be saddle point, index was at the end")
@@ -143,7 +143,9 @@ def is_saddle(self, idx) -> bool:
         energy = self[idx].energy
         return self[idx - 1].energy < energy and self[idx + 1].energy < energy
 
-    def plot_energies(self, save, name, color, xlabel) -> None:
+    def plot_energies(
+        self, save: bool, name: str, color: str, xlabel: str
+    ) -> None:
         """Plot this path"""
         if len(self) == 0 or any(item.energy is None for item in self):
             logger.error("Could not plot a surface, an energy was None")
@@ -164,7 +166,7 @@ def plot_energies(self, save, name, color, xlabel) -> None:
 
         return None
 
-    def print_geometries(self, name) -> None:
+    def print_geometries(self, name: str) -> None:
         """Print an xyz trajectory of the geometries in the path"""
 
         open(f"{name}.xyz", "w").close()  # Empty the file

diff --git a/autode/species/molecule.py b/autode/species/molecule.py
@@ -1,11 +1,11 @@
 import re
 import rdkit
 from pathlib import Path
-from typing import Optional, List
+from typing import Optional, List, Any
 from rdkit.Chem import AllChem
 
 from autode.log.methods import methods
-from autode.input_output import xyz_file_to_atoms
+from autode.input_output import xyz_file_to_atoms, attrs_from_xyz_title_line
 from autode.conformers.conformer import Conformer
 from autode.conformers.conf_gen import get_simanl_conformer
 from autode.conformers.conformers import atoms_from_rdkit_mol
@@ -26,8 +26,8 @@ def __init__(
         smiles: Optional[str] = None,
         atoms: Optional[List["Atom"]] = None,
         solvent_name: Optional[str] = None,
-        charge: int = 0,
-        mult: int = 1,
+        charge: Optional[int] = None,
+        mult: Optional[int] = None,
         **kwargs,
     ):
         """
@@ -44,31 +44,42 @@ def __init__(
 
             solvent_name: Solvent that the molecule is immersed in
 
-            charge: Charge on the molecule. Default = 0
+            charge: Charge on the molecule. If unspecified defaults to, if
+                    present, that defined in a xyz file or 0
 
-            mult: Spin multiplicity on the molecule. Default = 1
+            mult: Spin multiplicity on the molecule. If unspecified defaults to, if
+                  present, that defined in a xyz file or 1
 
         Keyword Arguments:
             name: Name of the molecule. Overrides arg if arg is not a .xyz
                   filename
         """
         logger.info(f"Generating a Molecule object for {arg}")
-        name = arg if "name" not in kwargs else kwargs["name"]
-
-        super().__init__(name, atoms, charge, mult, solvent_name)
-
-        if arg.endswith(".xyz"):
-            self._init_xyz_file(xyz_filename=arg)
-
+        super().__init__(
+            name=arg if "name" not in kwargs else kwargs["name"],
+            atoms=atoms,
+            charge=charge if charge is not None else 0,
+            mult=mult if mult is not None else 1,
+            solvent_name=solvent_name,
+        )
         self.smiles = smiles
         self.rdkit_mol_obj = None
         self.rdkit_conf_gen_is_fine = True
 
+        if _is_xyz_filename(arg):
+            self._init_xyz_file(
+                xyz_filename=arg,
+                charge=charge,
+                mult=mult,
+                solvent_name=solvent_name,
+            )
+
         if smiles is not None:
-            self._init_smiles(smiles)
+            assert not _is_xyz_filename(arg), "Can't be both SMILES and file"
+            self._init_smiles(smiles, charge=charge)
 
         # If the name is unassigned use a more interpretable chemical formula
-        if name == "molecule" and self.atoms is not None:
+        if self.name == "molecule" and self.atoms is not None:
             self.name = self.formula
 
     def __repr__(self):
@@ -78,7 +89,7 @@ def __eq__(self, other):
         """Equality of two molecules is only dependent on the identity"""
         return super().__eq__(other)
 
-    def _init_smiles(self, smiles: str):
+    def _init_smiles(self, smiles: str, charge: Optional[int]):
         """Initialise a molecule from a SMILES string using RDKit if it's
         purely organic.
 
@@ -87,18 +98,17 @@ def _init_smiles(self, smiles: str):
             smiles (str):
         """
         at_strings = re.findall(r"\[.*?]", smiles)
-        init_charge = self._charge
 
         if any(metal in string for metal in metals for string in at_strings):
             init_smiles(self, smiles)
 
         else:
             init_organic_smiles(self, smiles)
 
-        if not _is_default_charge(init_charge) and init_charge != self._charge:
+        if charge is not None and charge != self._charge:
             raise ValueError(
                 "SMILES charge was not the same as the "
-                f"defined value. {self._charge} ≠ {init_charge}"
+                f"defined value. {self._charge} ≠ {charge}"
             )
 
         logger.info(
@@ -108,20 +118,28 @@ def _init_smiles(self, smiles: str):
         )
         return None
 
-    def _init_xyz_file(self, xyz_filename: str):
+    def _init_xyz_file(self, xyz_filename: str, **override_attrs: Any):
         """
         Initialise a molecule from a .xyz file
 
         -----------------------------------------------------------------------
         Arguments:
             xyz_filename (str):
+            charge (int | None):
+            mult (int | None):
 
         Raises:
             (ValueError)
         """
-        logger.info("Generating species from .xyz file")
+        logger.info("Generating a species from .xyz file")
 
         self.atoms = xyz_file_to_atoms(xyz_filename)
+        title_line_attrs = attrs_from_xyz_title_line(xyz_filename)
+        logger.info(f"Found ({title_line_attrs}) in title line")
+
+        for attr in ("charge", "mult", "solvent_name"):
+            if override_attrs[attr] is None and attr in title_line_attrs:
+                setattr(self, attr, title_line_attrs[attr])
 
         if (
             sum(atom.atomic_number for atom in self.atoms) % 2 != 0
@@ -135,7 +153,7 @@ def _init_xyz_file(self, xyz_filename: str):
             )
 
         # Override the default name with something more descriptive
-        if self.name == "molecule" or self.name.endswith(".xyz"):
+        if self.name == "molecule" or _is_xyz_filename(self.name):
             self.name = Path(self.name).stem
 
         make_graph(self)
@@ -253,5 +271,5 @@ class Product(Molecule):
     """Product molecule"""
 
 
-def _is_default_charge(value) -> bool:
-    return value == 0
+def _is_xyz_filename(value: str) -> bool:
+    return isinstance(value, str) and value.endswith(".xyz")
diff --git a/autode/species/species.py b/autode/species/species.py
@@ -1157,7 +1157,13 @@ def print_xyz_file(
 
         # Default generated title line
         if title_line is None:
-            title_line = f"Generated by autodE on: {date.today()}. "
+            title_line = (
+                f"Generated by autodE on: {date.today()}. "
+                f"charge = {self.charge} "
+                f"mult = {self.mult} "
+            )
+            if self.solvent is not None and self.solvent.is_implicit:
+                title_line += f"solvent_name = {self.solvent.name} "
             if self.energy is not None:
                 title_line += f"E = {self.energy:.6f} Ha"
 
@@ -1579,5 +1585,29 @@ def has_identical_composition_as(self, species: "Species") -> bool:
         """Does this species have the same chemical identity as another?"""
         return self.sorted_atomic_symbols == species.sorted_atomic_symbols
 
+    @property
+    def solvent_name(self) -> Optional[str]:
+        """
+        Name of the solvent or None
+
+        Returns:
+            (str | None):
+        """
+        return None if self._solvent is None else self._solvent.name
+
+    @solvent_name.setter
+    def solvent_name(self, value: Optional[str]) -> None:
+        """
+        Set the solvent of this species given an optional name. Setting this to None
+        removes the solvent
+
+        Arguments:
+            value (str | None): Name of the solvent to use
+        """
+        if value is None:
+            self._solvent = None
+        else:
+            self._solvent = get_solvent(value, kind="implicit")
+
     # --- Method aliases ---
     symmetry_number = sn
diff --git a/autode/utils.py b/autode/utils.py
@@ -719,6 +719,10 @@ def __getitem__(self, item: str) -> Any:
                 f"using delimiter *{self._delim}*"
             ) from e
 
+    def __contains__(self, item: str) -> bool:
+        split_string = self._string.split(f"{item}{self._delim}")
+        return len(split_string) == 2
+
     def get(self, item: str, default: Any) -> Any:
         """Get an item or return a default"""
 

diff --git a/doc/changelog.rst b/doc/changelog.rst
@@ -10,12 +10,14 @@ Functionality improvements
 **************************
 - Adds the thermochemistry method from A. Otlyotov, Y. Minenkov in https://doi.org/10.1002/jcc.27129
 - Adds the improved Elastic Image Pair (i-EIP) method for double-ended transition state search
-
+- Adds a :code:`autode.Species.solvent_name` property and setter for setting solvents from a string
+- Enables reloading molecules from xyz files with their previously defined charge/multiplicity/solvent
 
 Bug Fixes
 *********
 - Fixes Hessian extraction in some G16 output files
 - Fixes large step sizes in DHSGS
+- Fixes the ability to define both a SMILES string and a xyz file without raising an exception
 
 
 1.4.0