From 677b76642e408b545ebdc642665e3c8dff3a16ff Mon Sep 17 00:00:00 2001 From: Jacklaw441 Date: Wed, 10 Jun 2026 22:58:53 -0400 Subject: [PATCH] updated flexEFP tutorial with newer scripts --- doc/applied_efp.rst | 552 +++-- examples/flex-EFP/Scripts/cut_caps.py | 790 +++++++ examples/flex-EFP/Scripts/frag_RMSD_V2.py | 294 +++ examples/flex-EFP/Scripts/make_AAs.py | 1309 +++++++++-- examples/flex-EFP/Scripts/make_qchem_input.py | 201 ++ .../flex-EFP/Scripts/step4.Flexible_V7.py | 2057 +++++++++++++++++ 6 files changed, 4733 insertions(+), 470 deletions(-) create mode 100644 examples/flex-EFP/Scripts/cut_caps.py create mode 100644 examples/flex-EFP/Scripts/frag_RMSD_V2.py create mode 100644 examples/flex-EFP/Scripts/make_qchem_input.py create mode 100644 examples/flex-EFP/Scripts/step4.Flexible_V7.py diff --git a/doc/applied_efp.rst b/doc/applied_efp.rst index 70fc9d4..a6c93cd 100644 --- a/doc/applied_efp.rst +++ b/doc/applied_efp.rst @@ -10,33 +10,41 @@ Introduction QM/EFP methods are powerful tools for describing photo- and redox-chemistry in condensed phases, see e.g., :ref:`bio_papers`. However, setting up QM/EFP calculations for complex systems is not trivial and time-consuming. This tutorial presents a computational workflow that helps -interested users to setup QM/EFP calculations of photoactive proteins in the format suitable for -calculations in Q-Chem. It is assumed that the initial structures for QM/EFP calculations are obtained -from a Gromacs MD trajectory. GAMESS is used for computing EFP parameters. +interested users to set up QM/EFP calculations of photoactive proteins in a format suitable for +calculations in Q-Chem. It is assumed that the initial structures for QM/EFP calculations are +obtained from a GROMACS MD trajectory. GAMESS is used for computing EFP parameters. -The workflow was tested on several photosynthetic pigment-protein -complexes (FMO, PS 1, WSCP). -While the workflow is reasonably general and can be adapted to other biological and materials systems, -the QM/EFP calculations are nowhere "black-box" and the user will need to make some decisions based on the system -specifics and the properties of interest. +The workflow was tested on several photosynthetic pigment-protein complexes (FMO, PS1, WSCP). +While the workflow is reasonably general and can be adapted to other biological and materials +systems, the QM/EFP calculations are not black-box procedures and the user will need to make +decisions based on the system specifics and the properties of interest. + +The following scripts are covered in this tutorial: + +* ``make_AAs.py`` — prepares GAMESS MAKEFP input files for all EFP region fragments +* ``cut_caps.py`` — trims EFP parameter files by removing capping hydrogens and correcting charges +* ``make_qchem_input.py`` — assembles the final Q-Chem QM/EFP input file Preliminaries ============= -* Gromacs installation -* GAMESS installation -* Q-Chem installation -* Python 3 +The following software is required: + +* GROMACS +* GAMESS +* Q-Chem +* libEFP +* Python 3 (with numpy) -To start with, you will need the following Gromacs files: +To start with, you will need the following GROMACS files: -* structure file (.g96) -* topology file (.top) -* binary file from molecular dynamics (.tpr) +* structure file (``.g96``) +* topology file (``.top``) +* binary run input file (``.tpr``) If you wish to use the FlexEFP scheme as described in `Flexible EFP paper `_, you also need -a library of precomputed EFP potentials. Parameter database from `Flexible EFP paper +a library of precomputed EFP potentials. The parameter database from the `Flexible EFP paper `_ can be found `here `_. @@ -58,12 +66,12 @@ The workflow leads the user through the following steps: FMO protein =========== -The Fenna-Matthews-Olson (FMO) protein is used as an example system. -Preparation of the QM anf EFP regions follows the work by -`Kim et al `_. +The Fenna-Matthews-Olson (FMO) protein is used as an example system throughout this tutorial. +Preparation of the QM and EFP regions follows the work by +`Kim et al. `_. -FMO is a trimeric protein with eight bacteriochloropyll a (BChl) pigments in each monomer. -FMO completes energy transfer via excitonic couplings across these eight BChls. +FMO is a trimeric protein with eight bacteriochlorophyll a (BChl) pigments in each monomer. +FMO facilitates energy transfer via excitonic couplings across these eight BChls. .. image:: ../images/FMO_trimer_BCLs.bmp :width: 350 @@ -73,63 +81,67 @@ FMO completes energy transfer via excitonic couplings across these eight BChls. :width: 400 :align: center -In the present example, water molecules more than 15 angstroms from the protein's surface have been removed. +In the present example, water molecules more than 15 Angstroms from the protein surface have been +removed. .. image:: ../images/fmo_waters15a.bmp :width: 400 :align: center -Molecular dynamics and constrained QM/MM geometry optimizations have been already performed -on the system. The constrained QM/MM geometry optimizations have been shown to be essential for accurate predictions of -optical spectra and redox properties.[REFS] However, this step is not considered in this tutorial and it is up to the user -to take care of it if needed. - -.. note:: The provided .g96 file contains `QSL` and `LA` residues, corresponding to so called "quantum" waters, i.e., water - molecules that could be included in the QM regions for QM/MM optimizations and link atoms for defining QM-MM boundaries, that - are 'left-overs' of the QM/MM optimizations. These residues are of course not mandatory for any of the scripts below. +Molecular dynamics and constrained QM/MM geometry optimizations have been performed on the +system prior to this tutorial. Constrained QM/MM geometry optimizations have been shown to be +essential for accurate predictions of optical spectra and redox properties. However, this step +is not covered here and it is left to the user to perform if needed. +.. note:: The provided .g96 file contains ``QSL`` and ``LA`` residues, corresponding to + "quantum" waters (water molecules included in the QM region during QM/MM optimizations) + and link atoms that were utilized in QM/MM optimizations, respectively. These are left + over from the QM/MM optimization step and are not required for any of the scripts in this tutorial. + .. _setup_QM_EFP: Defining QM and EFP regions =========================== -In the present example, the QM/EFP calculation will be set up for the third BChl (residue number 361) in the active (QM) region. +In the present example, the QM/EFP calculation will be set up for the third BChl (residue number 361) +in the active (QM) region. .. _efp_region: EFP region ---------- -While it is possible to include all non-QM atoms in the EFP region, this approach might be expansive for larger proteins, -both at the step of computing EFP parameters, and performing QM/EFP calculation. Thus, a practical approach is to -define the EFP region that will include all residues within a particular distance from the QM region. -Here, we will include every amino acid, (non-QM) BChls, and water molecules containing an -atom within 15 Angstroms of the BChl chlorin ring. +While it is possible to include all non-QM atoms in the EFP region, this approach can be +computationally expensive for larger proteins for computing EFP parameters +and performing QM/EFP calculations. A practical approach is to define an EFP region that +includes all residues within a particular distance from the QM region. Here, we include every +amino acid, non-QM BChl, and water molecule containing at least one atom within 15 Angstroms +of the BChl chlorin ring. + -.. note:: One can extract a single snapshot from GROMACS MD trajectory in .g96 format using - ``gmx traj -f md_50.trr -s md_50.tpr -o snapshot_timestep.g96`` [JACK] +.. note:: A single snapshot can be extracted from a GROMACS MD trajectory in .g96 format using: -.. note:: Note that extracting larger systems can occasionally cause columns to be misaligned - due to the residue ID number passing from 9999 to 10000. - This misalignment can make the structure appear strangely in VMD or other visualizers - (i.e., you will see "sheets" of waters with coordinates - read incorrectly). Though it shouldn't matter, you can fix the problem by realigning the columns. + ``gmx trjconv -s md_50.tpr -f md_50.trr -o snapshot_50000.g96 -dump 50000`` + +.. note:: Extracting larger systems can occasionally cause columns to be misaligned when the + residue ID passes from 9999 to 10000. This misalignment can make the structure appear + incorrectly in VMD or other visualizers (e.g., "sheets" of waters with misread coordinates). + Though it shouldn't matter for this tutorial, you can fix the problem by realigning the columns. `Column reformatting script <../examples/flex-EFP/Scripts/format.py>`_ is an example of a script that can fix this problem. -In order to figure out which amino acids, cofactors, and water molecules constitute the EFP region, -we make use of the ``gmx select`` command, but this will take a bit of footwork. First, we create an index file -that defines the BChl `headring` group that will contain atoms of chlorin ring. +To determine which residues constitute the EFP region, we use the ``gmx select`` +command. First, create an index file defining the BChl ``headring`` group containing the +atoms of the chlorin ring: -.. note:: The BChl chlorin ring (`headring`) is defined by the following atom names: +.. note:: The BChl chlorin ring (``headring``) is defined by the following atom names: ``MG CHA CHB HB CHC HC CHD HD NA C1A C2A H2A C3A H3A C4A CMA HMA1 HMA2 HMA3 NB C1B C2B C3B C4B CMB HMB1 HMB2 HMB3 CAB OBB CBB HBB1 HBB2 HBB3 NC C1C C2C H2C C3C H3C C4C CMC HMC1 HMC2 HMC3 CAC HAC1 HAC2 CBC HBC1 HBC2 HBC3 ND C1D C2D C3D C4D CMD HMD1 HMD2 HMD3 CAD OBD CBD HBD CGD O1D O2D CED HED1 HED2 HED3`` -The code below will generate this index file with the default -name ``index.ndx``; index.ndx contains all standard GROMACS index groups (system, protein, etc), -with a new added group, 'headring'. +The following code generates the index file (``index.ndx``) containing all standard GROMACS +index groups with the new ``headring`` group appended: .. literalinclude:: ../examples/flex-EFP/Scripts/gen_efp_index.sh :linenos: @@ -142,40 +154,39 @@ Here is a visualization of the atoms contained in the newly created index group: :width: 400 :align: center -We want the EFP region to be composed of all residues that contain at least one atom within -15 Angstroms of the `headring` atoms. The following command will do the job: +The following command selects all residues containing at least one atom within 1.5 nm +(15 Angstroms) of the ``headring`` group and writes them to a new index file: ``gmx select -s md_80.tpr -n index.ndx -f bchl361-79002.g96 -select -'same residue as within 1.5 of group "Headring"' -on shell_index.ndx`` [JACK] +'same residue as within 1.5 of group "Headring"' -on shell_index.ndx`` + +The output ``shell_index.ndx`` contains exactly one index group defining the EFP region. +Next, extract the EFP region into a new structure file: -This command looks for any atom within the 1.5 cutoff (GROMACS units are nm), -then accepts every atom belonging to -the same residue as the found atom, and adds these to the selection. -The output of the command is named `shell_index.ndx`, which, as the -extension implies, is another index file. This file has exactly one index group that defines the EFP region. +``gmx editconf -f formed_bchl361-79002.g96 -n shell_index.ndx -o shell_bchl361-79002.g96`` -Now, ``gmx editconf -f formed_bchl361-79002.g96 -n shell_index.ndx -o shell_bchl361-79002.g96`` -creates a new structure file of the EFP region only. -Note that you do not need to specify the group output because the `shell_index.ndx` file only contains one group. -The headring surrounded by the EFP region looks like this: +Note that no output group needs to be specified since ``shell_index.ndx`` contains only one +group. The resulting EFP shell surrounding the headring looks like this: .. image:: ../images/tester.bmp :width: 400 :align: center -.. note:: The QM region will include the entire BChl, but the EFP region is defined by a distance to the chlorin ring only. +.. note:: The QM region will include the entire BChl, but the EFP region is defined by distance to the chlorin ring only. QM region --------- -.. warning:: Defining the QM region is system-specific. The user needs to prepare a text file containing all atoms of the QM region - as well as pairs of atoms defining the covalent boundaries between the QM and EFP regions! +.. warning:: Defining the QM region is system-specific. The user must prepare a text file + containing all atoms of the QM region as well as pairs of atoms defining any covalent + boundaries between the QM and EFP regions. -Take a look at the example prepared for the 3rd BChl in FMO :download:`qm_defined.inp <../examples/flex-EFP/Scripts/qm_defined.txt>`). -This file can be constructed by -copying the corresponding lines from the `.g96` file. The "QM_atoms" section of the file should contain all QM atoms -(not including capping hydrogens for covalent QM-EFP boundaries). +An example file prepared for the third BChl in FMO is provided: +:download:`qm_defined.txt <../examples/flex-EFP/Scripts/qm_defined.txt>`. This file can be +constructed by copying the corresponding lines from the ``.g96`` file. The ``QM_atoms`` +section should contain all QM atoms, not including capping hydrogens for covalent QM-EFP +boundaries. .. literalinclude:: ../examples/flex-EFP/Scripts/qm_defined.txt :linenos: @@ -183,17 +194,20 @@ copying the corresponding lines from the `.g96` file. The "QM_atoms" section of :append: ... :emphasize-lines: 1 -The "QM-MM" boundary section is optional and should -contain pairs of atoms for each covalent boundary, with the QM atom listed first. -The example :download:`qm_defined.inp <../examples/flex-EFP/Scripts/qm_defined.txt>` contains only one QM-MM boundary, but a -second boundary could be included like this: +The "QM-MM" boundary section is optional and should contain pairs of atoms for each +covalent boundary, with the QM atom listed first. The example + :download:`qm_defined.txt <../examples/flex-EFP/Scripts/qm_defined.txt>` + contains only one QM-MM boundary, but a second boundary could be included like this: .. literalinclude:: ../examples/flex-EFP/Scripts/new_sample.txt :linenos: :lines: 1- :emphasize-lines: 1 -[JACK: do coordinates need to match?] +Note that the atom indexes MUST match those in the structure file, but the coordinates +do NOT need to match. If you are planning to run multiple EFP calculations on identically-built +structures (e.g. different times of a single MD trajectory), then you can reuse the same +QM-defining text file. .. _qm_region_bchl: @@ -201,65 +215,68 @@ Example: QM region for the third BChl in FMO ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By trial and error we found that including the Mg-coordinating amino acid residue in the QM region -in QM/EFP calculations helps -SCF convergence and makes excitation energies more reliable. -Thus, the QM region will include all atoms of the BChl (residue BCL 361) as well as atoms from the nearby +in QM/EFP calculations helps SCF convergence and makes excitation energies more reliable. +Thus, the QM region will include all atoms of the BChl (residue BCL 361) as well as atoms from the nearby histidine that coordinates the BChl Mg atom shown below. .. image:: ../images/qm_region.bmp :width: 400 :align: center -This shows the entire residue 361 (BCL) and 290 (HIS), however, we only want to include the side chain of this histidine. -In other words, the residue atoms including :math:`C_{\beta}` should be included in the QM region, and the -backbone atoms (starting with :math:`C_{\alpha}`) should remain in the EFP region. That division looks like this: +The full residues 361 (BCL) and 290 (HIS) are shown above; however, only the side chain of +the histidine is included in the QM region. Specifically, residue atoms from :math:`C_{\beta}` +onward are included, while backbone atoms from :math:`C_{\alpha}` onward remain in the EFP +region: .. image:: ../images/qm_w_cut.bmp :width: 400 :align: center -Thus, we create a covalent QM-EFP boundary between :math:`C_{\beta}` and :math:`C_{\alpha}`, reflected in the example above. -The scripts will automatically cap the QM region with hydrogen atoms positioned between the boundary atoms, -so the QM region in the QM/EFP calculations will look like this: +A covalent QM-EFP boundary is therefore defined between :math:`C_{\beta}` and +:math:`C_{\alpha}`. The scripts automatically cap the QM region with hydrogen atoms positioned +along the boundary bond, so the QM region in the QM/EFP calculation looks like this: .. image:: ../images/qm_capped.bmp :width: 400 :align: center -We will discuss preparation of the EFP fragment for the boundary histidine later on. +Preparation of the EFP fragment for the boundary histidine is discussed in +:ref:`qmefp_boundary_fragments`. .. _fragmentation: Fragmentation of the EFP region =============================== -Now we need to prepare EFP fragments for our system. In :ref:`efp_region` we have already created `.g96` file containing atoms -belonging to the EFP region. Here we will split polypeptide chains and other large residues into EFP fragments. +Now we need to prepare EFP fragments for our system. In :ref:`efp_region` we have already +created a ``.g96`` file containing atoms belonging to the EFP region. Here we will split +polypeptide chains and other large residues into EFP fragments. .. _protein_efp: Splitting protein into EFP fragments ------------------------------------ -Because protein is a continuous chain, covalent bonds between neighboring amino acids have to be broken. -Chemically, we would like to break the :math:`C-C_{\alpha}` bond -(between alpha carbon and carbonyl carbon), however, standard PDB convention divides residues by the C-N bond (alpha carbon-nitrogen). +Because proteins tend to be continuous chains, covalent bonds between neighboring amino acids +have to be broken for fragmentation. Chemically, the :math:`C-C_{\alpha}` bond (between alpha +carbon and carbonyl carbon) is broken, however, standard PDB convention divides residues by the +C-N bond (alpha carbon-nitrogen). -PDB residues are divided like this: +Visually, PDB residues are divided like this: .. image:: ../images/pdb_67_col.bmp :width: 400 :align: center -EFP fragments need to be divided like this: +but EFP fragments should be divided like this: .. image:: ../images/efp_67_col.bmp :width: 400 :align: center -This, EFP amino acid fragments will not completely 'agree' with the PDB amino -acid numbering. Below is a snippet from the structure file (.g96) with the EFP fragment 7 highlighted. Note that atoms -'C' and 'O' of the PDB residue 6 (SER) will be included in the EFP fragment 7 (ASP). +Therefore, EFP amino acid fragments will not completely agree with PDB amino acid numbering. +Below is a snippet from the structure file (``.g96``) with EFP fragment 7 highlighted. Note +that atoms ``C`` and ``O`` of PDB residue 6 (SER) are included in EFP fragment 7 (ASP). .. literalinclude:: ../examples/flex-EFP/1.Prepare_Structure/bchl361-79002.g96 :linenos: @@ -269,61 +286,65 @@ acid numbering. Below is a snippet from the structure file (.g96) with the EFP f A daunting task of splitting the protein into EFP fragments is accomplished by script ``make_AAs.py``. A sample execution is: -``python make_AAs.py shell_bchl361-79002.g96 bchl361-79002.g96 qm_defined.txt topol.top`` +``python make_AAs.py shell_bchl361-79002.g96 bchl361-79002.g96 qm_defined.txt topol.top `` -The input parameters are: -* `.g96` file containing atoms of EFP region (see :ref:`efp_region`) -* `.g96` file containing a full system (your initial structure from MD) -* user-prepared file with definition of QM region and QM-EFP boundaries -* `*.top` topology file for your system +* ``.g96`` file containing atoms of the EFP region (see :ref:`efp_region`) +* ``.g96`` file containing the full system (initial structure from MD) +* user-prepared file defining the QM region and QM-EFP boundaries (see :ref:`setup_QM_EFP`) +* topology file (``topol.top``) +* timestamp to differentiate output files (e.g. ``50000``) +* mutant label (e.g. ``wt``) Output: -* GAMESS MAKEFP input files for all amino acids and other residues -.. note:: GAMESS MAKEFP input file parameters might be adjusted in ``make_AAs.py`` +* GAMESS MAKEFP input files for all amino acid and other residues in the EFP region -The script splits the EFP region of the protein into EFP fragments and creates a GAMESS MAKEFP input file for each fragment. -The script will create EFP fragments and corresponding input files for non-amino acid residues. +.. note:: GAMESS MAKEFP input file parameters (basis set, memory, etc.) can be adjusted + directly in ``make_AAs.py``. -The filenames use information from `*.g96` file; -for example, v_22_301.inp is a valine residue (v), residue number 22, and the first atom of this fragment has an atom -ID of 301. [JACK: based on which g96?] Histidine residues are denoted hd\_, he, or hp\_ depending on protonation states. -Capping hydrogens are added automatically to amino acid fragments, with atom names H000. The capping hydrogens will be -automatically removed from the amino acid EFP fragments once the EFP parameters are computed. +The script splits the EFP region into fragments and creates a GAMESS MAKEFP input file for +each. Output filenames are derived from the full system ``.g96`` file. For example, +``v_22_301_50000_wt.inp`` corresponds to a valine residue (``v``), residue number 22, with +first atom ID 301 (of the full structure, NOT the structure of the EFP region). Histidine residues +are denoted ``hd_``, ``he_``, or ``hp_`` depending on protonation state. Capping hydrogens +are added automatically to amino acid fragments, they will always have the atom name ``H000`` +(multiple virtual hydrogens will have the same atom name). These will be removed from the EFP +parameter files in a later step (see :ref:`trim_params`). -Non-standard amino acids are named with full residue names given in the g96 structure file -(i.e., bcl_360_5667.inp for the BCL with residue number 360). -Non amino acids fragments are created with no capping hydrogens by default. +Non-standard residues and cofactors are named using the full residue name from the ``.g96`` +file (e.g., ``bcl_360_5667_50000_wt.inp`` for BCL with residue number 360). Non-amino acid +fragments are created without capping hydrogens by default. .. _qmefp_boundary_fragments: QM-EFP boundary fragments -------------------------- +-------------------------- -For a fragment with a QM-EFP covalent boundary (e.g., HIS 290 from :ref:`qm_region_bchl`), -the script will add two comment lines to the end of the input file. -The ``!comment atoms to be erased`` line will contain atom names of the QM atoms to be removed from the -parameter file. `!polarization points to remove` will additionally list atoms around which polarization points -will be removed. These atoms are found as the covalently bound to the "to-be-deleted atoms" by analysis of -the topology file. -Here is an example for HIS 290 (hd_290_4417.inp): +For a fragment with a covalent QM-EFP boundary (e.g., HIS 290 from :ref:`qm_region_bchl`), +``make_AAs.py`` will append two comment lines to the end of the input file. The +``!erased:`` line lists atom names of QM atoms to be removed from the parameter file. +The ``!remove:`` line additionally lists atoms around which polarization points will be +removed, identified by analysis of the topology file. Here is an example for HIS 290 +(``hd_290_4417_50000_wt.inp``): -.. literalinclude:: ../examples/flex-EFP/1.Prepare_Structure/hd_290_4417.inp +.. literalinclude:: ../examples/flex-EFP/1.Prepare_Structure/hd_290_4417_50000_wt.inp :linenos: :emphasize-lines: 31,32 -Note that while CA (alpha-carbon) is not a part of the QM region, it is a QM-EFP boundary atoms and will be also removed from the -parameter file in a later step. The commented lines will be used later to finalize the fragment parameter files. -Here we follow a QM-EFP boundary scheme developed by Yongbin Kim `FMO paper `_ -that ensured stability of QM/EFP calculations in the FMO protein. +Note that while :math:`C_{\alpha}` is not part of the QM region, it is a QM-EFP boundary +atom and will also be removed from the parameter file in a later step. These comment lines +are used by ``cut_caps.py`` to finalize the fragment parameter files (see :ref:`trim_params`). +The boundary scheme follows the approach developed by +`Kim et al. `_, which ensures +stability of QM/EFP calculations in the FMO protein. .. image:: ../images/qm_efp_boundary_fragment.png :width: 400 :align: center -.. warning: The developed scheme for QM-EFP boundaries in amino acid fragments is general. However, - the user might need to modify it for non-standard fragments. +.. warning:: The QM-EFP boundary scheme for amino acid fragments is general. However, + the user may need to modify it for non-standard fragments. If a residue contains only QM atoms, such as BCL 361 in our example, no input file will be created. @@ -331,166 +352,239 @@ will be created. Non-amino acids and cofactors ----------------------------- -``make_AAs.py`` is robust in preparing EFP fragments for amino acid sequences and non-covalently linked molecules (e.g., -water molecules, ions, simple cofactors). However, large cofactors (exceeding 100 atoms) and non-protein polymers -need to be further split into EFP fragments. Making a general script that would solve this task is work -in progress. Here we provide a script that splits (B)Chl `a` into chlorin head and tail -fragments. +``make_AAs.py`` handles EFP fragment preparation for standard amino acid sequences, +non-covalently linked molecules (e.g., water molecules, ions), and cofactors including +(B)Chls. For large cofactors exceeding roughly 100 atoms, the script creates one fragment +for the entire residue by default. Sometimes, it is desirable to split a large cofactor into +sub-fragments. See below, the separate chlorin head and phytol tail fragments for a BCL. -.. note:: ``make_AAs.py`` creates fragments for all non-amino acid residues including (B)Chls. However, - it creates one fragment for the entire residue. If you plan to create sub-fragments as explained below, - you should delete the full molecule input files made with ``make_AA.py``. - -Below is the division of the head and tail groups in BChl: +Below is an example of the head/tail division used for BChl: .. image:: ../images/efp_headtail.bmp :width: 400 :align: center -The script ``make_bchls.py`` creates two fragment input files for each Bchl molecule according to that division. Capping -hydrogens are added where the two are normally covalently bound, much like the case for the amino acid backbone bonds. The -script can be executed by: - -``python make_bchls.py shell_bchl361-79002.g96 361`` +For the BChl case, capping hydrogens are added at the head-tail junction in the same way as +for amino acid backbone cuts. CLA, BCL, LMG, and LHG fragments are cut using this script, but +note that atom names must match those used in this example setup. The same syntax used for those +can be used to create a custom sub-fragmentation scheme. The relevant variables to adjust are +contained the ``define_ligand_cut`` function, and they are: -[JACK: wrong arguments here?] - -This script is "hard-coded" for the FMO BChls. You can try to edit it for your purposes. -Variable "RESNAME" is the name of the residue to be fragmented. The list "Headrings" includes the atom -names for every atom to be added to the head fragment. Atoms that are not in this list will -be added to the tail fragment. Variables "headside" and "tailside" are the names -of the two atoms that are normally covalently bound, but will be capped for the fragmentation. -Variable "site" is optional, it lists the residue ID of a QM residue that doesn't need to be fragmented -because it is in the QM region. +* ``RESNAME`` — residue name to be fragmented +* ``Rings`` — list of atom names to include in the head fragment; all others go to the tail +* ``headside`` / ``tailside`` — atom names on either side of the covalent cut +* ``site`` — optional; residue ID of a QM BChl that should be skipped entirely .. _parameter_making: EFP parameter generation ======================== -``make_AAs.py`` and ``make_bchls.py`` from the previous step created a collection of GAMESS MAKEFP input files. -Now you have two options: +``make_AAs.py`` from the previous step created a collection of GAMESS MAKEFP input files. +You now have two options for obtaining EFP parameters: + +* Compute all needed EFP parameters by running the MAKEFP inputs in GAMESS +* Use the FlexEFP procedure to obtain parameters by rotation of precomputed parameters + stored in a database -* compute all needed EFP parameters by running the MAKEFP inputs in GAMESS -* use FlexEFP procedure to obtain parameters by rotation of the already computed - parameters stored in the database +If your system is small or you do not wish to use the FlexEFP scheme, submit GAMESS +calculations for all generated inputs, collect all produced ``.efp`` files, and proceed +to :ref:`trim_params`. -If you system is small and/or you do not want to use FlexEFP scheme, submit GAMESS calculations -for all generated inputs, make sure to collect all produced ``.efp`` files and proceed -to the next section :ref:`clasical_fragment`. +Otherwise, ``frag_RMSD_V2.py`` checks the geometry of each fragment against fragments +available in the precomputed parameter database. For amino acids, the script searches only +among fragments of the same amino acid type. If the RMSD between the two structures is +below the threshold, the database parameters are considered +transferable and will be rotated and translated to match the geometry of your +fragment— no GAMESS calculation needed for that fragment. Note that if the RMSD is greater +than the cutoff threshold, you will need to generate the parameters of that fragment in +GAMESS. You can add computed parameters to your fragment library to reduce the computation +time of future timesteps. -Otherwise, ``fragment_RMSD.py`` script will check geometry of a fragment against -fragments available in the database of pre-computed parameters (for amino acids, the script is smart to search only -among the same-amino acid fragments). -If the RMSD between the two structures is sufficiently small, then the parameters are -considered good enough and will be adjusted (rotated and translated to exactly match geometry -of your fragment), such that you don't have to calculate them in GAMESS! +.. note:: The path to the EFP parameter database is hardcoded in ``frag_RMSD_V2.py``. + Adjust it to point to your local copy of the database before running. -.. note:: A path to the EFP parameter database is hard-coded in ``fragment_RMSD.py`` script. - Adjust it accordingly! +.. note:: The RMSD threshold is hardcoded in ``frag_RMSD_V2.py`` to 0.2 Angstroms. + Adjust if desired. -.. note:: The RMSD threshold value is hardcoded in ``fragment_RMSD.py`` to 0.2 Angstroms. Change - it if desired. +The script is run on one single GAMESS MAKEFP input file at a time: -The script `fragment_RMSD.py` can be run by the following example: +``frag_RMSD_V2.py a_4_45_50000_wt.inp`` -``fragment_RMSD.py a_4_45.inp`` +This reads ``a_4_45_50000_wt.inp`` (an alanine fragment, identified by the ``a_`` prefix) and +computes the RMSD against each ``.efp`` file found in the ``ala/`` subdirectory of the +database. The database subdirectory is determined automatically from the one-letter amino +acid prefix in the filename (e.g., ``a`` → ``ala/``, ``v`` → ``val/``, etc.). -This reads the given GAMESS MAKEFP input file (a_4_45.inp) and computes the RMSD with each .efp file it can find in the ``ala/`` -directory of the database ("a" in the input name is a one-letter name of alanine). +If no suitable match is found, the script prints: -[JACK: is t=it relevant? "then name the final folder(s) by amino acid (ie, ala/, cys/, thr/, etc)"] -[JACK: need to move step4.Flexible_V5.py to the directory with these scripts] +``No match, run GAMESS for: `` -If no suitable match is found in the database, the script will print a message of "No match, run GAMESS for: ", -and an .efp file will not be created. -If no database folder is found, or if the input is not a standard, -non-terminal amino acid, the python script will return an error. -For all such fragments, you will need to run GAMESS calculations. Fortunately, most of them +and no ``.efp`` file is created for that fragment. If no database folder is found, or if +the input is not a standard non-terminal amino acid, the script will return an error. All +such fragments will need to be computed in GAMESS. Fortunately, most of these calculations will not take more than a few minutes. -[JACK: need to make this part easier to use. e.g., create a list of all non-computed fragments or copy those inputs into -separate directory] +A simple way to run ``frag_RMSD_V2.py`` on all fragments at once is to iterate over every +``*.inp`` file in the current directory using a bash script: + +.. code-block:: bash + + for f in *.inp; do + python frag_RMSD_V2.py $f + done -[JACK: If you need to repeat this step, delete this text file to avoid confusion. - which text file?] +After running, any fragment for which no ``.efp`` file was produced will need to be +submitted to GAMESS. A convenient way to identify these is: + +.. code-block:: bash + + for f in *.inp; do + base="${f%.inp}" + if [ ! -f "${base}.efp" ]; then + echo "No EFP found, submit to GAMESS: $f" + fi + done -A simple way to use this script for all fragments in the EFP region is to make a bash script to iterate through every ``*.inp`` -file in the current directory. .. _clasical_fragment: The Classical Region Fragment ----------------------------- -Atoms not included in either QM or EFP regions, i.e., atoms far from the QM subsystem, will be included -in the QM/EFP calculation as classical atoms possessing only partial charges. We will combine all these classical -atoms into a single EFP "superfragment" that will contain only coordinates, monopoles and screen sections. -``make_mm.py`` reads the EFP region structure, the full -structure, and the topology to create this "superfragment" efp file. +Atoms not included in either the QM or EFP regions — i.e., atoms far from the QM subsystem +— are treated as classical atoms possessing only partial charges. These are combined into a +single EFP "superfragment" containing only coordinates, monopoles, and screen sections. -``python make_MM.py shell_bchl361-79002.g96 bchl361-79002.g96 topol.top`` +``make_AAs.py`` will create this, and it should not require any more preparation other than +the structure files already provided. -The topology file is necessary for atomic charges, and both structure files are read so that only MM atoms will be -included (both QM and EFP atoms are omitted). +The topology file provides atomic charges. Both structure files are required so that only +classical atoms are included — QM and EFP atoms are automatically omitted. .. _trim_params: EFP parameter trimming -====================== +======================= -Before using the QM/EFP calculations, EFP fragment parameters need to be trimmed. That is, +Before using the EFP parameters in QM/EFP calculations, the fragment parameter files must +be trimmed. Specifically: -* capping hydrogens that were added to amino acid fragments to make them closed-shell molecules for parameter - calculations need to be removed. All parameters associated with capping hydrogens need to be removed as well. -* fragments with QM-EFP covalent boundaries need to be cleaned from all QM atoms and boundary atoms. Additionally, - some of polarization points at the boundary need to be eliminated to avoid overpolarization of the QM region. -* [JACK: what about BChls? which script takes care of those?] +* Capping hydrogens added to amino acid fragments to make them closed-shell molecules for + parameter calculations must be removed, along with all associated parameters. +* Fragments with covalent QM-EFP boundaries must be cleaned of all QM atoms and boundary + atoms. Additionally, polarization points near the boundary must be removed to avoid + overpolarization of the QM region. +* Sub-fragmented molecules are treated in the same way as amino acid fragments — capping + hydrogens at the head-tail junction are removed automatically. -[JACK: cut_qm.py name is misleading] +This trimming is handled by ``cut_caps.py``. The script uses the comment lines appended by +``make_AAs.py`` (see :ref:`qmefp_boundary_fragments`) to determine which atoms and +polarization points to remove. -``cut_qm.py`` script trims EFP parameters following the (optional) information provided in the MAKEFP input files. +.. note:: ``cut_caps.py`` will always remove hydrogens named ``H000`` (capping hydrogens + added to cap dangling bonds when fragments were cut from a larger molecule) and all + associated parameters, regardless of the comment lines in the input file. -.. note:: ``cut_qm.py`` will always delete hydrogens named H000 (capping hydrogens added to cap dangling bonds when - fragments were cut from a larger molecule) and the associated parameters. +QM-EFP boundary fragments are treated based on the ``!erased:`` and ``!remove:`` comment +lines in the input file, as described in :ref:`qmefp_boundary_fragments`. -QM-EFP boundary fragments will be treated based on the information provided in comment sections in the input files, -see :ref:`qmefp_boundary_fragments` +``cut_caps.py`` operates in two modes: ``check`` (default) and ``fix``. -A sample execution is: +In ``check`` mode, the script validates the EFP parameter file for completeness and +sanity without modifying it: -``cut_qm.py hd_290_4417.inp hd_290_4417.efp`` +* Exits with an error if the ``.efp`` file is incomplete (missing ``$end`` statement) +* Exits with an error if the fragment charge is non-integer or if partial charges + exceed the ``charge_cutoff`` threshold +* Prints a warning if large polarizability values are found -[JACK: what is this paragraph? Does not match with the example: "ala_33_473.inp is an alanine fragment with -QM atoms commented to be deleted and EFP atoms to have polarization -points removed. a0001.efp is a parameter file that is either the output of the input file, or translated -information from a database parameter file to align with the input's coordinates."] +In ``fix`` mode, the script performs the trimming and writes the result to +``cut_``. -``cu_qm.py`` must be executed on every amino acid and other fragmented residues. +.. note:: Check and adjust the ``charge_cutoff`` and ``polab_cutoff`` settings at the + top of ``cut_caps.py`` if needed for your system. + +A sample execution in ``check`` mode: + +``python cut_caps.py hd_290_4417_50000_wt.inp hd_290_4417_50000_wt.efp`` + +A sample execution in ``fix`` mode: + +``python cut_caps.py hd_290_4417_50000_wt.inp hd_290_4417_50000_wt.efp fix`` + +The two arguments are: + +* GAMESS MAKEFP input file (``.inp``) — provides the atom removal instructions via + comment lines +* EFP parameter file (``.efp``) — either the direct output of the GAMESS MAKEFP + calculation, or a parameter file rotated and translated by ``frag_RMSD_V2.py`` + +``cut_caps.py`` must be executed for every fragment in the EFP region. A convenient +way to run it on all fragments at once is: + +.. code-block:: bash + + for f in *.inp; do + base="${f%.inp}" + if [ -f "${base}.efp" ]; then + python cut_caps.py $f ${base}.efp fix + fi + done + +After trimming, the resulting ``cut_*.efp`` files are ready for use in the QM/EFP input +generation step. .. _qchem_input: QM/EFP input generation -======================= +======================== -Now that all fragment ``.efp`` files are prepared, the QM/EFP input in Q-Chem format can be created. -[JACK: change to make_final_qchem.py] -``make_final.py`` will take care of this step. Sample execution looks like: +Now that all fragment ``.efp`` files are prepared, the QM/EFP input file in Q-Chem format +can be assembled. This is handled by ``make_qchem_input.py``. A sample execution is: -``make_final.py shell_bchl361-79002.g96 bchl361-79002.g96 qm_defined.txt`` +``python make_qchem_input.py shell_bchl361-79002.g96 bchl361-79002.g96 qm_defined.txt topol.top timestamp mutant`` -Function "build_header" contains the keywords for the Q-Chem calculation; these can be adjusted as needed. When you submit -the calculation, .efp parameter files will need to be in the same folder as the input file. +The input arguments are very similar to those in make_AAs.py: -[JACK: to do: make similar function for GAMESS QM/EFP calc. Check fragnames...] +* ``.g96`` file containing atoms of the EFP region (see :ref:`efp_region`) +* ``.g96`` file containing the full system (initial structure from MD) +* user-prepared file defining the QM region and QM-EFP boundaries (see :ref:`setup_QM_EFP`) +* topology file (``topol.top``) +* timestamp to differentiate output files (e.g. ``50000``) +* mutant label (e.g. ``wt``) + +The Q-Chem calculation keywords are defined in the ``build_header`` function inside +``make_qchem_input.py`` and can be adjusted as needed for your calculation type (e.g., +excitation energy, redox potential). + +.. note:: When submitting the Q-Chem calculation, all ``cut_*.efp`` parameter files must + be present in the same directory as the input file, and they MUST be renamed to match + the names listed in ``efp_region.txt`` (by default, this is the same name without the + ``cut_`` prefix). It is convenient to move the cut_*efp files to a new folder to avoid + overwriting the original GAMESS outputs. + +.. note:: ``make_qchem_input.py`` currently generates input files for Q-Chem only. + Users wishing to run QM/EFP calculations in GAMESS will need to adapt the + ``build_header`` function accordingly. Time-Saving Tips ================ -In the case of FMO, the normal procedure is to repeat the EFP calculation for eight different BChl pigments. Fragments -can be reused between calculations if they come from the same snapshot. I.e., bchl360-79002.g96 will need a handful of new -fragments than those created already for bchl361-79002.g96, but the majority of EFP fragments are already made. This can save -a lot of time in repeated calculations. - -Another suggestion is creating your own library of EFP fragments and using it for FlexEFP step. -Often, the library created for one protein will work very well for related proteins. +In the case of FMO, the typical procedure is to repeat the QM/EFP calculation setup for +each of the eight BChl pigments in the monomer. EFP fragments can be reused between +calculations if they come from the same MD snapshot. For example, setting up a calculation +centered on BChl 360 will require only a small number of new fragments compared to those +already generated for BChl 361, since the majority of the EFP shell overlaps. This can +save considerable time when setting up repeated calculations across multiple pigments. + +A second time-saving strategy is to build and maintain your own library of precomputed EFP +fragments for use with the FlexEFP step. In practice, a library built for one protein will +often transfer well to structures of the same trajectory, reducing the number of +GAMESS calculations needed. + +For large-scale studies involving many snapshots, consider wrapping the full pipeline — +from ``make_AAs.py`` through ``cut_caps.py`` to ``make_qchem_input.py`` — in a bash or +Python driver script that iterates over many snapshots. Note that the primary bottleneck +will be waiting for GAMESS to generate fragment parameters. \ No newline at end of file diff --git a/examples/flex-EFP/Scripts/cut_caps.py b/examples/flex-EFP/Scripts/cut_caps.py new file mode 100644 index 0000000..23e5792 --- /dev/null +++ b/examples/flex-EFP/Scripts/cut_caps.py @@ -0,0 +1,790 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Sep 11 07:42:51 2024 + +@author: jackl & lyuda + +Sample execution: + cut_caps.py makefp.inp makefp.efp mode + -- mode is 'check' (default) or 'fix' + +The script prepares efp prameter file for efp or qmefp calculations. +1. It checks efp file for errors and potential problems + - script exits with error if the efp file is not complete ($end statement is missing), + fragment charge is not integer, or partial charges exceed |3|) + - warning is printed if large (>20) polarizability values are found + +!!!! Check/adjust charge_cutoff and polab_cutoff settings if needed !!!! + + Nothing more happens in mode 'check' + +2. In 'fix' mode, the script continues by cutting off capping hydrogens and other atoms + following comments in inp file and cleans corresponding multipoles and polarizability + points. The residual charge of the fragment is corrected. + + Additioanlly, POLAB section is added if large polarizablities are found. + The resulting efp potential is saved in cut_{makefp.efp} file. +""" + +import sys +import numpy as np +import itertools +import os + + + +######################################### +### USER SETTINGS + +# charge_cutoff is the value of a partial charge when the potential is +# considered bad without hope (probably meaning that makefp job did not +# run properly) +charge_cutoff = 4.0 + +# polab_cutoff is a used to assign polab values based on the values of polarizabilities +# { polarizabiloity_value : polab_value} +# provide as many entries as wanted +polab_cutoff = {20.0 : 0.2, 50.0 : 0.1, 100.0 : 0.05} + +######################################### + +if len(sys.argv) < 3: + print('Provide 1) input file, 2) efp file, and optionally, mode of operation (check or fix, default: check). Exit') + sys.exit() + +inp = sys.argv[1] +efp = sys.argv[2] +if len(sys.argv) > 3: + mode = sys.argv[3] +else: + mode = 'check' + + + +def extract_atomnums_B(line): + """ + Given a line starting with 'B', extract two atom numbers based on the length + of the first token. + """ + token = line.split()[0] + if len(token) == 4: + return int(token[2]), int(token[3]) + elif len(token) == 5: + return int(token[2:4]), int(token[4]) + elif len(token) == 6: + return int(token[2:4]), int(token[4:6]) + else: + return None, None + +def extract_bond_atoms(bond_name): + """ + Given a line starting with 'B', extract two atom numbers based on the length + of the first token. + """ + token = bond_name + if len(token) == 4: + return int(token[2]), int(token[3]) + elif len(token) == 5: + return int(token[2:4]), int(token[4]) + elif len(token) == 6: + return int(token[2:4]), int(token[4:6]) + else: + return None, None + +def distance(x1, y1, z1, x2, y2, z2): + dist = np.sqrt((float(x1) - float(x2))**2 + + (float(y1) - float(y2))**2 + + (float(z1) - float(z2))**2) + return dist + +def get_specials(lines): + """ + Grab input file lines to obtain the atom IDs that should be removed entirely + and the atom indexes for which only the polarizable points should be removed. + + The function looks for keywords 'erased:' and 'remove:' in the lines. + -make_AAs_V2.py added these comments. + + Reads: + lines: input file (.inp) lines. + Returns: + atomid (list): Atom indexes for complete removal. + """ + atoms = [] # List for names of atoms to be removed entirely. + + # First, extract the atom names from lines that contain 'erased:' and 'remove:' + for line in lines: + j = -1 + if 'erased:' in line: + # Walk backwards from the end of the line until reaching 'erased:' + while line.split()[j] != 'erased:': + atoms.append(line.split()[j]) + j -= 1 + + # Scan through the file lines to grab the positions of the pesky atoms/polarizabilities. + i = 0 + atomid = [] # List to record the atom index numbers for full removal. + start = 0 # Flag to indicate when the coordinates section starts. + for line in lines: + if '$end' in line: + start = 0 + continue + if start == 1: + i += 1 # Increment a counter for each coordinate line + # Check the atom name (first token in the line) + if line.split()[0] in atoms: + atomid.append(i) + # Additionally, if the atom label contains 'H000', mark it for removal + # -All virual hydrogens are to be removed. + elif 'H000' in line: + atomid.append(i) + # The coordinates section is assumed to start when a line: 'C1' + if line.strip() == 'C1': + start = 1 + return atomid + +def get_coords(lines, atoms): + """ + Grab the coordinate lines while separating those that should be removed (rem_coords) + from those to be kept (coords). + + Reads: + lines: Lines from the EFP file. + atoms: Atom indexes to remove entire atom. + + Returns: + coords, rem_coords; where: + - coords: List of coordinate lines to keep. + - rem_coords: List of coordinate lines that are flagged for removal. + """ + start = False + coords = [] + rem_coords = [] + + # Process each line in the file. + for line in lines: + # Exit when "STOP" is encountered. + if 'STOP' in line: + return coords, rem_coords + # Begin processing with the "COORDINATES" section. + if 'COORDINATES' in line: + start = True + continue + if not start: + continue + + # If the line starts with 'A', extract the two-digit atom number. + if line.startswith('A'): + atomnum = int(line[1:3]) + if atomnum in atoms: + rem_coords.append(line) + else: + coords.append(line) + + # For lines starting with 'B', use the helper to extract atom numbers. + elif line.startswith('BO'): + atomnum, atomnum2 = extract_atomnums_B(line) + if atomnum is None: + continue # Skip if extraction failed + # If either atom number is in the removal list, add to rem_coords. + if atomnum in atoms or atomnum2 in atoms: + rem_coords.append(line) + else: + coords.append(line) + + return coords, rem_coords + +def get_monopoles(efp_lines, keep_coords, remove_coords): + + # extract lines containing monopoles + start = 0 + mono_lines = [] + for line in efp_lines: + if 'MONOPOLE' in line: + start = 1 + continue + if start == 1: + if 'STOP' in line: + start = 0 + break + else: + mono_lines.append(line) + + # create master list with all important info + monopoles = [] + for line in mono_lines: + parts = line.split() + if parts[0][0] == 'A': # atom + monopoles.append(['atom', int(parts[0][1:3]), parts[0], float(parts[1]), float(parts[2])]) + elif parts[0][:2] == 'BO': # bond mid point + atom1, atom2=extract_atomnums_B(line) + monopoles.append(['bond', [atom1,atom2], parts[0], float(parts[1]), float(parts[2])]) + else: + print(f'Could not assign line {line} to atom or bond. Error') + sys.exit() + + keep_names = [] + for coord in keep_coords: + keep_names.append(coord.split()[0]) + remove_names = [] + for coord in remove_coords: + remove_names.append(coord.split()[0]) + + # analyze names and create arrays connecting names and atom numers + H_caps = [] + bonds = [] + non_caps = [] + keep_atoms = [] + for name in remove_names: + if name[-4:] == 'H000': # capping H + H_caps.append(int(name[1:3])) # real atom number + elif name[:2] == 'BO': # bond mid point + atom1, atom2=extract_bond_atoms(name) + bonds.append([atom1, atom2]) + else: + #print(f'non-capping atom to be removed {name}') + non_caps.append(int(name[1:3])) + for name in keep_names: + if name[0] == 'A': # atom + keep_atoms.append(int(name[1:3])) + + # points with charges already redistributed + removed_charge_names = [] + + # combine H_caps and corresponding bonds and compute those total charges + for cap in H_caps: + removed_charge_names_tmp = [] + cap_charge = 0.0 + for mon in monopoles: + if cap == mon[1]: + #print('cap',cap, mon) + removed_charge_names_tmp.append(mon[2]) + cap_charge += mon[3] + mon[4] + break + # atom to add charge to + atom_to = -1 + for bond in bonds: + if cap in bond: # found corresponding bond + for mon in monopoles: + if mon[0] == 'bond': + if bond == mon[1]: + #print('bond', bond, mon) + removed_charge_names_tmp.append(mon[2]) + cap_charge += mon[3] + mon[4] + # atom_to is another number in the bond + if cap == bond[0]: + atom_to = bond[1] + else: + atom_to = bond[0] + #print(f'atom_to {atom_to}') + break + # working with atom_to charge + if atom_to == -1: # something went wrong + print(f'did not find matching atom for H_cap {cap}. Error') + sys.exit() + else: + if atom_to in keep_atoms: + for mon in monopoles: + if mon[0] == 'atom': + if atom_to == mon[1]: + #print(f'adding charge {cap_charge} to {mon}') + # adding cap and bond names to removed_charge array only if there is a matching atoms + # to add charge to + for name in removed_charge_names_tmp: + removed_charge_names.append(name) + mon[3] += cap_charge + break + + # in case the cap is connected to the atom that also will be removed, + # simply add it to non_caps array and treat there + elif atom_to in non_caps: + non_caps.append(cap) + else: + print(f'Could not find where to place a charge of H_cap {mon}. Error') + sys.exit() + + # heavy atoms to be removed + # two tasks: 1. compute total charge to be shifted + # 2. find the atom to which to place it + # start with step 2. + # There might be multiple bonds along which to-be-kept and + # to-be-removed regions are connected + + if non_caps: + atoms_to = [] + for atom in keep_atoms: # remaining atoms + for bond in bonds: # to be removed bonds + if atom in bond: # found atoms that is connected to the one to be removed + atoms_to.append(atom) + #print(f'Charge will be placed to atom {atom_to}') + if not atoms_to: + print('did not find matching atom for deleted heavy atoms. Error') + sys.exit() + + # total charge to be shifted + charge = 0.0 + for name in remove_names: + if name not in removed_charge_names: # not worked with this monopoles during H_caps removal + for mon in monopoles: + if name == mon[2]: + charge += mon[3] + mon[4] + #print('charge', charge) + + # adding this charge to atoms in atoms_to // split charge equallyif multiple atoms are there + charge /= len(atoms_to) + for mon in monopoles: + if mon[0] == 'atom': + for atom in atoms_to: + if mon[1] == atom: + mon[3] += charge + #print(f'adding charge {charge} to {mon}') + + # print out resulting info + outlines = [] + for mon in monopoles: + if mon[2] in keep_names: + outlines.append(f'{mon[2]} {mon[3]:0.10f} {mon[4]:0.5f}\n') + return outlines + + +def get_dipoles(lines, coords): + """ + Extract the dipoles section lines that should be removed. + + Reads: + lines (list of str): Lines from the EFP file. + cut_coords (list of str): Coordinate lines of atoms to remove. + + Returns: + dipoles (list of str): Lines from the DIPOLES section matching the kept atoms. + """ + dipoles = [] + keep_names = [atom.split()[0] for atom in coords] + start = 0 + for line in lines: + if start == 1: + if 'STOP' in line: + return dipoles + # append lines if the atom name is in the list of coordinates. + if line.split()[0] in keep_names: + dipoles.append(line) + if 'DIPOLES' in line: + start = 1 + return dipoles + +def get_quadrupoles(lines, coords): + """ + Extract the quadrupoles section lines. + + Uses counters (k and j) to control how many lines to skip or include + when the atom name is encountered. Quardupoles are multi-line entries. + + Reads: + lines (list of str): Lines from the EFP file. + cut_coords (list of str): Coordinate lines of atoms to remove. + + Returns: + quadrupoles (list of str): Lines from the QUADRUPOLES section. + """ + quadrupoles = [] + keep_names = [atom.split()[0] for atom in coords] + start = 0 + k = 0 # Counter to skip one line after a match + j = 0 # Counter to include one line after a non-match + for line in lines: + if start == 1: + if 'STOP' in line: + return quadrupoles + elif k == 1: + k -= 1 + elif j == 1: + quadrupoles.append(line) + j -= 1 + # When encountering a cut name, set k to skip the next line; do not append. + elif line.split()[0] not in keep_names: + k = 1 + else: + quadrupoles.append(line) + j = 1 + if 'QUADRUPOLES' in line: + start = 1 + return quadrupoles + +def get_octupoles(lines, coords): + """ + Extract the octupoles section lines. + + Uses counters (j and k) similar to quadrupoles to control skipping/inclusion. + -octupoles are also multi-lin entries. + + Reads: + lines (list of str): Lines from the EFP file. + cut_coords (list of str): Coordinate lines of atoms to remove. + + Returns: + octupoles (list of str): Lines from the OCTUPOLES section. + """ + octupoles = [] + keep_names = [atom.split()[0] for atom in coords] + start = 0 + j = 0 + k = 0 + for line in lines: + if start == 1: + if 'STOP' in line: + return octupoles + elif k > 0: + k -= 1 + elif j > 0: + octupoles.append(line) + j -= 1 + elif line.split()[0] not in keep_names: + k = 2 + else: + octupoles.append(line) + j = 2 + if 'OCTUPOLES' in line: + start = 1 + return octupoles + + +def get_polarpts(lines, cut_coords): + """ + Extract the polarizable points section. + + For each polarizable point, compute the distance to each atom in the cut_coords. + If the minimum distance is greater than cutoff, the point is retained. + -Polarizable points do not have "atomnames" the same as the multipole parameters. + + Reads: + lines (list of str): Lines from the EFP file. + cut_coords (list of str): Coordinates of atoms to remove. + + Returns: + polars (list of str): Lines from the POLARIZABLE POINTS section that pass the filter. + """ + + # atoms to be removed + atom_cut_coords = [] + for line in cut_coords: + if line.rsplit()[0][0] == 'A': # atom + atom_cut_coords.append(line) + + # H000s + Hs=[] + for line in cut_coords: + if 'H000' in line: + Hs.append(line) + + polars = [] + #remove_names = [atom.split()[0] for atom in cut_coords] + start = 0 + j = 0 + for line in lines: + if start == 1: + if 'STOP' in line: + return polars + elif j > 0: + polars.append(line) + j -= 1 + # Check lines starting with "CT" (assumed polarizable point lines) + elif 'CT' in line: + j=3 +########################################################################## +######### ATTENTION: CUTOFFS to remove polarizability points ########### +######### different cutoofs for H000 and other removed atoms ########### +######### want to remove nond and LP LMOs around the removed atom ##### +######### might need to adjust for heavier atoms #### +########################################################################## + # severe cut: remove polarizability points on neigboring atoms + for atom in atom_cut_coords: + #j=3 + # Calculate the distance between the polarizable point and an atom in cut_coords. + current_dist = distance(line.split()[1], line.split()[2], line.split()[3], + atom.split()[1], atom.split()[2], atom.split()[3]) + if current_dist < 1.88973 * 1.6: # in Bohr + j = 0 + break + + # soft cut here: only delete the pol point at the nearest bond mid-point + for atom in Hs: + current_dist = distance(line.split()[1], line.split()[2], line.split()[3], + atom.split()[1], atom.split()[2], atom.split()[3]) + if current_dist < 1.88973 * 0.8: + j = 0 + break + + if(j!=0): + polars.append(line) + if 'POLARIZABLE POINTS' in line: + start = 1 + return polars + +def get_screen(lines, coords, title): + """ + Extract screening parameter lines from the file. + + Only lines with atom names in coords are kept. + + Reads: + lines (list of str): Lines from the EFP file. + coords (list of str): Coordinates used to filter the screening parameters. + title (str): A string that identifies the screen section (e.g., 'SCREEN ' or 'SCREEN2'). + + Returns: + params (list of str): Filtered screening parameter lines. + """ + params = [] + keep_names = [atom.split()[0] for atom in coords] + start = 0 + for line in lines: + if start == 1: + if 'STOP' in line: + return params + if line.split()[0] in keep_names: + params.append(line) + if title in line: + start = 1 + return params + +def get_header(lines, input_name): + """ + Construct the header for the output file by replacing the placeholder '$FRAGNAME' + with the fragment name (derived from the input file name); otherwise retain the same + heading lines as-is from the .efp file. + + Reads: + lines (list of str): Lines from the EFP file. + input_name (str): The input file name. + + Returns: + header (list of str): The modified header lines. + """ + header = [] + fragname = input_name.split('.')[0] + for line in lines: + header.append(line.replace('$FRAGNAME', '$'+fragname)) + # Stop adding header lines once the COORDINATES section is encountered. + if 'COORDINATES (BOHR)' in line: + return header + +def check_completeness(efp_lines): + + start = 0 + charges = [] + polarizabilities = [] + pol_name = '' + error = 0 + + # check monopoles + for line in efp_lines: + if 'MONOPOLES' in line: + start = 1 + continue + if 'STOP' in line: + start = 0 + continue + if start == 1: + parts = line.rsplit() + charges.append([float(parts[1]), float(parts[2])]) + if 'POLARIZABLE POINTS' in line: + start = 2 + continue + if start == 2: + if 'CT' in line: + parts = line.rsplit() + pol_name = parts[0] + pol = [] + continue + else: + parts = line.rsplit() + if len(parts) == 5: + pol.append([ float(parts[0]), float(parts[1]),float(parts[2]),float(parts[3]) ]) + elif len(parts) == 1: + pol.append( [float(parts[0])] ) + pol = list(itertools.chain.from_iterable(pol)) + polarizabilities.append([pol_name, pol]) + pol_name = '' + if '$END' in line or '$end' in line: + start = -1 + + # analysis and info + if start != -1: + #print(f'ERROR: {efp} is incomplete!') + error = 1 + + return error, charges, polarizabilities + +def check_charges(charges, max_charge): + error = 0 + charges_np = np.array(charges) + chg = 0.0 + total_charge = 0.0 + error_charge = 0.0 + for c in charges_np: + ch = c[0] + c[1] + if abs(ch) > chg: + chg = abs(ch) + total_charge += ch + if abs(total_charge - round(total_charge)) > 0.001: + #print(f'WARNING: {efp} has total non-integer charge {total_charge}') + error_charge = abs(total_charge - round(total_charge)) + error = 1 + if chg > max_charge: + #print(f'WARNING: {efp} has large partial charge {chg}') + error_charge = chg + error = 2 + return error, error_charge + +def check_pol(polarizabilities, polab_cutoff): + error = 0 + max_pol = -1 + polab = -1 + max_pol_final = -1 + + if not polarizabilities: + error = 1 + else: + for pol in polarizabilities: + for p in pol[1]: + if abs(p) > max_pol: + max_pol = abs(p) + + polab_cutoff_ordered = dict(sorted(polab_cutoff.items())) + + for key in polab_cutoff_ordered: + if max_pol >= key: + polab = polab_cutoff_ordered[key] + max_pol_final = max_pol + + return error, max_pol_final, polab + + + +# Read input files +with open(inp, 'r') as inp_: + inp_lines = inp_.readlines() +with open(efp, 'r') as efp_: + efp_lines = efp_.readlines() + + +# check efp potential for completeness and sanity +# Exit with error if potential is incomplete (does not have $end finisher) +# or if partial charges are very large + +polab = -1 +max_pol = -1 +error_complete, charges, polarizabilities = check_completeness(efp_lines) +error_elec, max_charge = check_charges(charges, charge_cutoff) +error_pol, max_pol, polab = check_pol(polarizabilities, polab_cutoff) + +# conclusions based on the original assessment of the efp potential +if error_complete: + error_inp = 'err_' + inp + os.system(f'cp {inp} {error_inp}') + print(f'{efp} is incomplete. Error file {error_inp} is created.') + sys.exit() + +if error_elec: + error_inp = 'err_' + inp + os.system(f'cp {inp} {error_inp}') + if error_elec == 1: + print(f'{efp} has non-integer charge of {max_charge}. Error file {error_inp} is created.') + elif error_elec == 2: + print(f'{efp} has large partial charge of {max_charge}. Error file {error_inp} is created.') + sys.exit() + +if error_pol: + error_inp = 'err_' + inp + os.system(f'cp {inp} {error_inp}') + print(f'WARNING! {efp} has no polarizability section. Error file {error_inp} is created.') + # do not exit here in case no pol section is intended + +if max_pol > 0: + print(f'WARNING! {efp} has large polarizability point of {max_pol}. POLAB value of {polab} will be added.') + +# exit if not 'fix' mode +if mode != 'fix': + sys.exit() + +# changing the efp potential in 'fix' mode +else: + # Construct header by replacing '$FRAGNAME' with the actual fragment name. + header = get_header(efp_lines, inp) + + # Get atom indexes for complete removal + # Polarizable point removals will happen based on removed atoms + rem_atoms = get_specials(inp_lines) + + # Extract coordinate lines: + # keep_coords: coordinate lines to be kept, + # rem_coords: coordinate lines flagged for removal. + keep_coords, rem_coords = get_coords(efp_lines, rem_atoms) + + # Extract sections for monopoles, dipoles, quadrupoles, octupoles, and polarizable points. + keep_monop = get_monopoles(efp_lines, keep_coords, rem_coords) + + # run check_charges again on a new charge section + charges_updated = [] + for ll in keep_monop: + charges_updated.append([float(ll.split()[1]), float(ll.split()[2])]) + error_elec, max_charge = check_charges(charges_updated, charge_cutoff) + if error_elec: + error_inp = 'err_' + inp + os.system(f'cp {inp} {error_inp}') + if error_elec == 1: + print(f'WARNING! Modified {efp} file (cut_{efp}) has non-integer charge of {max_charge}. Check it!') + elif error_elec == 2: + print(f'WARNING! Modified {efp} file (cut_{efp}) has large partial charge of {max_charge}. Check it!') + + keep_dip = get_dipoles(efp_lines, keep_coords) + keep_quadrup = get_quadrupoles(efp_lines, keep_coords) + keep_octup = get_octupoles(efp_lines, keep_coords) + + # Hidden here distance cutoffs to eliminate bond LMOs around atoms + keep_pols = get_polarpts(efp_lines, rem_coords) + + # Extract screening parameters from two different screen sections. + keep_screen = get_screen(efp_lines, keep_coords, 'SCREEN ') + keep_screen2 = get_screen(efp_lines, keep_coords, 'SCREEN2') + + # Write the final output file with the appropriate sections. + with open('cut_' + efp, 'w') as outfile: + # Write header lines. + for outline in header: + outfile.write(outline) + # Write coordinates. + for outline in keep_coords: + outfile.write(outline) + # Write monopoles section. + outfile.write(' STOP\nMONOPOLES\n') + for outline in keep_monop: + outfile.write(outline) + # Write dipoles section. + outfile.write(' STOP\nDIPOLES\n') + for outline in keep_dip: + outfile.write(outline) + # Write quadrupoles section. + outfile.write(' STOP\nQUADRUPOLES\n') + for outline in keep_quadrup: + outfile.write(outline) + # Write octupoles section. + outfile.write(' STOP\nOCTUPOLES\n') + for outline in keep_octup: + outfile.write(outline) + # Write polarizable points section. + outfile.write(' STOP\nPOLARIZABLE POINTS\n') + for outline in keep_pols: + outfile.write(outline) + # Write screening sections. + outfile.write(' STOP\nSCREEN2 (FROM VDWSCL= 0.700)\n') + for outline in keep_screen2: + outfile.write(outline) + outfile.write('STOP\nSCREEN (FROM VDWSCL= 0.700)\n') + for outline in keep_screen: + outfile.write(outline) + outfile.write('STOP\n') + if polab != -1: + outfile.write(f'POLAB {polab:.2f}\nSTOP\n') + outfile.write(' $END\n') + diff --git a/examples/flex-EFP/Scripts/frag_RMSD_V2.py b/examples/flex-EFP/Scripts/frag_RMSD_V2.py new file mode 100644 index 0000000..af62a03 --- /dev/null +++ b/examples/flex-EFP/Scripts/frag_RMSD_V2.py @@ -0,0 +1,294 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Aug 14 2024 + +@author: jackl +""" + +import sys +import numpy as np +import os + +''' +Sample Execution: python clah_RMSD.py clah_blah.inp + +This file takesa GAMESS input file, then searches 'base_directory' for a folder that matches the +fragment type (if it is an amino acid). If no folder is found, a message will be printed. +If no good match is found, a different message will be printed. Alternatively, you can uncomment +a line near the end of this file to automatically run GAMESS for an amino acid fragment that did +not find a good match. + +''' + +############## USER DEFINED PATHS!!! ################################ +# Directory containing the library files. CHANGE +base_directory = '/depot/lslipche/data/PS1/efpdb/' +# Directory with FlexEFP script +flex_script_path = '/depot/lslipche/data/PS1/scripts/' +####################################################################### + +if len(sys.argv) != 2: + print('Provide makefp input name') + sys.exit() + + +#print(f'Searching fragment library in {base_directory} directory') + +ang_cutoff = 2.5 # Minimum RMSD allowed for a "good" match (in Angstroms) +bohr_cutoff = ang_cutoff * 1.8897259886 # Convert Angstrom cutoff to Bohr + +#Add to these if there are more +amino_acid_dict = { + 'a': 'ala', 'r': 'arg', 'n': 'asn', 'd': 'asp', 'c': 'cys', + 'q': 'gln', 'e': 'glu', 'g': 'gly', 'h': 'hip', 'i': 'ile', + 'l': 'leu', 'k': 'lys', 'm': 'met', 'f': 'phe', 'p': 'pro', + 's': 'ser', 't': 'thr', 'w': 'trp', 'y': 'tyr', 'v': 'val', + 'hp': 'hip', 'hd': 'hid', 'he': 'hie', + 'clah': 'clah', 'bcr':'bcr', 'sf4':'sf4', '45d':'45d', 'c7z':'c7z', + 'eq3':'eq3', 'bclh':'bclh' +} + + +# "M" below is Mg. If atoms are present in your system but not defined here +# you will need to add them here +#atom_weights = { +# 'H': 1.0, 'O': 15.9949100, 'C': 12.0000000, +# 'N': 12.0030700, 'S': 32.0650000, 'M': 23.3040000 +#} + +atom_weights = { + 1: 1.0, 8: 15.9949100, 6: 12.0000000, + 7: 12.0030700, 16: 32.0650000, 12: 23.3040000, 26: 55.845 +} + + +# Get input filename from command-line argument +inp = sys.argv[1] +# For testing, you might use a fixed input file: +# inp = 'g_952.inp' # e.g., a_22_304.inp. Input file with fragment coordinates + +# Read input file +with open(inp, "r") as orig_file: + input_lines = orig_file.readlines() + +# If good case found, output file name will be the same but .efp instead of .inp +# (Note: This file is only used if a match is found.) +outname = inp.replace('.inp', '.efp') + + + + +def get_RMSD(coords1,coords2,mass): + """ + Compute the weighted root-mean-square deviation (RMSD) between two sets of coordinates. + + Parameters: + coords1, coords2: 2D lists or arrays containing coordinate data. + weights: List of weights for each coordinate point. + + Returns: + Weighted RMSD. + """ + tot=0.0 + length=len(coords1) + dim=len(coords1[0]) + totmass=0.0 + + for i in range(length): + #Sum distances for each atom, weighted by atomic mass + tot+=mass[i]*sum([(coords1[i][j]-coords2[i][j])**2.0 for j in range(dim)]) + totmass+=mass[i] + + # Normalized RMSD calculation. + rmsd=np.sqrt(tot)/np.sqrt(totmass) + return rmsd + +def kabsch_algorithm(coords, coords2): + """ + Compute the optimal rotation matrix using the Kabsch algorithm to align coords onto coords2 + + Parameters: + coords: Coordinates to be rotated + coords2: Reference coordinates + + Returns: + rotation_matrix: Optimal rotation matrix. + com1: Center-of-mass (mean) of the first coordinates + com2: Center-of-mass (mean) of the second coordinates + """ + # Compute centers of mass + com1 = np.mean(coords, axis=0) + com2 = np.mean(coords2, axis=0) + + # Center the coordinates + coords1_centered = coords - com1 + coords2_centered = coords2 - com2 + + # Compute the covariance matrix + covariance_matrix = np.dot(coords1_centered.T, coords2_centered) + + # Compute the SVD of the covariance matrix + u, _, vh = np.linalg.svd(covariance_matrix) + rotation_matrix = np.dot(u, vh) + + #Check for unwanted reflections/make sure rotation is "right-handed" + if np.linalg.det(rotation_matrix) < 0: + rotation_matrix[:, -1] *= -1 + + return rotation_matrix, com1, com2 + +def apply_transform(coords, rotation_matrix, com1, com2): + """ + Reads: + coords: Coordinates to transform + rotation_matrix: Rotation matrix computed from the Kabsch algorithm + com1: Center-of-mass of the original coordinates + com2: Center-of-mass of the target coordinates + + Returns: + Transformed coordinates + """ + # Translate coordinates to the origin + translated_coords = coords - com1 + # Rotate the translated coordinates. + rotated_coords = np.dot(translated_coords, rotation_matrix) + # Translate to the target center of mass + transformed_coords = rotated_coords + com2 + return transformed_coords + + + + +# Extract the residue code from the input filename (assumes filename starts with a residue code) +try: + res = amino_acid_dict[inp.split('_')[0]] +except KeyError: + print('No library folder for: ' + inp) + sys.exit() + +directory = os.path.join(base_directory, res) + +# Define the file extension for matching EFP files. +file_extension = '.efp' + +# -------------------------- +# Read coordinates from the input file +# -------------------------- + +# (1 Angstrom = 0.529177249 Bohr) +ANGSTROM_TO_BOHR = 0.529177249 + +orig_coords = [] # list of [x, y, z] coordinates in Bohr +weights = [] # list of atomic weights corresponding to each coordinate +atom_names = [] + +reading_coords = False +for line in input_lines: + if '$end' in line: + reading_coords = False + elif reading_coords: + parts = line.split() + # Convert coordinates from Angstroms to Bohr + x = float(parts[2]) / ANGSTROM_TO_BOHR + y = float(parts[3]) / ANGSTROM_TO_BOHR + z = float(parts[4]) / ANGSTROM_TO_BOHR + + atom_names.append(parts[0]) + + orig_coords.append([x, y, z]) + # Use the first character of the atom type to lookup the weight + try: + #weights.append(atom_weights[parts[0][0]]) + weights.append(atom_weights[int(float(parts[1]))]) + except: + print('Atom with nuclear charge'+parts[1]+' is not defined. Define it and its atomic mass in `atom_weights`.') + elif 'C1' in line: + # Start reading coordinates after encountering 'C1' + reading_coords = True + +# -------------------------- +# Loop through EFP fragment files in the specified directory +# -------------------------- + +min_rmsd = 20.0 # Initialize with a large value; will be updated if a better match is found. +match = None # Will store the file path of the best match + +aligned_candidate_coords = [] + +# Loop over files with the specified extension in the directory. +for filename in os.listdir(directory): + if not filename.endswith(file_extension): + continue # Skip files that do not match the extension + + full_path = os.path.join(directory, filename) + + # Read the candidate file. + with open(full_path, "r") as candidate_file: + candidate_lines = candidate_file.readlines() + #print(f'Trying {full_path}') + + # Grab coordinates from the fragment file + # The coordinates are assumed to be in the section following the header "COORDINATES (BOHR)" + # and ending when a line containing 'BO21' is encountered, this is the standard name for the first "bond midpoint" + candidate_coords = [] + reading_candidate_coords = False + for line in candidate_lines: + if 'BO' in line.split()[0]: + reading_candidate_coords = False + break + if reading_candidate_coords: + parts = line.split() + x = float(parts[1]) + y = float(parts[2]) + z = float(parts[3]) + candidate_coords.append([x, y, z]) + continue + if 'COORDINATES ' in line: + reading_candidate_coords = True + continue + if 'STOP' in line: + break + + # Convert lists to numpy arrays + candidate_coords = np.array(candidate_coords) + fragment_coords = np.array(orig_coords) + + # Use the Kabsch algorithm to compute the optimal rotation to impost structure 1 onto structure 2 + rotation_matrix, com_candidate, com_fragment = kabsch_algorithm(candidate_coords, fragment_coords) + + # Apply the transformation + aligned_candidate_coords = apply_transform(candidate_coords, rotation_matrix, com_candidate, com_fragment) + + # Compute the weighted RMSD between the transformed coordinates and the input + current_rmsd = get_RMSD(aligned_candidate_coords, fragment_coords, weights) + #print(f'RMSD {current_rmsd}') + + # Update best match if current RMSD is lower. + if current_rmsd < min_rmsd: + min_rmsd = current_rmsd + match = full_path + +#with open(inp+'.xyz','w') as orig: +# orig.write('70\n\n') +# for i in range(len(fragment_coords)): +# orig.write(f' {atom_names[i]} {fragment_coords[i][0]*ANGSTROM_TO_BOHR} {fragment_coords[i][1]*ANGSTROM_TO_BOHR} {fragment_coords[i][2]*ANGSTROM_TO_BOHR}\n') + +#with open(inp+'.aligned.xyz','w') as aligned: +# aligned.write('70\n\n') +# for i in range(len(aligned_candidate_coords)): +# aligned.write(f' {atom_names[i]} {aligned_candidate_coords[i][0]*ANGSTROM_TO_BOHR} {aligned_candidate_coords[i][1]*ANGSTROM_TO_BOHR} {aligned_candidate_coords[i][2]*ANGSTROM_TO_BOHR}\n') + + +# -------------------------- +# Transform library .efp file to input coordinates if good match found. +# -------------------------- +print(f"{inp} with {match} RMSD {min_rmsd/1.8897259886:0.3f}") + +if min_rmsd < bohr_cutoff: +# # If a good match is found, run the next script with the matching file and the input file. + os.system("python " + flex_script_path + "/step4.Flexible_V7.py " + match + " " + inp + " -d -xr") +else: +# # If no match is found, print a message (or launch GAMESS). + print('No match, run GAMESS for: ' + inp) +# # Uncomment the following line if you want to run GAMESS: +# os.system('gms_slurm -p 20 -q standby -v 2025 ' + inp) diff --git a/examples/flex-EFP/Scripts/make_AAs.py b/examples/flex-EFP/Scripts/make_AAs.py index 876418f..df0f69b 100644 --- a/examples/flex-EFP/Scripts/make_AAs.py +++ b/examples/flex-EFP/Scripts/make_AAs.py @@ -2,112 +2,235 @@ """ Created on Sat Sep 28 15:13:19 2024 -@author: jackl - -This script creates GAMESS MAKEFP .inp files for amino acids or molecules located in the EFP region. -Non-amino acid molecules are treated with no virtual bonds (i.e. no broken bonds). - -Reads: - structure file with only EFP-region atoms and residues (.g96) - full structure file will all atoms and residues (.g96) - User-created text file with QM atoms and QM-MM covalent bond definitions (.txt) - Topology file from GROMACS molecular dynamics (.top or itp) - #WARNING# If you have your topology separated into several .itp files instead of one master .top file, - this script will have problems because atom numbers do not match between the structure and topology. - For general use, this is not a problem. If you have special QM-MM covalent "bridge-bonds" within - an .itp file (with different atom numbers than the full structure file), then this script will fail. - +@author: jackl & lyuda + +This script takes as input full system g96 file and efp-shell g96 file, as well as +the file with g96-like lines defining the QM region and QM-EFP boundaries, and system protein topology file. +Additionally, if some of the cofactors are not included in the protein topology file, +the path to directory with their itp files needs to be adjusted (PATH_TO_AMBER parameter). + +Sample execution: + python make_AAs.py shell_bchl361-79002.g96 bchl361-79002.g96 user_defined.txt topol.top 50000 wt + +- The script prepares makefp input files for amino acids located in the EFP region, taking care of +proper capping with hydrogens. +- Non-standard ligands are assumed to be a single fragment; however, fragmentation can be introduced +if needed by adding information into 'define_ligand_cut' fucntion. +- The script also creates MM region fragment (prot.efp) +- The script prepares text files with sections for Q-Chem QM/EFP calculations - +QM region section, fragment section (including water molecules and prot.efp fragment) + +Check carefully and adjust Global Data and Parameters section in case you have any non-standard +amino acids, co-factors, etc. + """ import numpy as np import sys +import os +import argparse # --------------------------- # Input Files and Command Line Arguments # --------------------------- # Get input filenames from command line -# Sample execution: python make_AAs.py efp_pair53004.g96 confout_pair53004.g96 user_defined.txt topol.top -efp_g96 = sys.argv[1] # EFP file ("efp_pair53004.g96") -full_g96 = sys.argv[2] # Full configuration file ("confout_pair53004.g96") -settings_file = sys.argv[3] # User settings file -topol_file = sys.argv[4] # Topology file ("topol.top") +parser = argparse.ArgumentParser( + description='Prepare GAMESS input files for QM/EFP calculations.' +) +parser.add_argument('efp_g96', help='EFP shell file in g96 format') +parser.add_argument('full_g96', help='Full system structure file in g96 format') +parser.add_argument('settings_file',help='User made file defining QM region and QM-EFP boundaries') +parser.add_argument('topol_file', help='Topology file (topol.top)') +parser.add_argument('timestamp', help='Timestamp to differentiate output files (50000)') +parser.add_argument('mutant', help='Mutant label (wt)') +args = parser.parse_args() -# Read the EFP file lines -with open(efp_g96, 'r') as f: - efp_lines = f.readlines() - -# Read the full configuration file lines -with open(full_g96, 'r') as f: - full_lines = f.readlines() +efp_g96 = args.efp_g96 +full_g96 = args.full_g96 +settings_file = args.settings_file +topol_file = args.topol_file +timestamp = args.timestamp +mutant = args.mutant # --------------------------- # Global Data and Parameters # --------------------------- -# Dictionary of amino acid charges. Assumes any N-terminal is +1 and any C-terminal is -1. -AA_charge = { - 'ASP': '-1', 'GLU': '-1', 'HIP': '1', - 'LYS': '1', 'ARG': '1', 'HISH': '1' -} -# Amino acids with non-zero charge -spec_AAs = ['ASP', 'GLU', 'HIP', 'LYS', 'ARG', 'HISH'] -# Map full amino acid names to one-letter codes for filenames -amino_acid_dict = { - 'ALA': 'a', 'ARG': 'r', 'ASN': 'n', 'ASP': 'd', 'CYS': 'c', - 'GLN': 'q', 'GLU': 'e', 'GLY': 'g', 'HIS': 'h', 'ILE': 'i', - 'LEU': 'l', 'LYS': 'k', 'MET': 'm', 'PHE': 'f', 'PRO': 'p', - 'SER': 's', 'THR': 't', 'TRP': 'w', 'TYR': 'y', 'VAL': 'v', - 'HIP': 'hp', 'HID': 'hd', 'HIE': 'he', 'NVAL': 'v', 'CGLN': 'q' +# directory with itp topolgies of non-standard cofactors +PATH_TO_AMBER = '/depot/lslipche/data/PS1/WT/amber03.ff/' + +# g96 atom names with more than 1 character that need to be renamed for Q-Chem calculations +# onl relevant for atoms in QM region +ATOM_EXCEPTIONS = {'mg1' : 'Mg', 'MG' : 'Mg'} + +# charge and multiplicity of QM region in QM/EFP calculation +qm_charge = 0 +qm_multiplicity = 1 + +nm2Bohr = 18.897161646321 # nm -> Bohr + +# RESNAMEs in g96 that should be completely ignored for either efp, mm or qm regions + # (XXX is a link atom, generally) +skip_resnames=['XXX'] + +#All non-amino acid residue names with separate topologies should be in this list +special_cofactors = ['ECH', '45D', 'EQ3', 'C7Z', 'CLA', 'PQN', 'BCR', 'QLA', 'LHG', 'LMG', 'SQD', 'LMT'] + +# 'SOL' returns the charge of oxygen in the water model (TIP3P). H charge is taken to be -1/2 * oxygen charge. +# Change these for different water models. +WATER_AND_IONS = { + 'SOL': -0.834, + 'CL': -1.0, + 'NA': 1.0 } +ions = ['NA', 'CL'] + +# used water names +waters = ['SOL', 'QSL'] + # List of known amino acids +# these aminoacids follow standard amino acid fragmentation and +# found in main topogly file known_amino_acids = [ 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', - 'TYR', 'VAL', 'HIP', 'HID', 'HIE', 'HISE', 'HISD', 'HISH' -] + 'TYR', 'VAL', 'HIP', 'HID', 'HIE', 'HISE', 'HISD', 'HISH', 'ACYS' + ] -# Atomic symbols and associated (string) numbers +# Atomic symbols and associated nuclear charges in string format at_sym = { 'H': '1.0', 'C': '6.0', 'N': '7.0', 'O': '8.0', 'MG': '12.0', 'P': '15.0', 'S': '16.0', 'FE': '26.0', 'NA': '11.0', 'CL': '17.0' } + +# ligands whose efp fragments should be splitted, (typically becase of size considerations) +# make sure to include rules for those cuts in define_ligand_cut() function +cut_ligands = ['CLA','LHG','LMG','BCL'] + +# residues that require special EFP treatment (different makefp setup etc). Develop as needed +fancy_ligands = ['SF4'] + +# non-zero charges in ligands that will be split into several fragments +cut_ligand_charges = {'LHG':['tail', -1]} # LGH tail charge -1 + +# non-zero charges in non-fragmented ligands +ligand_charges = {} + +# Dictionary of amino acid charges. Assumes any N-terminal is +1 and any C-terminal is -1. +AA_charge = { + 'ASP': -1, 'GLU': -1, 'HIP': 1, + 'LYS': 1, 'ARG': 1, 'HISH': 1 +} + +# Amino acids with non-zero charge +charged_AAs = ['ASP', 'GLU', 'HIP', 'LYS', 'ARG', 'HISH'] + +# Map full amino acid names to one-letter codes for efp filenames +amino_acid_dict = { + 'ALA': 'a', 'ARG': 'r', 'ASN': 'n', 'ASP': 'd', 'CYS': 'c', + 'GLN': 'q', 'GLU': 'e', 'GLY': 'g', 'HIS': 'h', 'ILE': 'i', + 'LEU': 'l', 'LYS': 'k', 'MET': 'm', 'PHE': 'f', 'PRO': 'p', + 'SER': 's', 'THR': 't', 'TRP': 'w', 'TYR': 'y', 'VAL': 'v', + 'HIP': 'hp', 'HID': 'hd', 'HIE': 'he', 'NVAL': 'v', 'CGLN': 'q', + 'HISE':'he', 'HISD':'hd','HISH':'hp','ACYS':'acys' +} + +# distance in Angstroms for adding capping Hydrogens +CH_distance = 1.07886 +SH_distance = 1.34 + # --------------------------- # Function Definitions # --------------------------- -def GAMESS_template(charge, fragname): - """ - Builds the GAMESS MAKEFP input header - You may want to adjust these parameters +# Makefp header. Works for most cases. +def makefp_header(charge, comment): + return ( + f" $contrl units=angs local=boys runtyp=makefp \n" + f" mult=1 icharg={charge} coord=cart icut=11 $end\n" + f" $system timlim=99999 mwords=200 $end\n" + f" $scf soscf=.f. dirscf=.t. diis=.t. CONV=1.0d-06 $end\n" + f" $basis gbasis=n31 ngauss=6 ndfunc=1 $end\n" + f" $DAMP IFTTYP(1)=2,0 IFTFIX(1)=1,1 thrsh=500.0 $end\n" + f" $MAKEFP POL=.t. DISP=.f. CHTR=.f. EXREP=.f. $end\n" + f" $data\n {comment}\n C1\n" + ) - Parameters - ---------- - charge : integer - fragment charge - fragname : string - fragment name +# Makefp header. Works for SF4 ligand. +def makefp_special_header(comment): + return ( + f" $contrl units=angs runtyp=makefp maxit=200 local=boys pp=sbkjc\n" + f" mult=1 icharg=-2 coord=cart icut=11 $end\n" + f" $local maxloc=600 cvgloc=1.0E-6 $end\n" + f" $system timlim=99999 memddi=0 mwords=200 $end\n" + f" $scf soscf=.f. dirscf=.t. diis=.t. CONV=1.0d-05 fdiff=.f.\n" + f" shift=.t. $end\n" + f" $basis gbasis=sbkjc ndfunc=1 $end\n" + f" $cphf ncphf=200 $end\n" + f" $DAMP IFTTYP(1)=2,0 IFTFIX(1)=1,1 thrsh=500.0 $end\n" + f" $MAKEFP POL=.t. DISP=.f. CHTR=.f. EXREP=.f. $end\n" + f" $data\n" + f" sf4_1_{comment}\n" + f" C1\n" + ) + + +def define_ligand_cut(RESNAME): + Rings = [] + tailside = '' + headside = '' + + # Settings for fragment splitting. + #CLA. the bond between atoms C5 and C6 is where the fragments split. + if RESNAME=='CLA': + tailside = 'CAA' # Atom name defining the tail side boundary + headside = 'C2A' # Atom name defining the head side boundary + Rings = [ + 'mg1','h11','nc1','MG','CHA','CHB','H1','CHC','H2','CHD','H3','NA','C1A','C2A','H4', + 'C3A','H5','C4A','CMA','H6','H7','H8','NB','C1B','C2B','C3B','C4B', + 'CMB','H13','H14','H15','CAB','H16','CBB','H17','H18','NC','C1C', + 'C2C','C3C','C4C','CMC','H19','H20','H21','CAC','H22','H23','CBC', + 'H24','H25','H26','ND','C1D','C2D','C3D','C4D','CMD','H27','H28', + 'H29','CAD','OBD','CBD','H30','CGD','O1D','O2D','CED','H31','H32','H33' + ] + + # BCL + elif RESNAME=='BCL': + tailside = 'CAA' # Atom name defining the tail side boundary + headside = 'C2A' # Atom name defining the head side boundary + Rings = ['MG','CHA','CHB','HB','CHC','HC','CHD','HD','NA','C1A','C2A','H2A','C3A','H3A','C4A', + 'CMA','HMA1','HMA2','HMA3','NB','C1B','C2B','C3B','C4B','CMB','HMB1','HMB2','HMB3', + 'CAB','OBB','CBB','HBB1','HBB2','HBB3','NC','C1C','C2C','H2C','C3C','H3C','C4C', + 'CMC','HMC1','HMC2','HMC3','CAC','HAC1','HAC2','CBC','HBC1','HBC2','HBC3','ND', + 'C1D','C2D','C3D','C4D','CMD','HMD1','HMD2','HMD3','CAD','OBD','CBD','HBD','CGD', + 'O1D','O2D','CED','HED1','HED2','HED3'] - Returns - ------- - template : string - GAMESS MAKEFP input template - """ - template = f""" $contrl units=angs local=boys runtyp=makefp - mult=1 icharg={charge} coord=cart icut=11 $end - $system timlim=99999 mwords=200 $end - $scf soscf=.f. dirscf=.t. diis=.t. CONV=1.0d-06 $end - $basis gbasis=n31 ngauss=6 ndfunc=1 $end - $DAMP IFTTYP(1)=2,0 IFTFIX(1)=1,1 thrsh=500.0 $end - $MAKEFP POL=.t. DISP=.f. CHTR=.f. EXREP=.f. $end - $data - {fragname} - C1 - """ - return template + #LMG + elif RESNAME=='LMG': + tailside = 'C11' # Atom name defining the tail side boundary + headside = 'C12' # Atom name defining the head side boundary + Rings = ['C12','H19','H20','C13','H21','H22','C14','C15','C16','C17','C18','C19','C20', + 'C21','C22','C23','C24','C25','C26','C27','H23','H24','H25','H26','H27','H28', + 'H29','H30','H31','H32','H33','H34','H35','H36','H37','H38','H39','H40','H41', + 'H42','H43','H44','H45','H46','H47','H48','H49','H50','H51' + ] + #LHG + elif RESNAME=='LHG': + tailside = 'C8' # Atom name defining the tail side boundary + headside = 'C9' # Atom name defining the head side boundary + Rings = ['C9','C10','C13','C14','C15','C16','C17','C18','C19','C20','C21','C22','C23','C24', + 'H15','H16','H17','H18','H21','H22','H23','H24','H25','H26','H27','H28','H29','H30', + 'H31','H32','H33','H34','H35','H36','H37','H38','H39','H40','H41','H42','H43','H44', + 'H45' + ] + else: + print('Wrong resname given to define_ligand_cut. Exit') + sys.exit() + return headside, tailside, Rings def qm_atoms(set_file): @@ -137,8 +260,9 @@ def qm_atoms(set_file): print('Atom number: ' + line.split()[3] + ' not found in QM atoms. Check bridge atom order.') break elif i == 1: - QMs.append(int(line.split()[3])) - pol_rem.append(int(line.split()[3])) + if len(line.split()) > 3: + QMs.append(int(line.split()[3])) + pol_rem.append(int(line.split()[3])) # Look for QM_atoms section. elif qm == 1: if 'QM-MM' in line: @@ -151,7 +275,7 @@ def qm_atoms(set_file): return QMs, pol_rem -def cut_frag(head, tail): +def cut_frag(head, tail, distance): """ Calculate coordinates for a virutal hydrogen using a desired bond distance. @@ -162,7 +286,7 @@ def cut_frag(head, tail): Returns: A list with the virtual coordinate [x, y, z] for the H-atom. """ - desired_dist = 1.07886 # Desired bond distance + desired_dist = distance # Desired bond distance # Multiply by 10 to scale the coordinates. xh, yh, zh = [float(head.split()[i]) * 10 for i in range(4, 7)] xt, yt, zt = [float(tail.split()[i]) * 10 for i in range(4, 7)] @@ -176,49 +300,95 @@ def cut_frag(head, tail): ] return h_t +def make_standard_inp(fragment, res_type): + """ + Generate standard makefp input + + Parameters + ---------- + fragment : list of atom lines in g96 format + res_type : string with residue type + + Returns + ------- + None. -def make_inp(fragment, QMs, POLs): """ - Generate an input (.inp) file for a fragment. + if len(fragment) < 3: + print(f'Fragment {fragment[0]} has less than 3 atoms. Standard makefp input will not work well. Take care!') + sys.exit() - Parameters: - fragment: List of lines containing atoms in the fragment. - QMs: List of QM atom names to be marked for removal. - POLs: List of atom names to have polarization points removed. - """ - lines_out = [] - num_virtuals = 0 charge = 0 + filename = '' - # Count virtual atoms (labeled as 'H000') + qm_atoms = [] + mm_atoms = [] + H_caps = 0 for atom in fragment: if 'H000' in atom: - num_virtuals += 1 - - # If all atoms are either QM or virtual atoms, do not write an input. - if len(QMs) + num_virtuals == len(fragment): + H_caps += 1 + continue + if int(atom.split()[3]) in qm_IDs: + qm_atoms.append(atom.split()[2]) + if int(atom.split()[3]) in MM_remove: + mm_atoms.append(atom.split()[2]) + + if H_caps + len(qm_atoms) == len(fragment): + print(f'Fragment {fragment[0]} is completely in QM region. We will not build its makefp file. Continue.') return - + # Determine the charge based on the residue type. - res_info = fragment[4].split()[1] - if res_info in spec_AAs: - charge = AA_charge[res_info] - elif len(res_info) == 4: - # Assume N-terminal or C-terminal if the residue code starts with N or C. - if res_info[0] == 'N': - charge = 1 - elif res_info[0] == 'C': - charge = -1 + resname = fragment[0].split()[1] + if res_type == 'ligand': # normal ligand + if resname in ligand_charges.keys(): + charge = ligand_charges[resname] + elif res_type == 'head' or res_type == 'tail': # cut ligand + if resname in cut_ligand_charges.keys(): + if cut_ligand_charges[resname][0] == res_type: + charge = cut_ligand_charges[resname][1] + # taking care of AA charge. Account for charged AAs, terminal AAs and combinations + elif res_type in ['aa', 'naa', 'caa']: + if resname[0] == 'N' and resname[1:] in known_amino_acids : # N terminal AA + charge += 1 + if resname[0] == 'C' and resname[1:] in known_amino_acids : # C terminal AA + charge -= 1 + if resname in charged_AAs : + charge += AA_charge[resname] + if resname[1:] in charged_AAs: + charge += AA_charge[resname[1:]] + else: + print(f'Do not know how to determine charge for residue {resname}! Assume charge 0. Check!') + # Construct the filename based on residue information. #Use 1 letter identifier for Amino acids, otherwise filename will match residue name - if res_info in known_amino_acids: - filename = amino_acid_dict[res_info] + '_' + fragment[4].split()[0] + '_' + fragment[0].split()[3] + '.inp' - else: - filename = res_info.lower() + '_' + fragment[4].split()[0] + '_' + fragment[0].split()[3] + '.inp' + if resname in known_amino_acids: + filename = amino_acid_dict[resname] + '_' + fragment[2].split()[0] + '_' + fragment[0].split()[3] + '_' + timestamp + '_'+mutant + # adding c or n to terminal AAs + elif resname[1:] in known_amino_acids: + filename = resname[0].lower() + amino_acid_dict[resname[1:]] + '_' + fragment[2].split()[0] + '_' + fragment[0].split()[3] + '_' + timestamp + '_'+mutant + elif res_type == 'ligand': + filename = resname.lower() + '_' + fragment[0].split()[0] + '_' + fragment[0].split()[3] + '_' + timestamp + '_'+mutant + elif res_type == 'head': + filename = resname.lower() + 'h_' + fragment[0].split()[0] + '_' + fragment[0].split()[3] + '_' + timestamp + '_'+mutant + elif res_type == 'tail': + filename = resname.lower() + 't_' + fragment[0].split()[0] + '_' + fragment[0].split()[3] + '_' + timestamp + '_'+mutant + else : + filename = resname.lower() + '_' + fragment[0].split()[0] + '_' + fragment[0].split()[3] + '_' + timestamp + '_'+mutant + print(f'Do not know how to determine makefp filename for residue {resname}! Filename {filename} given.') + + + # add fragname to frag_list for future use + frag_list.append(filename) + + # add fragment to dictionary with fragment makefp file naame and first three coordiante lines for easy creating (QM/)EFP inputs + frag_list_info.update(make_frag_info(filename, fragment, qm_atoms)) + filename=filename+'.inp' + + lines_out = [] # Build header sections for the input file. - lines_out.append(GAMESS_template(charge, filename.split('.')[0])) + lines_out.append(makefp_header(charge, filename.split('.')[0])) # Process each atom line. for atom in fragment: @@ -231,10 +401,12 @@ def make_inp(fragment, QMs, POLs): col1 = f" {atom.split()[2]}".ljust(6) # Determine atomic number based on element symbol. # Two-letter atomnames will need to be added here; this is a lazy solution. - if atom.split()[2][0] == 'M': + if 'MG' in atom.split()[2].upper(): col2 = at_sym['MG'] + elif 'FE' in atom.split()[2].upper(): + col2 = at_sym['FE'] else: - col2 = at_sym[atom.split()[2][0]] + col2 = at_sym[atom.split()[2][0].upper()] # Get coordinates, scaling by 10. Then format x, y, z = [float(atom.split()[i]) * 10 for i in range(4, 7)] col3 = f"{x:.8f}".rjust(17) @@ -244,178 +416,833 @@ def make_inp(fragment, QMs, POLs): # Append comments regarding atoms to be removed later. lines_out.append(" $end \n!comment atoms to be erased:") - for atomname in QMs: + for atomname in qm_atoms: lines_out.append(" " + str(atomname)) lines_out.append("\n") # Append comments regarding atoms that will have polarization removed. lines_out.append("!polarization points to remove:") - for atomname in POLs: - lines_out.append(" " + str(atomname)) # Write the constructed lines to the file. with open(filename, 'w') as outfile: outfile.writelines(lines_out) + + +# compares the distance between two atoms given in g96 format +def pos_match(line1, line2): + match = False + part1 = line1.split() + part2 = line2.split() + # 4,5,6 columns + pos1 = np.array([float(part1[4]), float(part1[5]), float(part1[6])]) + pos2 = np.array([float(part2[4]), float(part2[5]), float(part2[6])]) + dist = np.linalg.norm(pos2-pos1) + # do not use shift for the first match + if dist < 0.01 : + match = True + return match + +# compares the resname and resid between two atoms given in g96 format +def residue_match(line1, line2): + match = False + part1 = line1.split() + part2 = line2.split() + # 0 and 1 columns + if part1[0] == part2[0] and part1[1] == part2[1]: + match = True + return match + +def read_ligand_topol(resname): + #print(f'Read topology of ligand {resname}') + start=0 + out_charges=[] + with open(PATH_TO_AMBER +resname+'.itp','r') as itp: + itp_lines=itp.readlines() + for line in itp_lines: + if start==1 and len(line.split()) >= 8: + if ';' not in line.split()[0]: + out_charges.append(float(line.split()[6])) + #out_dict[line.split()[0]]=line.split()[6] + if '[ atoms ]' in line: + start=1 + if '[ bonds ]' in line: + break + return out_charges + +def read_ion_water_charges(resname): + if resname == 'SOL': + o_charge = WATER_AND_IONS['SOL'] + charges = [o_charge, -o_charge/2, -o_charge/2] + elif resname in WATER_AND_IONS: + charges = [WATER_AND_IONS[resname]] + else: + print(f'Unknown resname {resname} in function read_ion_water_charges. Make new charge entry in WATER_AND_IONS!!') + sys.exit() + return charges + +def residue_type(resname): + if not isinstance(resname, str): + print(f"Resname {resname} is not a string. Error.") + sys.exit() + res_type = 'none' + if resname in skip_resnames: + res_type = 'skip' + return res_type + if [aa for aa in known_amino_acids if aa in resname]: # this line should take care for normal and terminal AAs + if resname[0] == 'N' and resname[1:] in known_amino_acids : # N terminal AA + res_type = 'naa' + elif resname[0] == 'C' and resname[1:] in known_amino_acids : # C terminal AA + res_type = 'caa' + elif resname in known_amino_acids : + res_type = 'aa' + else: + res_type = 'ligand' + print(f'Residue {resname} is assigned as ligand. Check whether this is corrcet.') + elif resname in cut_ligands: + res_type = 'cut_ligand' + elif resname in fancy_ligands: + res_type = 'fancy' + elif resname in waters: + res_type = 'water' + elif resname in ions: + res_type = 'ion' + else: + res_type = 'ligand' + + return res_type + +def update_charges(line1, line2, charge): + charge1 = float(line1.split()[-1]) + charge/2 + charge2 = float(line2.split()[-1]) + charge/2 + #dcharge = (charge1 + charge2)/2 + part1 = line1.rsplit(' ', 1) + if len(part1) > 1: + new_line1 = part1[0] + ' ' + str(charge1) # assume first atom is C and its charge is larger (positive) than charge of O + else: + print(f'Trouble splitting {line1}') + sys.exit() + part2 = line2.rsplit(' ', 1) + if len(part2) > 1: + new_line2 = part2[0] + ' ' + str(charge2) + else: + print(f'Trouble splitting {line2}') + sys.exit() + return new_line1, new_line2 +def update_charge(line1, charge): + charge1 = float(line1.split()[-1]) + charge + part1 = line1.rsplit(' ', 1) + if len(part1) > 1: + new_line1 = part1[0] + ' ' + str(charge1) # assume first atom is C and its charge is larger (positive) than charge of O + else: + print(f'Trouble splitting {line1}') + sys.exit() + return new_line1 -def QM_MM_covalent(MM, QMs, topol_file): +def print_qchem_qm_lines(qm_info_file, g96_lines): """ - For a given MM atom that is covalently bound to a QM atom, find other MM atoms that are covalently bonded - to the given atom but are not in the QM region. + Process the QM input file lines to generate QM coordinates. - This function parses the topology file for bonds and returns a list of atom IDs. - WARNING this script assumes matching atom IDs between the full structure .g96 and the topology .top files; - -the script cannot see how your topology is defined. + The function scans through the QM file until it finds a line with 'boundary'. + For lines in the QM_atoms section (after 'QM_atoms' is encountered), + it converts coordinate values and formats them. Parameters: - MM: The MM atom index. - QMs: List of QM atom indices. - topol_file: Path to the topology file. + qm_lines (list of str): Lines from the user deifned text file. + g96_lines (list of str): Lines from the cleaned g96 list (with charges and no headers. Returns: - bond_IDs: List of MM atom indices covalently bonded to the given MM atom. + A list of formatted coordinate lines. """ - bond_IDs = [] - bonds_section = False - with open(topol_file, 'r') as f: - for line in f: - # Identify the bonds section. - if '[ bonds ]' in line: - bonds_section = True - continue - elif bonds_section and len(line.split()) < 2: - # End of bonds section. - return bond_IDs - elif bonds_section and line.strip().startswith(';'): - # Skip comment lines. - continue - elif bonds_section: - atom1 = int(line.split()[0]) - atom2 = int(line.split()[1]) - # If one atom is given MM and the other is not in the QM region, add the non‐QM atom. - if atom1 == MM and atom2 not in QMs: - bond_IDs.append(atom2) - elif atom2 == MM and atom1 not in QMs: - bond_IDs.append(atom1) - return bond_IDs + + qm_lines = [] + with open(qm_info_file, 'r') as file: + qm_lines = file.readlines() + + bridge_IDs=[] + qm_IDs=[] + bridge_pairs = [] + start='none' + for line in qm_lines: + if 'QM_atoms' in line: + start = 'QM' + continue + # After 'QM_atoms' is encountered, process lines with sufficient columns. + if start=='QM' and (len(line.split()) > 4): + # Format the atom label: + qm_IDs.append(int(line.split()[3])) + # When we reach the boundary marker, finish the QM section, find bonds to cap. + if 'QM-MM boundary boundary' in line: + continue + if 'boundary' in line: + start='boundary' + continue + if start=='boundary': + bridge_IDs.append(line.split()[3]) + #print(bridge_IDs) + if(len(bridge_IDs)%2==0): + bridge_pairs.append([int(bridge_IDs[-2]), int(bridge_IDs[-1])]) + start='none' + + #print(bridge_pairs) + # assume g96 atomid is continious and unique!!! + g96_qm_lines = [] + for atom in qm_IDs: + if atom == int(g96_lines[atom-1].split()[3]): + g96_qm_lines.append(g96_lines[atom-1]) + else: + print(f'{atom} does not match g96_lines[atom-1]') + + # format QM atoms for Q-Chem + # converting from nm to Angstroms + outlines = [] + for line in g96_qm_lines: + # Format the atom label: + parts = line.split() + # Use full label if the atom is in exceptions, otherwise take the first letter. + if parts[2] in ATOM_EXCEPTIONS.keys(): + atom_name = ATOM_EXCEPTIONS[parts[2]] + else: + atom_name = parts[2][0].upper() + outlines.append(f'{atom_name} {float(parts[4])*10:.8f} {float(parts[5])*10:.8f} {float(parts[6])*10:.8f}') + + + for pair in bridge_pairs: + qm_line = '' + mm_line = '' + if pair[0] == int(g96_lines[pair[0]-1].split()[3]): + qm_line = g96_lines[pair[0]-1] + else: + print(f'{pair[0]} does not match g96_lines[pair[0]-1]') + if pair[1] == int(g96_lines[pair[1]-1].split()[3]): + mm_line = g96_lines[pair[1]-1] + else: + print(f'{pair[1]} does not match g96_lines[pair[1]-1]') + #print('Bridge') + #print(qm_line) + #print(mm_line) + virt_coords=cut_frag(qm_line,mm_line,CH_distance) + outlines.append(f'H {virt_coords[0]:.8f} {virt_coords[1]:.8f} {virt_coords[2]:.8f}') + + return outlines + +def make_frag_info(filename, frag, qm_atoms): + lines = [] + lines.append(filename) + counter = 0 + for i in range(len(frag)): + if frag[i].split()[2] not in qm_atoms: + atom_name = 'A0' + str(i+1) + frag[i].split()[2] + x,y,z = [float(frag[i].split()[j])*10.0 for j in range(4, 7)] + line = f'{atom_name} {x:.8f} {y:.8f} {z:.8f}' + lines.append(line) + counter += 1 + if counter == 3: + break + return {filename:lines} # --------------------------- # Main Script Processing # --------------------------- -# Determine QM region atoms and polarization removal atoms from the settings file. +# Read the EFP file lines +with open(efp_g96, 'r') as f: + efp_lines = f.readlines() + +# Read the full configuration file lines +with open(full_g96, 'r') as f: + full_lines = f.readlines() + + +# Determine QM region atoms and MM removal atoms from the settings file. qm_IDs, MM_remove = qm_atoms(settings_file) -# For each MM atom covalently bound to the QM region, find the other MM atoms bound -# to it. These atoms will have polarization points removed. -pol_remove = [] -for atom in MM_remove: - found_pol_remove = QM_MM_covalent(atom, qm_IDs, topol_file) - for bound_atom in found_pol_remove: - pol_remove.append(bound_atom) +# storage for EFP fragment names and first three coordinates +frag_list_info = {} # --------------------------- # Process EFP Region Residues # --------------------------- -# Get a list of unique residue IDs from the EFP file. +# Get a list of unique residue IDs first lines from the EFP file. # Note that atom IDs do not match between EFP and full structures. However, residue IDs do match -efp_resis = [] -prevres = '0' + +prev_resid = '' +prev_resname = '' +efp_residues = [] start = False for line in efp_lines: if start: - if len(line.split()) < 3: + if 'END' in line: break - #only count residues once - if line.split()[0] != prevres: - efp_resis.append(line.split()[0]) - prevres = line.split()[0] + + if line.split()[0] != prev_resid or line.split()[1] != prev_resname: + if residue_type(line.split()[1]) != 'skip': + efp_residues.append(line) + prev_resid = line.split()[0] + prev_resname = line.split()[1] + if 'POSITION' in line: start = True -# --------------------------- -# Process Fragments from Full Configuration File -# --------------------------- -i = 0 # Index for current EFP residue -prev_co = [] # List to store previous "C" or "O" atom lines; these will be 1 residue number "off" from .g96 convention -frag = [] # List to append all fragment lines -CAs = [] # List to accumulate "CA" (alpha carbon) lines -start = False - -#"C", "O", and "CA" atoms are significant. We redefine residues using these atom names +################## +######## Processing full g96 and topology files to make a new list containing both atomids and charges +################## +# Read the topology file +topol_lines = [] +atomid_charge_dict = {} +with open(topol_file, 'r') as f: + start = 0 + while f: + line = f.readline() + if '[ atoms ]' in line: + start = 1 + continue + if '[ bonds ]' in line: + break + if start == 1 and len(line.split()) >= 8 : + if ';' not in line.split()[0]: + topol_lines.append(line) + atomid_charge_dict.update({int(line.split()[5]) : float(line.split()[6])}) + +# making g96_charges with combined info of (stripped from top and bottom) g96 and MM charges +g96_charges = [] +start = 0 +counter = 0 +charges = [] +resid_old = -1 for line in full_lines: - if 'END' in line: - if start: - break - #elif 'SOL' in line: - elif 'SOL' in line or 'QSL' in line: #Sol is standard water, QSL is my QM region water, this is not standardized + if 'POSITION' in line: #start processing + start = 1 continue - elif start: - # Collect CA atoms. - if line.split()[2] == 'CA': - CAs.append(line) - # Collect "O" atoms. - elif line.split()[2] == 'O': - prev_co.append(line) - # Process "C" atoms. This is the "end" of a residue as we would like to define it. - elif line.split()[2] == 'C': - prev_co.append(line) - # If the current line belongs to the current EFP residue. - if line.split()[0] == efp_resis[i]: - # standard amino acid fragments (non-terminals, non-cofactors) need two virtual hydrogens. - if line.split()[1] in known_amino_acids: - vH1 = cut_frag(frag[0], CAs[-2]) #Cut between current C and previous CA - vH2 = cut_frag(CAs[-1], line) #Cut between current CA and next C - frag.append(f" H000 1.0 {vH1[0]:.8f} {vH1[1]:.8f} {vH1[2]:.8f}\n") - frag.append(f" H000 1.0 {vH2[0]:.8f} {vH2[1]:.8f} {vH2[2]:.8f}\n") - # If the residue identifier (field 1) has length 4 (a non-standard amino acid), - # determine if it is N-terminal or C-terminal. - elif len(line.split()[1]) == 4: - if line.split()[1][1:4] in known_amino_acids: - if line.split()[1][0] == 'N': # N-terminal residue - vH2 = cut_frag(CAs[-1], line) - frag.append(f" H000 1.0 {vH2[0]:.8f} {vH2[1]:.8f} {vH2[2]:.8f}\n") - elif line.split()[1][0] == 'C': # C-terminal residue - vH1 = cut_frag(frag[0], CAs[-2]) - frag.append(f" H000 1.0 {vH1[0]:.8f} {vH1[1]:.8f} {vH1[2]:.8f}\n") - # If there is enough fragment data, determine which atoms to remove. - if len(frag) > 1: - qm_names = [] - pol_names = [] - for atom in frag: - if 'H000' in atom: #All virtual hydrogens will be removed, no need to specify in comment + if start == 1: + if 'END' in line: + break + # working with protein topology + if int(line.split()[3]) <= len(atomid_charge_dict): # process protein topolgy lines. Assume that g96 atom ids are in order + atomid = int(line.split()[3]) + charge = atomid_charge_dict[atomid] + g96_charges.append(line.rstrip() + ' ' + str(charge)) + # working with cofactor topologies + else: + if line.split()[1] not in special_cofactors and line.split()[1] not in WATER_AND_IONS: # Error + print(f'Error! cofactor {line.split()[1]} is unknown') + print(line) + sys.exit() + elif resid_old != line.split()[0]: + if line.split()[1] in special_cofactors: + charges = read_ligand_topol(line.split()[1]) + elif line.split()[1] in WATER_AND_IONS: + charges = read_ion_water_charges(line.split()[1]) + counter = 0 + resid_old = line.split()[0] + g96_charges.append(line.rstrip() + ' ' + str(charges[counter])) + counter += 1 + +################### + +# working with g96_charges list now +mm_lines = [] # not EFP atoms +efp_lines = [] # atoms that will be in EFP region +efp_counter = 0 + +resid_old = '' +resname_old = '' +efp_residue = 'none' # 'none', 'aa', 'naa', 'caa', 'fancy','ligand', 'cut_ligand', 'water', 'ion' +CO = [] +CA = [] +efp_residue_old = 'none' + +# for cut fragments +headside = '' +tailside = '' +ring = [] +head = [] +tail = [] +head_cut = '' +tail_cut = '' + +frag = [] +efp_water_list = [] # list to store EFP waters +efp_ion_list = [] # list to store EFP ions if any +frag_list = [] + +accumulated_charge = 0.0 +CO_charge = 0.0 +CO_charge_next = 0.0 +CA_index = -1 +CA_index_next = -1 +CA_charge_index_list = [] + +fancy_list = [] + +for j in range(len(g96_charges)): + line = g96_charges[j] + + # check whether resid changed. If it did, do all work on (possibly) fragment that just ended + if resid_old != line.split()[0] or resname_old != line.split()[1]: + # take care of previous residue + if efp_residue == 'aa': + # take care of CO things + + vH2 = cut_frag(CA[-1], frag[-2], CH_distance) #Cut between current CA and next C + vH1 = cut_frag(CO[0], CA[-2], CH_distance) #Cut between current C and previous CA + frag.pop() + frag.pop() + + CO_charge_next = float(g96_charges[j-1].split()[-1]) + float(g96_charges[j-2].split()[-1]) + #line1, line2 = update_charges(g96_charges[j-2], g96_charges[j-1], 0.0) + mm_lines.append(g96_charges[j-2]) + mm_lines.append(g96_charges[j-1]) + + accumulated_charge += float(g96_charges[j-1].split()[-1]) + float(g96_charges[j-2].split()[-1]) + + for co in CO: + frag.append(co) + frag.append(f" H000 1.0 {vH1[0]:.8f} {vH1[1]:.8f} {vH1[2]:.8f}\n") + frag.append(f" H000 1.0 {vH2[0]:.8f} {vH2[1]:.8f} {vH2[2]:.8f}\n") + + elif efp_residue == 'naa': # N-terminal residue + vH2 = cut_frag(CA[-1], frag[-2], CH_distance) + frag.pop() + frag.pop() + + CO_charge_next = float(g96_charges[j-1].split()[-1]) + float(g96_charges[j-2].split()[-1]) + #line1, line2 = update_charges(g96_charges[j-2], g96_charges[j-1], 0.0) + mm_lines.append(g96_charges[j-2]) + mm_lines.append(g96_charges[j-1]) + #mm_lines.append(g96_charges[j-2]) + #mm_lines.append(g96_charges[j-1]) + accumulated_charge += float(g96_charges[j-1].split()[-1]) + float(g96_charges[j-2].split()[-1]) + + frag.append(f" H000 1.0 {vH2[0]:.8f} {vH2[1]:.8f} {vH2[2]:.8f}\n") + + elif efp_residue == 'caa': # C-terminal residue + vH1 = cut_frag(CO[0], CA[-2], CH_distance) + for co in CO: + frag.append(co) + frag.append(f" H000 1.0 {vH1[0]:.8f} {vH1[1]:.8f} {vH1[2]:.8f}\n") + + elif efp_residue == 'cut_ligand': + # separate in head and tail + head = [] + tail = [] + tail_cut = '' + head_cut = '' + + for atom in frag: + # Get the current atom name, add line to the correct fragment + atomname = atom.split()[2] + + # Append the line into the desired fragment based on the Rings list. + if atomname in ring: + head.append(atom) + else: + tail.append(atom) + # Save the line for later use if it matches the headside or tailside. + if atomname == tailside: + tail_cut = atom + elif atomname == headside: + head_cut = atom + + if not head_cut: + print(f'head_cut atom is not found in special ligand {efp_residues[efp_counter-1]}') + sys.exit() + if not tail_cut: + print(f'tail_cut atom is not found in special ligand {efp_residues[efp_counter-1]}') + sys.exit() + head_coord = cut_frag(head_cut, tail_cut, CH_distance) + tail_coord = cut_frag(tail_cut, head_cut, CH_distance) + # Append the virtual hydrogen lines to the corresponding fragment lists. + head.append( + " H000 1.0" + + f"{head_coord[0]:.8f}".rjust(17) + + f"{head_coord[1]:.8f}".rjust(18) + + f"{head_coord[2]:.8f}".rjust(18) + "\n" + ) + tail.append( + " H000 1.0" + + f"{tail_coord[0]:.8f}".rjust(17) + + f"{tail_coord[1]:.8f}".rjust(18) + + f"{tail_coord[2]:.8f}".rjust(18) + "\n" + ) + + # won't make makefp inputs for water and ions, but count and save this info for later + elif efp_residue == 'water': + QM_water = False + for atom in frag: + if int(atom.split()[3]) in qm_IDs: + print(f'Found QM water in EFP list {atom}, continue...') + QM_water = True + break + if not QM_water: # append in EFP water list only EFP waters which fragments need to be in EFP region + efp_water_list.append(frag) + + elif efp_residue == 'ion': + if not int(frag[0].split()[3]) in qm_IDs: # only EFP ions in the list + efp_ion_list.append(frag) + + # now send prepared fragment lists to build makefp inputs + if efp_residue in ['aa', 'naa', 'caa', 'ligand']: # normal case + make_standard_inp(frag, efp_residue) + elif efp_residue == 'cut_ligand': + make_standard_inp(head, 'head') + make_standard_inp(tail, 'tail') + elif efp_residue == 'fancy': + print(f'Fancy frag {efp_residues[efp_counter-1]}') + fancy_list.append([frag[0].split()[1], frag]) + if efp_residue != 'none': + efp_lines.append(frag) + +######## + + # now do work to prepare a new fragment if any + efp_residue_old = efp_residue + efp_residue = 'none' + resid_old = line.split()[0] + resname_old = line.split()[1] + frag = [] + CO_charge = 0.0 + + res_type = residue_type(line.split()[1]) + if res_type != 'skip': + if efp_counter < len(efp_residues): # check that we have any efp fragments left to search for + if pos_match(line, efp_residues[efp_counter]): # found matching EFP fragment + + efp_residue = res_type + + # aminoacid + if efp_residue in ['aa','caa','naa']: + CO = [] + if efp_residue in ['aa', 'caa']: + CO.append(g96_charges[j-2]) + CO.append(g96_charges[j-1]) + CO_charge = float(g96_charges[j-2].split()[-1]) + float(g96_charges[j-1].split()[-1]) + # pop from mm_charges + mm_lines.pop() + mm_lines.pop() + accumulated_charge -= float(g96_charges[j-1].split()[-1]) + float(g96_charges[j-2].split()[-1]) + # add charge to previous MM residue + if efp_residue_old == 'none' : + CA_charge_index_list.append([CA_index, CO_charge]) + + # cut ligand + if efp_residue == 'cut_ligand': + headside, tailside, ring = define_ligand_cut(line.split()[1]) + + # no special treatment for other types + efp_counter += 1 + +# done with new-residue condition +# take care of saving lines into fragments + + if efp_residue != 'none' and efp_residue != 'skip': + frag.append(line) + else: + if efp_residue != 'skip': + mm_lines.append(line) + + # maybe add another check + if line.split()[2] == 'CA': + CA.append(line) + + # this part takes care of (integer)izing charge on classical AAs that were near EFP AAs. The charge of CO groups left from EFP AA fragments + # is added/subtracted to the Ca of the nearest classical AA + if efp_residue == 'none': # prepare for the case if the next residue will be EFP amino acid + CA_index = len(mm_lines) - 1 + if efp_residue_old in ['aa', 'naa'] and efp_residue == 'none' : # this is the case when the previous residue was EFP AA, but the present one is MM AA + CA_index_next = len(mm_lines) - 1 + CA_charge_index_list.append([CA_index_next, -CO_charge_next]) + CO_charge_next = 0 + + +### +# Updating charges in mm_lines to make total charge of MM region integer and opposite of total charge of (EFP + QM) region + +dcharge = 0.0 +for i in CA_charge_index_list: + dcharge += i[1] +print(f'Accumulated delta charge due to CO groups {dcharge}. We will fix it next.') + +j = 0 +for i in range(len(mm_lines)): + for line in CA_charge_index_list: + if i == line[0]: + mm_lines[i] = update_charge(mm_lines[i], line[1]) + +# take care of 'fancy' residues now +# this is very system specific and non-standard +for ligand in fancy_list: + if ligand[0] == 'SF4': + print('Updating SF4 and ACYS fragments.') + # part 1. Update SF4 fragment by adding ACYS residue groups (CB, HB1, HB2, SG) and H link atoms + acys_lines=[] + sf4_lines=[] + resids=['574','556','565','583'] + atoms=['CB','HB1','HB2','SG'] + bridges=[] + at_start = -1 + + for line in g96_charges: + if line.split()[1]=='ACYS' and line.split()[0] in resids: + if line.split()[2] in atoms: + acys_lines.append(line) + elif line.split()[2]=='CA': + bridges.append(line) + elif line.split()[1]=='SF4' and line.split()[0]=='1': + if(line.split()[2]=='FE1'): + at_start=int(line.split()[3]) + sf4_lines.append(line) + + virt_Hs=[] + i=0 + for line in acys_lines: + if 'CB' in line: + head=line + tail=bridges[i] + virt_coords=cut_frag(head,tail, CH_distance) + virt_Hs.append(virt_coords) + i+=1 + + outlines=[] + outlines.append(makefp_special_header(at_start)) + + for line in sf4_lines: + col1 = f" {line.split()[2]}".ljust(6) + if line.split()[2][0].upper() == 'F': + col2 = at_sym['FE'].ljust(4) + else: + col2 = at_sym[line.split()[2][0]].ljust(4) + x, y, z = [float(line.split()[i]) * 10 for i in range(4, 7)] + col3 = f"{x:.8f}".rjust(16) + col4 = f"{y:.8f}".rjust(15) + col5 = f"{z:.8f}".rjust(15) + outlines.append(f"{col1}{col2}{col3}{col4}{col5}\n") + for line in acys_lines: + col1 = f" {line.split()[2]}".ljust(6) + col2 = at_sym[line.split()[2][0]].ljust(4) + x, y, z = [float(line.split()[i]) * 10 for i in range(4, 7)] + col3 = f"{x:.8f}".rjust(16) + col4 = f"{y:.8f}".rjust(15) + col5 = f"{z:.8f}".rjust(15) + outlines.append(f"{col1}{col2}{col3}{col4}{col5}\n") + for line in virt_Hs: + col1=' H000 ' + col2 = '1.0 ' + col3 = f"{line[0]:.8f}".rjust(16) + col4 = f"{line[1]:.8f}".rjust(15) + col5 = f"{line[2]:.8f}".rjust(15) + outlines.append(f"{col1}{col2}{col3}{col4}{col5}\n") + + outlines.append(' $end\n') + outlines.append('!comment atoms to be erased:\n') + outlines.append('!polarization points to remove:\n') + + filename ='sf4_1_'+str(at_start)+'_'+timestamp+'_'+mutant + frag_list.append(filename) + + # adding to frag_list_info + lines = [] + lines.append(filename) + counter = 0 + start = 0 + for line in outlines: + if 'FE1' in line: + start = 1 + if start == 1: + if 'end' in line or 'END' in line: + start = 0 + break + else: + atom_name = 'A0' + str(counter+1) + line.split()[0] + x,y,z = [float(line.split()[j]) for j in range(2, 5)] + ll = f'{atom_name} {x:.8f} {y:.8f} {z:.8f}' + lines.append(ll) + counter += 1 + if counter == 3: + break + frag_list_info.update({filename:lines}) + + filename=filename+'.inp' + + with open(filename,'w') as out: + for line in outlines: + out.write(line) + + # Part 2. Updating ACYS fragments. Removing cysteine residue part (CB, HB1, HB2, SG) that were added to SF4 fragment. + # Adding capping hydrogen between CA and CB + + for filename in os.listdir('.'): + if filename.startswith('acys') and filename.endswith('.inp'): + outlines=[] + head = '' + tail = '' + with open(filename,'r') as acys: + acys_lines=acys.readlines() + start = 0 + for line in acys_lines: + if line.strip() == 'C1': + start = 1 + if start == 1: + if 'CA' in line: + head = line + if 'CB' in line: + tail = line + if line.split()[0] in ['CB', 'HB1', 'HB2', 'SG']: continue - elif int(atom.split()[3]) in qm_IDs: - qm_names.append(atom.split()[2]) - elif int(atom.split()[3]) in pol_remove: - pol_names.append(atom.split()[2]) - make_inp(frag, qm_names, pol_names) - frag = [] # Reset fragment lines - i += 1 # Move to the next EFP residue. - # Check if the fragment is complete (with more than one line) - # Check that this frag is the current EFP residue (and end of that residue) - # Check that this frag is not an amino acid (is a cofactor, lipid, etc) [redundant] - # No virtual hydrogens are added here - if len(frag) > 1 and frag[-1].split()[0] == efp_resis[i]: - if (frag[0].split()[0] != line.split()[0]) and frag[0].split()[1] not in known_amino_acids: - if frag[0].split()[1][1:] not in known_amino_acids: - qm_names = [] - pol_names = [] - for atom in frag: - if int(atom.split()[3]) in qm_IDs: - qm_names.append(atom.split()[2]) - elif int(atom.split()[3]) in pol_remove: - pol_names.append(atom.split()[2]) - make_inp(frag, qm_names, pol_names) - frag = [] - i += 1 - # If the current line belongs to the current EFP residue, add it to the fragment. - if line.split()[0] == efp_resis[i]: - if len(prev_co) > 1 and line.split()[1] in known_amino_acids: - # Add the previous two "C" or "O" atoms to the fragment if the current fragment is an amino acid - frag.append(prev_co[-2]) - frag.append(prev_co[-1]) - prev_co = [] - frag.append(line) - elif 'POSITION' in line: - start = True \ No newline at end of file + if 'end' in line or 'END' in line: + break + outlines.append(line) + + desired_dist = CH_distance + xh, yh, zh = [float(head.split()[i]) for i in range(2, 5)] + xt, yt, zt = [float(tail.split()[i]) for i in range(2, 5)] + dist_mag = np.sqrt((xh - xt)**2 + (yh - yt)**2 + (zh - zt)**2) + virt = [ + ((xt - xh) * desired_dist / dist_mag) + xh, + ((yt - yh) * desired_dist / dist_mag) + yh, + ((zt - zh) * desired_dist / dist_mag) + zh + ] + + outlines.append(f" H000 1.0 {virt[0]:.8f} {virt[1]:.8f} {virt[2]:.8f}\n") + outlines.append(' $end\n') + outlines.append('!comment atoms to be erased:\n') + outlines.append('!polarization points to remove:\n') + + outfile=filename #DANGEROUS. Rewriting inp file for acys + with open(outfile,'w') as out: + for line in outlines: + out.write(line) + + else: + print(f'Do not know what to do about fancy fragment {ligand}. Take care!') + + +############# +# ANALYSIS + +print('\n The main work is done. Some analysis now. Check it carefully!') + +total_charge = 0 +for line in g96_charges: + total_charge += float(line.split()[-1]) +print(f'Total charge {total_charge}') + +mm_charge = 0 +for line in mm_lines: + mm_charge += float(line.split()[-1]) +print(f'MM charge {mm_charge}') + + +### checking charge of EFP region based on prepared makefp input files +lines = [] +for fname in os.listdir('.'): + if fname.endswith('.inp'): + with open(fname, 'r') as f: + for line in f: + if 'icharg' in line: + lines.append(fname + ':' + line) + break + +efp_charge = 0 +charged_list = [] +for line in lines: + parts = line.split()[2] + res_charge = int(parts.split('=')[1]) + if res_charge != 0: + charged_list.append(line) + efp_charge += res_charge + + +print(f'Total charge of EFP region based on prepared makefp inputs: {efp_charge}') + + +print('\n Prepare files needed for building Q-Chem or libefp inputs... ') + +with open('frag_list.txt','w') as file: + for elem in frag_list: + file.write(elem+'\n') + +# $prot.efp file +with open('prot.efp', 'w') as file: + print(' Writing prot.efp file.') + file.write(' $prot\n') + file.write(' Generated by make_AAs.py\n') + file.write(' COORDINATES (BOHR)\n') + # convert coordinates to Bohr units + # make name as resid_resname_atom_name_atomid + for line in mm_lines: + parts = line.split() + if len(parts) != 8: + print(f'problem with mm_lines line {line}! Exit.') + sys.exit() + name = parts[0] + '_' + parts[1] + '_' + parts[2] + '_' + parts[3] + file.write(f'{name} {float(parts[4])*nm2Bohr:6.10} {float(parts[5])*nm2Bohr:6.10} {float(parts[6])*nm2Bohr:6.10} 0.0001 0.0\n') + file.write(' STOP\n') + # charges + file.write(' MONOPOLES\n') + for line in mm_lines: + parts = line.split() + if len(parts) != 8: + print(f'problem with mm_lines line {line}! Exit.') + sys.exit() + name = parts[0] + '_' + parts[1] + '_' + parts[2] + '_' + parts[3] + file.write(f'{name} {float(parts[7]):2.6} 0.0\n') + file.write(' STOP\n') + file.write(' $end\n') + +# writing prot fragment info to file +with open('prot.txt','w') as file: + print(' Writing prot.txt file.') + file.write('prot\n') + # convert coordinates to Angstrom units + # make name as resid_resname_atom_name_atomid + counter = 0 + for line in mm_lines: + if counter == 3: + break + parts = line.split() + if len(parts) != 8: + print(f'problem with mm_lines line {line}! Exit.') + sys.exit() + name = parts[0] + '_' + parts[1] + '_' + parts[2] + '_' + parts[3] + file.write(f'{name} {float(parts[4])*10:.8f} {float(parts[5])*10:.8f} {float(parts[6])*10:.8f}\n') + counter += 1 + + +# QM region for Q-Chem input +# define charge and multiplicity of the QM region +with open('qm_region_qchem.txt', 'w') as file: + print(' Writing QM region file.') + qm_lines = print_qchem_qm_lines(settings_file, g96_charges) + for line in qm_lines: + file.write(line + '\n') + +# prepare EFP water fragments +# Assume that efp water file uses TIP3P atom names, A01OW, A02HW1, A03HW2 +# This is Q-Chem format +# adjust as needed +with open('water_efp_region.txt', 'w') as file: + print(' Writing EFP water fragments into file.') + for water in efp_water_list: + ox,oy,oz = [float(water[0].split()[i])*10.0 for i in range(4, 7)] + h1x,h1y,h1z = [float(water[1].split()[i])*10.0 for i in range(4, 7)] + h2x,h2y,h2z = [float(water[2].split()[i])*10.0 for i in range(4, 7)] + file.write('water\n') + file.write(f'A01OW {ox:.8f} {oy:.8f} {oz:.8f}\n') + file.write(f'A02HW1 {h1x:.8f} {h1y:.8f} {h1z:.8f}\n') + file.write(f'A03HW2 {h2x:.8f} {h2y:.8f} {h2z:.8f}\n') + +# prepare EFP fragment output, in Q-Chem format +with open('efp_region.txt', 'w') as file: + print(' Writing EFP fragments into file.') + for frag in frag_list_info.values(): + for line in frag: + file.write(line + '\n') + + + + + + + + diff --git a/examples/flex-EFP/Scripts/make_qchem_input.py b/examples/flex-EFP/Scripts/make_qchem_input.py new file mode 100644 index 0000000..8a5bf3a --- /dev/null +++ b/examples/flex-EFP/Scripts/make_qchem_input.py @@ -0,0 +1,201 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Dec 4 12:33:41 2024 + +@author: lyuda + +Sample execution: + python make_qchem_input.py qchem_file_name + +The script will search for the following files in the current directory: + qm_region_qchem.txt + efp_region.txt + water_efp_region.txt + prot.txt + +""" + +import sys +import os +import numpy as np + +if len(sys.argv) != 2: + print('Provide qchem file name') + sys.exit() + +qchem_file_name = sys.argv[1] + +## Check charge, multiplicity, header, n_states (number of excited states) + +charge = 0 +multiplicity = 1 +n_states = 5 + + +# adjust as needed! +def build_header(n_states): + """ + Return a list of header lines (as strings) that will be prepended to the output. + Hard coded, you may want to adjust these parameters. + """ + header_lines = [ +'$rem\n', +'JOBTYPE SP\n', +'METHOD wB97x\n', +'BASIS 6-31G*\n', +'purecart = 1111\n', +'MAX_SCF_CYCLES=100\n', +'SCF_ALGORITHM = diis\n', +f'cis_n_roots {n_states}\n', +'cis_singlets true\n', +'cis_triplets false\n', +'max_cis_cycles 100\n', +'CIS_CONVERGENCE = 7\n', +'rpa 2\n', +'SYM_IGNORE TRUE\n', +'mem_total 250000 ! 32 proc on lslipche, 2 GB per processor\n', +'mem_static 1000\n', +'gui 2\n', +'EFP_COORD_XYZ TRUE\n', +'EFP TRUE\n', +'EFP_FRAGMENTS_ONLY FALSE\n', +'EFP_DISP FALSE\n', +'EFP_EXREP FALSE\n', +'EFP_QM_DISP FALSE\n', +'EFP_QM_EXREP FALSE\n', +'EFP_POL TRUE\n', +'EFP_QM_POL TRUE\n', +'efp_pairwise = 0\n', +'efp_print = 2\n', +'efp_pol_field_update = 3\n', +'EFP_POL_DAMP=1\n', +'EFP_POL_DAMP_TT_VALUE=300\n', +' MAKE_CUBE_FILES true ! triggers writing of cube files\n', +' PLOTS true\n', +'$end\n', +'\n'] + return header_lines + + +def get_plot_section(qm_outlines, n_states): + """ + Prepares $plots section basen on QM coordinates + + Parameters + ---------- + qm_outlines : $molecule section + + Returns + ------- + $plots section lines + + """ + outlines = [] + xmin = ymin = zmin = 100000 + xmax = ymax = zmax = -100000 + for line in qm_outlines: + line = line.rsplit() + if len(line) == 4: + #print(line) + if float(line[1]) < xmin: xmin = float(line[1]) + if float(line[1]) > xmax: xmax = float(line[1]) + if float(line[2]) < ymin: ymin = float(line[2]) + if float(line[2]) > ymax: ymax = float(line[2]) + if float(line[3]) < zmin: zmin = float(line[3]) + if float(line[3]) > zmax: zmax = float(line[3]) + + outlines.append('$plots\n') + outlines.extend(f' grid_range ({int(xmin) - 2},{int(xmax) + 2}) ({int(ymin) - 2},{int(ymax) + 2}) ({int(zmin) - 2},{int(zmax) + 2})\n') + gridx = (int(xmax)-int(xmin)+4)*3 + gridy = (int(ymax)-int(ymin)+4)*3 + gridz = (int(zmax)-int(zmin)+4)*3 + outlines.extend(f' grid_points {gridx} {gridy} {gridz}\n') + outlines.extend(f' attachment_detachment_density 1-{n_states}\n') + outlines.extend('$end\n\n') + + return outlines + + +##################################### +# main + +#if len(sys.argv) != 4: +# print('Please provide a full structure .g96 file, an EFP structure .g96 file, and a user_defined text file with QM atoms and QM-MM boundary atoms') +# sys.exit() + +qm_lines = [] +efp_lines = [] +water_lines = [] +prot_lines = [] + +with open('qm_region_qchem.txt', 'r') as qm_file: + try: + qm_lines = qm_file.readlines() + except: + print('Could not find qm_region_qchem.txt file. Exit.') + sys.error() + +with open('efp_region.txt', 'r') as efp_file: + try: + efp_lines = efp_file.readlines() + except: + print('Could not find efp_region.txt file. Take care! Continue...') + +with open('water_efp_region.txt', 'r') as water_file: + try: + water_lines = water_file.readlines() + except: + print('Could not find water_efp_region.txt file. Take care! Continue...') + +with open('prot.txt', 'r') as prot_file: + try: + prot_lines = prot_file.readlines() + except: + print('Could not find water_efp_region.txt file. Take care! Continue...') + + +# Build the output list by starting with the header. +header_lines = build_header(n_states) +outlines = [] +for line in header_lines: + outlines.append(line) + +# add qm section +outlines.append('$molecule\n') +outlines.append(f'{charge} {multiplicity}\n') +outlines.extend(qm_lines) +outlines.append('$end\n\n') + + +###### comment if not needed! ######## +# prepare $plot section +plot_lines = get_plot_section(qm_lines, n_states) +outlines.extend(plot_lines) + +# adding efp section and filling it with fragments, waters, prot fragment +outlines.append('\n$efp_fragments\n') +if efp_lines: + outlines.extend(efp_lines) +if water_lines: + outlines.extend(water_lines) +if prot_lines: + outlines.extend(prot_lines) +outlines.append('$end\n\n') + + +''' +#THESE LINES BELOW ARE FOR EFP PAIRWISE!! When 2 jobs are run from 1 file +outlines.append('\n') +outlines.append('@@@\n') +outlines.append('\n') +# then repeat all input sections starting with modified header +''' + +# Write the final output to a file. +#outname=g96_filename.replace('.g96','.in') +outname = qchem_file_name +print(f'Writing Q-Chem QM/EFP file {qchem_file_name}') +with open(outname, 'w') as f: + for line in outlines: + f.write(line) + #f.write('$end') diff --git a/examples/flex-EFP/Scripts/step4.Flexible_V7.py b/examples/flex-EFP/Scripts/step4.Flexible_V7.py new file mode 100644 index 0000000..2562c78 --- /dev/null +++ b/examples/flex-EFP/Scripts/step4.Flexible_V7.py @@ -0,0 +1,2057 @@ +''' +Translation and Rotation of EFP parameters using 3X3 Rotation Matrix. +(Zero Order Approximation) +======================================================================= +Calculate rotation matrix between conformation A and B, +in Bohr (.efp) and Angs unit (.inp) using three atoms. +The order of the atoms "must" be the same for both conformations. +Usage: python Flexible.py known.efp unknown.inp --> unknown.efp +''' +import argparse,sys,os,shutil,re,math,fnmatch +import numpy as np +import numpy + +def add_hydrogen(back_xyz,side_xyz): + convfactor = 1/0.529177249 + x1,y1,z1 = back_xyz[0],back_xyz[1],back_xyz[2] + x2,y2,z2 = side_xyz[0],side_xyz[1],side_xyz[2] + actual_length = math.sqrt(pow(x1-x2,2)+pow(y1-y2,2)+pow(z1-z2,2)) + desire_length = 1.07886*convfactor + + x3 = ((x2-x1)*desire_length/actual_length)+x1 + y3 = ((y2-y1)*desire_length/actual_length)+y1 + z3 = ((z2-z1)*desire_length/actual_length)+z1 + + return np.array([x3,y3,z3]) + +def read_efp_param(reflines1): + parm_file,find_file = [],[] + for obj in reflines1: + parm_file.append(obj) + find_file.append(obj.strip()) +# print parm_file.index(" COORDINATES (BOHR)\n") + return parm_file,find_file + +def read_new_struc(reflines2): + convfactor = 1/0.529177249 +# convfactor = 1 + temp1,temp2 = [],[] + lineN = 0 + start = 0 + for obj in reflines2: + line = obj.strip() + lineN += 1 + if '$data' in line: + start = 1 + coord1 = lineN+2 + continue +# elif "H000" in line: +# coord2 = lineN+1 + if start == 1: + if "$end" in line: + coord2 = lineN + break +# elif "$end" in line: +# coord2 = lineN +# else: +# continue + lineN = 0 + for obj in reflines2: + line = obj.strip() + lineN += 1 + if lineN > coord1 and lineN < coord2: + temp2.append(float(line.split()[2])*convfactor) + temp2.append(float(line.split()[3])*convfactor) + temp2.append(float(line.split()[4])*convfactor) + temp1.append(temp2) + temp2 = [] + return np.array(temp1) + +def save_efp_coord_and_ele(parm_file,find_file,save_pol,save_disp,save_xr): +# ======================================================================= +# Save reference coordinates and bond-mid points +# ======================================================================= + temp1,temp2 = [],[] + temp3,temp4 = [],[] + for i in range(find_file.index("COORDINATES (BOHR)")+1,find_file.index("MONOPOLES")-1,1): + if find_file[i].split()[0][0:2] != "BO": + temp2.append(find_file[i].split()[0]) + temp2.append(float(find_file[i].split()[1])) + temp2.append(float(find_file[i].split()[2])) + temp2.append(float(find_file[i].split()[3])) + temp1.append(temp2) + temp2 = [] + else: + temp4.append(find_file[i].split()[0]) + temp4.append(float(find_file[i].split()[1])) + temp4.append(float(find_file[i].split()[2])) + temp4.append(float(find_file[i].split()[3])) + temp3.append(temp4) + temp4 = [] + coor_xyz1 = np.array(temp1) + bmid_xyz1 = np.array(temp3) +# ======================================================================= +# Save reference monopole points +# ======================================================================= + ref_monop = [] + for i in range(find_file.index("MONOPOLES"),find_file.index("DIPOLES"),1): + ref_monop.append(parm_file[i]) +# ======================================================================= +# Save reference dipole points +# ======================================================================= + temp1,temp2 = [],[] + temp3,temp4 = [],[] + for i in range(find_file.index("DIPOLES")+1,find_file.index("QUADRUPOLES")-1,1): + if find_file[i].split()[0][0:2] != "BO": + temp2.append(find_file[i].split()[0]) + temp2.append(float(find_file[i].split()[1])) + temp2.append(float(find_file[i].split()[2])) + temp2.append(float(find_file[i].split()[3])) + temp1.append(temp2) + temp2 = [] + else: + temp4.append(find_file[i].split()[0]) + temp4.append(float(find_file[i].split()[1])) + temp4.append(float(find_file[i].split()[2])) + temp4.append(float(find_file[i].split()[3])) + temp3.append(temp4) + temp4 = [] + ref_coor_dip = np.array(temp1) + ref_bmid_dip = np.array(temp3) +# ======================================================================= +# Save reference quadrupole points +# ======================================================================= + temp1,temp2 = [],[] + temp3,temp4 = [],[] + for i in range(find_file.index("QUADRUPOLES")+1,find_file.index("OCTUPOLES")-1,1): + if len(find_file[i].split()) == 6: + if find_file[i].split()[0][0:2] != "BO": + temp2.append(find_file[i].split()[0]) + temp2.append(float(find_file[i].split()[1])) # XX + temp2.append(float(find_file[i].split()[2])) # YY + temp2.append(float(find_file[i].split()[3])) # ZZ + temp2.append(float(find_file[i].split()[4])) # XY + else: + temp4.append(find_file[i].split()[0]) + temp4.append(float(find_file[i].split()[1])) # XX + temp4.append(float(find_file[i].split()[2])) # YY + temp4.append(float(find_file[i].split()[3])) # ZZ + temp4.append(float(find_file[i].split()[4])) # XY + else: + if len(temp2) > 0: + temp2.append(float(find_file[i].split()[0])) # XZ + temp2.append(float(find_file[i].split()[1])) # YZ + temp1.append(temp2) + temp2 = [] + elif len(temp4) > 0: + temp4.append(float(find_file[i].split()[0])) # XZ + temp4.append(float(find_file[i].split()[1])) # YZ + temp3.append(temp4) + temp4 = [] + else: + print("WHAT? check quadrupole") + exit() + ref_coor_qup = np.array(temp1) + ref_bmid_qup = np.array(temp3) +# ======================================================================= +# Save reference octupole points +# ======================================================================= + temp1,temp2 = [],[] + temp3,temp4 = [],[] + + + if not save_pol: + end_of_ocpol = find_file.index("POLARIZABLE POINTS")-1 + else: + if not save_disp: + end_of_ocpol = find_file.index("DYNAMIC POLARIZABLE POINTS")-1 + else: + if not save_xr: + end_of_ocpol = find_file.index("PROJECTION BASIS SET")-1 + else: + end_of_ocpol = find_file.index("SCREEN2 (FROM VDWSCL= 0.700)")-1 + +# if not save_pol: +# end_of_ocpol = find_file.index("POLARIZABLE POINTS")-1 +# else: + end_of_ocpol = find_file.index("SCREEN2 (FROM VDWSCL= 0.700)")-1 + for i in range(find_file.index("OCTUPOLES")+1,end_of_ocpol,1): + if len(find_file[i].split()) == 6: + if find_file[i].split()[0][0:2] != "BO": + temp2.append(find_file[i].split()[0]) + temp2.append(float(find_file[i].split()[1])) # XXX + temp2.append(float(find_file[i].split()[2])) # YYY + temp2.append(float(find_file[i].split()[3])) # ZZZ + temp2.append(float(find_file[i].split()[4])) # XXY + else: + temp4.append(find_file[i].split()[0]) + temp4.append(float(find_file[i].split()[1])) # XXX + temp4.append(float(find_file[i].split()[2])) # YYY + temp4.append(float(find_file[i].split()[3])) # ZZZ + temp4.append(float(find_file[i].split()[4])) # XXY + elif len(find_file[i].split()) == 5: + if len(temp2) > 0: + temp2.append(float(find_file[i].split()[0])) # XXZ + temp2.append(float(find_file[i].split()[1])) # XYY + temp2.append(float(find_file[i].split()[2])) # YYZ + temp2.append(float(find_file[i].split()[3])) # XZZ + elif len(temp4) > 0: + temp4.append(float(find_file[i].split()[0])) # XXZ + temp4.append(float(find_file[i].split()[1])) # XYY + temp4.append(float(find_file[i].split()[2])) # YYZ + temp4.append(float(find_file[i].split()[3])) # XZZ + else: + print("WHAT? check octupole") + exit() + elif len(find_file[i].split()) == 2: + if len(temp2) > 0: + temp2.append(float(find_file[i].split()[0])) # YZZ + temp2.append(float(find_file[i].split()[1])) # XYZ + temp1.append(temp2) + temp2 = [] + elif len(temp4) > 0: + temp4.append(float(find_file[i].split()[0])) # YZZ + temp4.append(float(find_file[i].split()[1])) # XYZ + temp3.append(temp4) + temp4 = [] + else: + print("WHAT? check octupole") + exit() + else: + print("WHAT? check octupole") + exit() + ref_coor_ocp = np.array(temp1) + ref_bmid_ocp = np.array(temp3) + + screen = [] + for i in range(find_file.index("SCREEN2 (FROM VDWSCL= 0.700)"),find_file.index("$END")+1,1): + screen.append(parm_file[i]) + + return [coor_xyz1,bmid_xyz1,ref_monop,ref_coor_dip,ref_bmid_dip,ref_coor_qup,ref_bmid_qup,ref_coor_ocp,ref_bmid_ocp,screen] + +def save_efp_pol(parm_file,find_file,save_pol,save_disp,save_xr): +# ======================================================================= +# Save reference static polarizable points +# ======================================================================= + temp1,temp2 = [],[] + temp3,temp4 = [],[] + if not save_disp: + end_of_pol = find_file.index("DYNAMIC POLARIZABLE POINTS")-1 + else: + if not save_xr: + end_of_pol = find_file.index("PROJECTION BASIS SET")-1 + else: + end_of_pol = find_file.index("SCREEN2 (FROM VDWSCL= 0.700)")-1 + + for i in range(find_file.index("POLARIZABLE POINTS")+1,end_of_pol,1): + if find_file[i].split()[0][0:2] == "CT": + temp2.append(find_file[i].split()[0]) + temp2.append(float(find_file[i].split()[1])) # X + temp2.append(float(find_file[i].split()[2])) # Y + temp2.append(float(find_file[i].split()[3])) # Z + temp1.append(temp2) + temp2 = [] + else: + if len(find_file[i].split()) == 5: + temp4.append(float(find_file[i].split()[0])) # XX XZ + temp4.append(float(find_file[i].split()[1])) # YY YZ + temp4.append(float(find_file[i].split()[2])) # ZZ YX + temp4.append(float(find_file[i].split()[3])) # XY ZX + elif len(find_file[i].split()) == 1: + temp4.append(float(find_file[i].split()[0])) # ZY + temp3.append(temp4) + temp4 = [] + else: + print("WHAT? check static polarizability") + exit() + ref_lmo = np.array(temp1) + ref_pol = np.array(temp3) + + return ref_lmo,ref_pol + +def save_efp_dyn(parm_file,find_file,save_pol,save_disp,save_xr): +# ======================================================================= +# Save reference dynamic polarizable points +# ======================================================================= + temp1,temp2 = [],[] + temp3,temp4 = [],[] + if not save_xr: + end_of_dyn = find_file.index("PROJECTION BASIS SET")-1 + else: + end_of_dyn = find_file.index("SCREEN2 (FROM VDWSCL= 0.700)")-1 + for i in range(find_file.index("DYNAMIC POLARIZABLE POINTS")+1,end_of_dyn,1): + if find_file[i].split()[0] == "CT": + temp2.append(find_file[i].split()[0]+find_file[i].split()[1]) + temp2.append(float(find_file[i].split()[2])) + temp2.append(float(find_file[i].split()[3])) + temp2.append(float(find_file[i].split()[4])) + temp1.append(temp2) + temp2 = [] + else: + if len(find_file[i].split()) == 5: + temp4.append(float(find_file[i].split()[0])) # XX XZ + temp4.append(float(find_file[i].split()[1])) # YY YZ + temp4.append(float(find_file[i].split()[2])) # ZZ YX + temp4.append(float(find_file[i].split()[3])) # XY ZX + elif len(find_file[i].split()) == 1: + temp4.append(float(find_file[i].split()[0])) # ZY + temp3.append(temp4) + temp4 = [] + else: + print("WHAT? check dispersion") + exit() + ref_dyn = np.array(temp3) + return temp1,ref_dyn + +def save_efp_exr(parm_file,find_file,save_pol,save_disp,save_xr): +# ======================================================================= +# Save reference exchange-repulsion parameters +# ======================================================================= + prj_basis = [] # need to change multiplicity if it is more than two + orbital,n_basis = [],[] + for i in range(find_file.index("PROJECTION BASIS SET"),find_file.index("MULTIPLICITY 1")+3,1): + prj_basis.append(parm_file[i]) + line = find_file[i].split() + if len(line) > 0: + if line[0] == "S": + orbital.append(line[0]) + n_basis.append(1) + elif line[0] == "P": + orbital.append(line[0]) + n_basis.append(3) + elif line[0] == "L": + orbital.append(line[0]) + n_basis.append(4) + elif line[0] == "D": + orbital.append(line[0]) + n_basis.append(6) + elif line[0] == "F": + orbital.append(line[0]) + n_basis.append(10) + elif line[0] == "G": + print("G orbital rotation is not implemented!!") + print("Quit") + quit() + else: + continue + else: + continue + temp1 = [] + for i in range(find_file.index("MULTIPLICITY 1")+3,find_file.index("FOCK MATRIX ELEMENTS"),1): + temp1.append(parm_file[i][5:20]) + temp1.append(parm_file[i][20:35]) + temp1.append(parm_file[i][35:50]) + temp1.append(parm_file[i][50:65]) + temp1.append(parm_file[i][65:80]) + fock_mat = [] + for i in range(find_file.index("FOCK MATRIX ELEMENTS"),find_file.index("LMO CENTROIDS"),1): + fock_mat.append(parm_file[i]) + temp2,temp3 = [],[] + for i in range(find_file.index("LMO CENTROIDS")+1,find_file.index("SCREEN2 (FROM VDWSCL= 0.700)")-1,1): + temp3.append(find_file[i].split()[0]) + temp3.append(float(find_file[i].split()[1])) + temp3.append(float(find_file[i].split()[2])) + temp3.append(float(find_file[i].split()[3])) + temp2.append(temp3) + temp3 = [] + xr_lmo = np.array(temp2) + + temp4 = [] + for i in range(len(temp1)): + if len(temp1[i]) < 10: + continue + else: + temp4.append(float(temp1[i])) + + n_lmo,n_bas = int(find_file[find_file.index("MULTIPLICITY 1")+2].split()[2]),int(find_file[find_file.index("MULTIPLICITY 1")+2].split()[3]) + ref_wvf = np.zeros((n_lmo,n_bas)) + lineN = 0 + for i in range(len(ref_wvf)): + for j in range(len(ref_wvf[i])): + ref_wvf[i][j] = temp4[lineN] + lineN += 1 + + return orbital,n_basis,ref_wvf,fock_mat,xr_lmo + +def organize_connectivity(C): + temp = [] + for i in range(0,len(C)-1,1): + v = C[i][0] + for j in range(i+1,len(C),1): + for k in C[i]: + s = C[i]+C[j] + s = remove_duplicates(s) + if s[0] == 0 or s[1] == 0 or s[2] == 0: + continue + else: + temp.append(s) + return temp + +def remove_duplicates(li): + j,temp = 0,[0,0,0] + + if len(list(set(li))) == 3: + for i in li: + if li.count(i) == 2: + temp[1] = i + else: + temp[j] = i + j += 2 + return temp + else: + return temp + +def reverse_lists(li): + temp = list() + for i in range(len(li)): + a = li[i] + a.reverse() + temp.append(a) + return temp + +def finalize_connectivity(li): + temp = list() + for i in li: + if i not in temp: + temp.append(i) + return temp + +def define_topology(midorder,nearest): # [atom1,atom2,atom3,midorder1-2,midorder2-3] atom2 is the center + for i in range(len(nearest)): + k,temp = 0,[] + if nearest[i][0]-nearest[i][1] > 0: + m1,m2 = nearest[i][0],nearest[i][1] + else: + m1,m2 = nearest[i][1],nearest[i][0] + if nearest[i][2]-nearest[i][1] > 0: + m3,m4 = nearest[i][2],nearest[i][1] + else: + m3,m4 = nearest[i][1],nearest[i][2] + temp.append(str(m1)+str(m2)) + temp.append(str(m3)+str(m4)) + + for j in range(len(temp)): + for k in range(len(midorder)): + if str(temp[j]) == str(midorder[k][0])+str(midorder[k][1]): + nearest[i] += [k+1] + else: + continue + return nearest + +def save_bond_connection(bmid,coord_xyz2): + convfactor = 1/0.529177249 + temp1,temp2,temp3 = [],[],[] + for i in range(len(bmid)): + if len(bmid[i][0].split("BO")[1]) == 2: + atom1,atom2 = bmid[i][0].split("BO")[1][0:1],bmid[i][0].split("BO")[1][1:2] + elif len(bmid[i][0].split("BO")[1]) == 3: + atom1,atom2 = bmid[i][0].split("BO")[1][0:2],bmid[i][0].split("BO")[1][2:3] + else: + if int(bmid[i][0].split("BO")[1][0:2]) > int(bmid[i][0].split("BO")[1][2:4]): + atom1,atom2 = bmid[i][0].split("BO")[1][0:2],bmid[i][0].split("BO")[1][2:4] + else: + atom1,atom2 = bmid[i][0].split("BO")[1][0:3],bmid[i][0].split("BO")[1][3:4] + + temp2.append(bmid[i][0]) + temp2.append((coord_xyz2[int(atom1)-1][0]+coord_xyz2[int(atom2)-1][0])/2) + temp2.append((coord_xyz2[int(atom1)-1][1]+coord_xyz2[int(atom2)-1][1])/2) + temp2.append((coord_xyz2[int(atom1)-1][2]+coord_xyz2[int(atom2)-1][2])/2) + temp1.append(temp2) + temp2 = [] + temp3.append([int(atom1),int(atom2)]) + + bmid_xyz2 = np.array(temp1) + connection = finalize_connectivity(organize_connectivity(temp3)+organize_connectivity(reverse_lists(temp3))) + + return bmid_xyz2,define_topology(reverse_lists(temp3),connection) + +def figure_atoms(atom,r,j): + if "C" in atom and r < 0.8: # LVS fix to accoount for LMOs in aromatic systems + return j + if "O" in atom and r < 0.8: + return j + elif "N" in atom and r < 0.8: + return j + elif "S" in atom and r < 1.15: + return j + elif "MG" in atom and r < 1.15: + return j + elif "FE" in atom and r < 1.15: + return j + else: + return "Nolone" + +def lmo_position(coor,bmid,ref_lmo,anum,connection): + outputs = [] + for i in range(len(connection)): + temp1,temp2,temp3,temp4,temp5 = [],[],[],[],[] + for j in range(len(ref_lmo)): + x1,y1,z1 = ref_lmo[j][1],ref_lmo[j][2],ref_lmo[j][3] + x2,y2,z2 = bmid[connection[i][3]-1][1],bmid[connection[i][3]-1][2],bmid[connection[i][3]-1][3] + x3,y3,z3 = bmid[connection[i][4]-1][1],bmid[connection[i][4]-1][2],bmid[connection[i][4]-1][3] + dx1,dy1,dz1 = float(x1)-float(x2),float(y1)-float(y2),float(z1)-float(z2) + dx2,dy2,dz2 = float(x1)-float(x3),float(y1)-float(y3),float(z1)-float(z3) + r1 = dx1**2+dy1**2+dz1**2 + r2 = dx2**2+dy2**2+dz2**2 + temp2.append(r1) + temp4.append(r2) + L1 = temp2.index(min(temp2)) + R1 = temp4.index(min(temp4)) + temp1.append(L1) # left + temp3.append(R1) # right + r1_min = temp2[L1] + r2_min = temp4[R1] + temp2[L1] = 1000 + temp4[R1] = 1000 + if math.sqrt(r1_min)/math.sqrt(min(temp2)) >= 0.6: + L2 = temp2.index(min(temp2)) + temp1.append(L2) + if math.sqrt(r2_min)/math.sqrt(min(temp4)) >= 0.6: + R2 = temp4.index(min(temp4)) + temp3.append(R2) + + for j in range(len(ref_lmo)): + x1,y1,z1 = ref_lmo[j][1],ref_lmo[j][2],ref_lmo[j][3] + x2,y2,z2 = coor[connection[i][0]-1][1],coor[connection[i][0]-1][2],coor[connection[i][0]-1][3] + x3,y3,z3 = coor[connection[i][1]-1][1],coor[connection[i][1]-1][2],coor[connection[i][1]-1][3] + x4,y4,z4 = coor[connection[i][2]-1][1],coor[connection[i][2]-1][2],coor[connection[i][2]-1][3] + dx1,dy1,dz1 = float(x1)-float(x2),float(y1)-float(y2),float(z1)-float(z2) + dx2,dy2,dz2 = float(x1)-float(x3),float(y1)-float(y3),float(z1)-float(z3) + dx3,dy3,dz3 = float(x1)-float(x4),float(y1)-float(y4),float(z1)-float(z4) + r1 = math.sqrt(dx1**2+dy1**2+dz1**2) + r2 = math.sqrt(dx2**2+dy2**2+dz2**2) + r3 = math.sqrt(dx3**2+dy3**2+dz3**2) + lone1 = figure_atoms(coor[connection[i][0]-1][0],r1,j) + lone2 = figure_atoms(coor[connection[i][1]-1][0],r2,j) + lone3 = figure_atoms(coor[connection[i][2]-1][0],r3,j) + if lone1 != "Nolone": + temp1.append(lone1) + if lone2 != "Nolone": + temp5.append(lone2) + if lone3 != "Nolone": + temp3.append(lone3) + left = list(set(temp1)) + cent = list(set(temp5)) + right = list(set(temp3)) + + outputs.append([left,cent,right]) + + return outputs + +def define_three_points(info,xyz,a,b,c,d,e,f): + x1,y1,z1 = float(xyz[info[a]-1][d]),float(xyz[info[a]-1][e]),float(xyz[info[a]-1][f]) + x2,y2,z2 = float(xyz[info[b]-1][d]),float(xyz[info[b]-1][e]),float(xyz[info[b]-1][f]) + x3,y3,z3 = float(xyz[info[c]-1][d]),float(xyz[info[c]-1][e]),float(xyz[info[c]-1][f]) + return np.array([[x1,y1,z1],[x2,y2,z2],[x3,y3,z3]]) + +def centroid(X): + C1 = (X[0][0]+X[1][0]+X[2][0])/3 + C2 = (X[0][1]+X[1][1]+X[2][1])/3 + C3 = (X[0][2]+X[1][2]+X[2][2])/3 + return [C1,C2,C3] +# C = sum(X)/len(X) +# return C + +def kabsch(P,Q): + C = np.dot(np.transpose(P), Q) + V, S, W = np.linalg.svd(C) + d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 + + if d: + S[-1] = -S[-1] + V[:, -1] = -V[:, -1] + + # Create Rotation matrix U + U = np.dot(V, W) + + return U + +def special_three_points(any_crd): +# Just for center atom!!! +# [atom1+atom2/2,center atom,normal_vector] + a,b,c = any_crd[0],any_crd[1],any_crd[2] + t = np.empty((3,3)) + for i in range(3): + t[0][i] = 0.0 + t[1][i] = (a[i]+b[i])/2 + t[2][0] = any_crd[0][1]*any_crd[1][2] - any_crd[0][2]*any_crd[1][1] + t[2][1] = any_crd[0][2]*any_crd[1][0] - any_crd[0][0]*any_crd[1][2] + t[2][2] = any_crd[0][0]*any_crd[1][1] - any_crd[0][1]*any_crd[1][0] + return t + +def compute_rotation_matrix(known,unknown,string): + if string: +# rot = kabsch(known,unknown) + t1 = matrix_normalization(special_three_points(matrix_normalization(unknown))) + t2 = matrix_normalization(special_three_points(matrix_normalization(known))) + r1 = matrix_finalization(t1) + r2 = matrix_finalization(t2) + rot = np.matmul(r1.T,r2) + else: + t1 = matrix_normalization(unknown) + t2 = matrix_normalization(known) + r1 = matrix_finalization(t1) + r2 = matrix_finalization(t2) + rot = np.matmul(r1.T,r2) + + return rot + +def matrix_normalization(any_crd): + a,b,c = any_crd[0],any_crd[1],any_crd[2] + t = np.empty((3,3)) + t1norm,t2norm = 0.0,0.0 + for i in range(3): + t[0][i] = a[i]-b[i] + t[1][i] = c[i]-b[i] + t1norm += pow(t[0][i],2) + t2norm += pow(t[1][i],2) + t1norm,t2norm = 1/math.sqrt(t1norm),1/math.sqrt(t2norm) + for i in range(3): + t[0][i] = t[0][i]*t1norm + t[1][i] = t[1][i]*t2norm + return t + +def matrix_finalization(t): + dotprd = t[0][0]*t[1][0]+t[0][1]*t[1][1]+t[0][2]*t[1][2] + t[1][0] = t[1][0]-dotprd*t[0][0] + t[1][1] = t[1][1]-dotprd*t[0][1] + t[1][2] = t[1][2]-dotprd*t[0][2] + t[2][0] = t[0][1]*t[1][2]-t[0][2]*t[1][1] + t[2][1] = t[0][2]*t[1][0]-t[0][0]*t[1][2] + t[2][2] = t[0][0]*t[1][1]-t[0][1]*t[1][0] + for i in range(3): + tnorm = 0.0 + for j in range(3): + tnorm += pow(t[i][j],2) + if tnorm == float(0.0): + tnorm = 1.0 + else: + tnorm = tnorm + for j in range(3): + t[i][j] = t[i][j]/math.sqrt(tnorm) + return t + +def compute_multipole(tensor,rot): + tensor = tensor.reshape(6) + temp = np.empty((3,3)) + temp[0][0] = tensor[0]*rot[0][0] + tensor[1]*rot[0][1] + tensor[3]*rot[0][2] # XX*Rot[0][0] + XY*Rot[1][0] + YZ*Rot[2][0] + temp[0][1] = tensor[1]*rot[0][0] + tensor[2]*rot[0][1] + tensor[4]*rot[0][2] # XY*Rot[0][0] + YY*Rot[1][0] + XZ*Rot[2][0] + temp[0][2] = tensor[3]*rot[0][0] + tensor[4]*rot[0][1] + tensor[5]*rot[0][2] # YZ*Rot[0][0] + XZ*Rot[1][0] + ZZ*Rot[2][0] + temp[1][0] = tensor[0]*rot[1][0] + tensor[1]*rot[1][1] + tensor[3]*rot[1][2] # For OCTUPOLE, LOOK AT THE GAMESS CODE + temp[1][1] = tensor[1]*rot[1][0] + tensor[2]*rot[1][1] + tensor[4]*rot[1][2] + temp[1][2] = tensor[3]*rot[1][0] + tensor[4]*rot[1][1] + tensor[5]*rot[1][2] + temp[2][0] = tensor[0]*rot[2][0] + tensor[1]*rot[2][1] + tensor[3]*rot[2][2] + temp[2][1] = tensor[1]*rot[2][0] + tensor[2]*rot[2][1] + tensor[4]*rot[2][2] + temp[2][2] = tensor[3]*rot[2][0] + tensor[4]*rot[2][1] + tensor[5]*rot[2][2] + return temp + +def rotate_dipole(nefdip,rot,points,m): + efdip = np.empty((1,3)) + efdip[0] = np.array([float(points[1]),float(points[2]),float(points[3])]) + efdip[0] = np.matmul(rot,efdip.T).T + nefdip[m][0] += efdip[0][0] #x + nefdip[m][1] += efdip[0][1] #y + nefdip[m][2] += efdip[0][2] #z + nefdip[m][3] += 1.0 + + return nefdip + +def rotate_qupole(nefqua,rot,points,m): + temp,efqua = np.empty((6,1)),np.empty((3,3)) + temp = np.array([[float(points[1])],[float(points[4])],[float(points[2])], + [float(points[5])],[float(points[6])],[float(points[3])]]) + wrk = compute_multipole(temp,rot) + for n in range(3): + efqua[n] = np.matmul(rot,wrk[n]) + nefqua[m][0] += efqua[0][0] #xx + nefqua[m][1] += efqua[1][1] #yy + nefqua[m][2] += efqua[2][2] #zz + nefqua[m][3] += efqua[1][0] #xy + nefqua[m][4] += efqua[2][0] #xz + nefqua[m][5] += efqua[2][1] #yz + nefqua[m][6] += 1.0 + + return nefqua + +def rotate_ocpole(nefoct,rot,points,m): + temp01,temp02,temp03 = np.empty((6,1)),np.empty((6,1)),np.empty((6,1)) + temp04,temp05,temp06 = np.empty((3,3)),np.empty((3,3)),np.empty((3,3)) + temp07,temp08,temp09 = np.empty((2,3)),np.empty((2,3)),np.empty((2,3)) + temp10,temp11,efoct = np.empty((3,3)),np.empty((3,3)),np.empty((6,3)) + + temp01 = np.array([[float(points[1])],[float(points[4])],[float(points[6])], + [float(points[5])],[float(points[10])],[float(points[8])]]) + temp02 = np.array([[float(points[4])],[float(points[6])],[float(points[2])], + [float(points[10])],[float(points[7])],[float(points[9])]]) + temp03 = np.array([[float(points[5])],[float(points[10])],[float(points[7])], + [float(points[8])],[float(points[9])],[float(points[3])]]) + + wrk1 = compute_multipole(temp01,rot) + wrk2 = compute_multipole(temp02,rot) + wrk3 = compute_multipole(temp03,rot) + + for n in range(3): + temp04[n] = np.matmul(rot,wrk1[n]) + temp05[n] = np.matmul(rot,wrk2[n]) + temp06[n] = np.matmul(rot,wrk3[n]) + temp07[0][0],temp08[0][0],temp09[0][0] = temp04[0][0],temp05[0][0],temp06[0][0] + temp07[0][1],temp08[0][1],temp09[0][1] = temp04[1][0],temp05[1][0],temp06[1][0] + temp07[0][2],temp08[0][2],temp09[0][2] = temp04[1][1],temp05[1][1],temp06[1][1] + temp07[1][0],temp08[1][0],temp09[1][0] = temp04[2][0],temp05[2][0],temp06[2][0] + temp07[1][1],temp08[1][1],temp09[1][1] = temp04[2][1],temp05[2][1],temp06[2][1] + temp07[1][2],temp08[1][2],temp09[1][2] = temp04[2][2],temp05[2][2],temp06[2][2] + + for p in range(1): + for q in range(3): + temp10[p][q] = temp07[p][q] + temp10[p+1][q] = temp08[p][q] + temp10[p+2][q] = temp09[p][q] + temp11[p][q] = temp07[p+1][q] + temp11[p+1][q] = temp08[p+1][q] + temp11[p+2][q] = temp09[p+1][q] + temp07 = np.matmul(temp10.T,rot.T).T + temp08 = np.matmul(temp11.T,rot.T).T + + v = 0 + for p in range(3): + efoct[v] = temp07[p] + efoct[v+1] = temp08[p] + v += 2 + + nefoct[m][0] += efoct[0][0] #xxx + nefoct[m][1] += efoct[2][2] #yyy + nefoct[m][2] += efoct[5][2] #zzz + nefoct[m][3] += efoct[0][1] #xxy + nefoct[m][4] += efoct[1][0] #xxz + nefoct[m][5] += efoct[0][2] #xyy + nefoct[m][6] += efoct[3][1] #yyz + nefoct[m][7] += efoct[1][2] #xzz + nefoct[m][8] += efoct[3][2] #yzz + nefoct[m][9] += efoct[1][1] #xyz + nefoct[m][10] += 1.0 + + return nefoct + +def prepare_lmo(xyz,vector): + position = np.array([[float(xyz[1]),float(xyz[2]),float(xyz[3])]]) + tensor = np.array([[float(vector[0]),float(vector[3]),float(vector[4])], + [float(vector[6]),float(vector[1]),float(vector[5])], + [float(vector[7]),float(vector[8]),float(vector[2])]]) + return position,tensor + +def prepare_cmo(xyz): + position = np.array([[float(xyz[1]),float(xyz[2]),float(xyz[3])]]) + return position + +def rotate_lmo_tensor(neflmo,nefpol,rot,xyz1,xyz2,points,ref_lmo,ref_pol): + for pt in points: + lmo,tensor = prepare_lmo(ref_lmo[pt],ref_pol[pt]) + temp = [np.empty((1,3))] + + for n in range(3): + temp[0][0][n] = lmo[0][n]-xyz1[n] + efp = np.matmul(rot,temp[0].T).T + efpol = np.matmul(rot,tensor) + efpol = np.matmul(efpol,rot.T) + + neflmo[pt][0] += xyz2[0]+efp[0][0] #x + neflmo[pt][1] += xyz2[1]+efp[0][1] #y + neflmo[pt][2] += xyz2[2]+efp[0][2] #z + #if neflmo[pt][0] == 0.0 and neflmo[pt][1] == 0.0 and neflmo[pt][2] == 0.0: + # print('ERROR-ERROR') + neflmo[pt][3] += 1.0 + nefpol[pt][0] += efpol[0][0] #xx + nefpol[pt][1] += efpol[1][1] #yy + nefpol[pt][2] += efpol[2][2] #zz + nefpol[pt][3] += efpol[0][1] #xy + nefpol[pt][4] += efpol[0][2] #xz + nefpol[pt][5] += efpol[1][2] #yz + nefpol[pt][6] += efpol[1][0] #yx + nefpol[pt][7] += efpol[2][0] #zx + nefpol[pt][8] += efpol[2][1] #zy + nefpol[pt][9] += 1.0 + + return neflmo,nefpol + +def rotate_dyn_tensor(nefdmo,nefdol,rot,xyz1,xyz2,points,dyn_lmo,ref_dyn): + pNum = len(dyn_lmo)/12 + for k in range(12): + rep = pNum*k + for pt in points: + lmo,tensor = prepare_lmo(dyn_lmo[pt+rep],ref_dyn[pt+rep]) + temp = [np.empty((1,3))] + + for n in range(3): + temp[0][0][n] = lmo[0][n]-xyz1[n] + efp = np.matmul(rot,temp[0].T).T + efpol = np.matmul(rot,tensor) + efpol = np.matmul(efpol,rot.T) + nefdmo[pt+rep][0] += xyz2[0]+efp[0][0] #x + nefdmo[pt+rep][1] += xyz2[1]+efp[0][1] #y + nefdmo[pt+rep][2] += xyz2[2]+efp[0][2] #z + nefdmo[pt+rep][3] += 1.0 + nefdol[pt+rep][0] += efpol[0][0] #XX + nefdol[pt+rep][1] += efpol[1][1] #YY + nefdol[pt+rep][2] += efpol[2][2] #ZZ + nefdol[pt+rep][3] += efpol[0][1] #XY + nefdol[pt+rep][4] += efpol[0][2] #XZ + nefdol[pt+rep][5] += efpol[1][2] #YZ + nefdol[pt+rep][6] += efpol[1][0] #YX + nefdol[pt+rep][7] += efpol[2][0] #ZX + nefdol[pt+rep][8] += efpol[2][1] #ZY + nefdol[pt+rep][9] += 1.0 + + return nefdmo,nefdol + +def rotate_projection_wf(nefcmo,nefcof,rot,xyz1,xyz2,points,xr_lmo,orbital,n_basis,ref_wvf): + for pt in points: + tot = 0 + for n in range(len(orbital)): + if orbital[n] == "S": + tot += n_basis[n] + nefcof = s_orbital(nefcof,ref_wvf[pt],pt,tot-1) + elif orbital[n] == "P": + tot += n_basis[n] + nefcof = pl_orbital(nefcof,rot,ref_wvf[pt],pt,tot-1,True) + elif orbital[n] == "L": + tot += n_basis[n] + nefcof = pl_orbital(nefcof,rot,ref_wvf[pt],pt,tot-1,False) + elif orbital[n] == "D": + tot += n_basis[n] + nefcof = d_orbital(nefcof,rot,ref_wvf[pt],pt,tot-1) + elif orbital[n] == "F": + tot += n_basis[n] + nefcof = f_orbital(nefcof,rot,ref_wvf[pt],pt,tot-1) + else: + print("I said G orbital is not implemented!!") + quit() + nefcof[pt][tot] += 1.0 + + lmo = prepare_cmo(xr_lmo[pt]) + temp = [np.empty((1,3))] + + for n in range(3): + temp[0][0][n] = lmo[0][n]-xyz1[n] + efp = np.matmul(rot,temp[0].T).T + nefcmo[pt][0] += xyz2[0]+efp[0][0] #x + nefcmo[pt][1] += xyz2[1]+efp[0][1] #y + nefcmo[pt][2] += xyz2[2]+efp[0][2] #z + nefcmo[pt][3] += 1.0 + + return nefcmo,nefcof + +def s_orbital(nefcof,rotate,pt,end): + nefcof[pt][end] += rotate[end] + + return nefcof + +def pl_orbital(nefcof,rot,rotate,pt,end,string): + wf = np.empty((1,3)) + wf[0] = np.array([rotate[end-2],rotate[end-1],rotate[end]]) + wf[0] = np.matmul(rot,wf[0].T).T + + if string: + nefcof[pt][end-2] += wf[0][0] + nefcof[pt][end-1] += wf[0][1] + nefcof[pt][end] += wf[0][2] + else: # L orbital: does not touch the first coefficient + nefcof[pt][end-3] += rotate[end-3] + nefcof[pt][end-2] += wf[0][0] + nefcof[pt][end-1] += wf[0][1] + nefcof[pt][end] += wf[0][2] + + return nefcof + +def d_orbital(nefcof,rot,rotate,pt,end): + const = 1.154700538 + temp,wf = np.empty((6,1)),np.empty((3,3)) + temp = np.array([[rotate[end-5]],[rotate[end-2]/const],[rotate[end-4]], + [rotate[end-1]/const],[rotate[end]/const],[rotate[end-3]]]) + + wrk = compute_multipole(temp,rot) + for q in range(3): + wf[q] = np.matmul(rot,wrk[q]) + nefcof[pt][end-5] += wf[0][0] #xx + nefcof[pt][end-4] += wf[1][1] #yy + nefcof[pt][end-3] += wf[2][2] #zz + nefcof[pt][end-2] += wf[1][0]*const #xy + nefcof[pt][end-1] += wf[2][0]*const #xz + nefcof[pt][end-0] += wf[2][1]*const #yz + + return nefcof + +def f_orbital(nefcof,rot,rotate,pt,end): + two,three,sqrt3,sqrt5 = 2.0,3.0,1.732050808,2.236067978 + temp01,temp02,temp03 = np.empty((6,1)),np.empty((6,1)),np.empty((6,1)) + temp04,temp05,temp06 = np.empty((3,3)),np.empty((3,3)),np.empty((3,3)) + temp07,temp08,temp09 = np.empty((2,3)),np.empty((2,3)),np.empty((2,3)) + temp10,temp11,wf = np.empty((3,3)),np.empty((3,3)),np.empty((6,3)) + + temp01 = np.array([[rotate[end-9]],[rotate[end-6]*sqrt5/three],[rotate[end-4]*sqrt5/three], + [rotate[end-5]*sqrt5/three],[rotate[end]*(sqrt5/three)*(sqrt3/two)],[rotate[end-2]*sqrt5/three]]) + temp02 = np.array([[rotate[end-6]*sqrt5/three],[rotate[end-4]*sqrt5/three],[rotate[end-8]], + [rotate[end]*(sqrt5/three)*(sqrt3/two)],[rotate[end-3]*sqrt5/three],[rotate[end-1]*sqrt5/three]]) + temp03 = np.array([[rotate[end-5]*sqrt5/three],[rotate[end]*(sqrt5/three)*(sqrt3/two)],[rotate[end-3]*sqrt5/three], + [rotate[end-2]*sqrt5/three],[rotate[end-1]*sqrt5/three],[rotate[end-7]]]) + + wrk1 = compute_multipole(temp01,rot) + wrk2 = compute_multipole(temp02,rot) + wrk3 = compute_multipole(temp03,rot) + + for q in range(3): + temp04[q] = np.matmul(rot,wrk1[q]) + temp05[q] = np.matmul(rot,wrk2[q]) + temp06[q] = np.matmul(rot,wrk3[q]) + temp07[0][0],temp08[0][0],temp09[0][0] = temp04[0][0],temp05[0][0],temp06[0][0] + temp07[0][1],temp08[0][1],temp09[0][1] = temp04[1][0],temp05[1][0],temp06[1][0] + temp07[0][2],temp08[0][2],temp09[0][2] = temp04[1][1],temp05[1][1],temp06[1][1] + temp07[1][0],temp08[1][0],temp09[1][0] = temp04[2][0],temp05[2][0],temp06[2][0] + temp07[1][1],temp08[1][1],temp09[1][1] = temp04[2][1],temp05[2][1],temp06[2][1] + temp07[1][2],temp08[1][2],temp09[1][2] = temp04[2][2],temp05[2][2],temp06[2][2] + + for r in range(1): + for s in range(3): + temp10[r][s] = temp07[r][s] + temp10[r+1][s] = temp08[r][s] + temp10[r+2][s] = temp09[r][s] + temp11[r][s] = temp07[r+1][s] + temp11[r+1][s] = temp08[r+1][s] + temp11[r+2][s] = temp09[r+1][s] + temp07 = np.matmul(temp10.T,rot.T).T + temp08 = np.matmul(temp11.T,rot.T).T + + v = 0 + for r in range(3): + wf[v] = temp07[r] + wf[v+1] = temp08[r] + v += 2 + + nefcof[pt][end-9] += wf[0][0] #xxx + nefcof[pt][end-8] += wf[2][2] #yyy + nefcof[pt][end-7] += wf[5][2] #zzz + nefcof[pt][end-6] += wf[0][1]/sqrt5*three #xxy + nefcof[pt][end-5] += wf[1][0]/sqrt5*three #xxz + nefcof[pt][end-4] += wf[0][2]/sqrt5*three #xyy + nefcof[pt][end-3] += wf[3][1]/sqrt5*three #yyz + nefcof[pt][end-2] += wf[1][2]/sqrt5*three #xzz + nefcof[pt][end-1] += wf[3][2]/sqrt5*three #yzz + nefcof[pt][end-0] += wf[1][1]/sqrt5*three/sqrt3*two #xyz + + return nefcof + +def wrt_ele(find_file,parm_file,xyz,nefdip,nefqua,nefoct,no_pol,no_disp,no_xr,fragname,orig_frag): + string = '' + + for i in range(find_file.index("COORDINATES (BOHR)")+1): + if 'EFP DATA' in parm_file[i]: + string += 'PREPARED BY step4.Flexible_V7.py FROM '+ orig_frag + '\n' + continue + elif '$' in parm_file[i]: + string += ' $'+fragname+'\n' + else: + string += parm_file[i] + + lineN = 0 + for i in range(find_file.index("COORDINATES (BOHR)")+1,find_file.index("MONOPOLES")-1,1): + x0 = '{:{align}{width}}'.format('%s'%find_file[i].split()[0],align='<',width=len(find_file[i].split()[0])) + x1 = '{:{align}{width}}'.format('%.10f'%float(xyz[lineN][0]),align='>',width=23-len(find_file[i].split()[0])) + x2 = '{:{align}{width}}'.format('%.10f'%float(xyz[lineN][1]),align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%float(xyz[lineN][2]),align='>',width=15) + x4 = '{:{align}{width}}'.format('%s'%find_file[i].split()[4],align='>',width=12) + x5 = '{:{align}{width}}'.format('%s'%find_file[i].split()[5],align='>',width=5) + lineN += 1 + string += (x0+x1+x2+x3+x4+x5+'\n') + string += ' STOP\n' + + for i in range(find_file.index("MONOPOLES"),find_file.index("DIPOLES")+1,1): + string += parm_file[i] + + lineN = 0 + for i in range(find_file.index("DIPOLES")+1,find_file.index("QUADRUPOLES")-1,1): + v1,v2,v3 = nefdip[lineN][0]/nefdip[lineN][3],nefdip[lineN][1]/nefdip[lineN][3],nefdip[lineN][2]/nefdip[lineN][3] + x0 = '{:{align}{width}}'.format('%s'%find_file[i].split()[0],align='<',width=len(find_file[i].split()[0])) + x1 = '{:{align}{width}}'.format('%.10f'%v1,align='>',width=24-len(find_file[i].split()[0])) + x2 = '{:{align}{width}}'.format('%.10f'%v2,align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%v3,align='>',width=16) + lineN += 1 + string += (x0+x1+x2+x3+'\n') + string += ' STOP\n QUADRUPOLES\n' + + lineN = 0 + y1 = 1 + for i in range(find_file.index("QUADRUPOLES")+1,find_file.index("OCTUPOLES")-1,1): + if y1%2 == 1: + v1,v2 = nefqua[lineN][0]/nefqua[lineN][6],nefqua[lineN][1]/nefqua[lineN][6] + v3,v4 = nefqua[lineN][2]/nefqua[lineN][6],nefqua[lineN][3]/nefqua[lineN][6] + x0 = '{:{align}{width}}'.format('%s'%find_file[i].split()[0],align='<',width=len(find_file[i].split()[0])) + x1 = '{:{align}{width}}'.format('%.10f'%v1,align='>',width=24-len(find_file[i].split()[0])) + x2 = '{:{align}{width}}'.format('%.10f'%v2,align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%v3,align='>',width=16) + x4 = '{:{align}{width}}'.format('%.10f'%v4,align='>',width=16) + string += (x0+x1+x2+x3+x4+' >'+'\n') + else: + v5,v6 = nefqua[lineN][4]/nefqua[lineN][6],nefqua[lineN][5]/nefqua[lineN][6] + x1 = '{:{align}{width}}'.format('%.10f'%v5,align='>',width=24) + x2 = '{:{align}{width}}'.format('%.10f'%v6,align='>',width=16) + lineN += 1 + string += (x1+x2+'\n') + y1 += 1 + string += ' STOP\n OCTUPOLES\n' + + lineN = 0 + y2 = 1 + if not no_pol: + end_of_ocpol = find_file.index("POLARIZABLE POINTS")-1 + else: + if not no_disp: + end_of_ocpol = find_file.index("DYNAMIC POLARIZABLE POINTS")-1 + else: + if not no_xr: + end_of_ocpol = find_file.index("PROJECTION BASIS SET")-1 + else: + end_of_ocpol = find_file.index("SCREEN2 (FROM VDWSCL= 0.700)")-1 + for i in range(find_file.index("OCTUPOLES")+1,end_of_ocpol,1): + if y2%3 == 1: + v1,v2 = nefoct[lineN][0]/nefoct[lineN][10],nefoct[lineN][1]/nefoct[lineN][10] + v3,v4 = nefoct[lineN][2]/nefoct[lineN][10],nefoct[lineN][3]/nefoct[lineN][10] + x0 = '{:{align}{width}}'.format('%s'%find_file[i].split()[0],align='<',width=len(find_file[i].split()[0])) + x1 = '{:{align}{width}}'.format('%.9f'%v1,align='>',width=24-len(find_file[i].split()[0])) + x2 = '{:{align}{width}}'.format('%.9f'%v2,align='>',width=16) + x3 = '{:{align}{width}}'.format('%.9f'%v3,align='>',width=16) + x4 = '{:{align}{width}}'.format('%.9f'%v4,align='>',width=16) + string += (x0+x1+x2+x3+x4+' >'+'\n') + elif y2%3 == 2: + v5,v6 = nefoct[lineN][4]/nefoct[lineN][10],nefoct[lineN][5]/nefoct[lineN][10] + v7,v8 = nefoct[lineN][6]/nefoct[lineN][10],nefoct[lineN][7]/nefoct[lineN][10] + x1 = '{:{align}{width}}'.format('%.9f'%v5,align='>',width=24) + x2 = '{:{align}{width}}'.format('%.9f'%v6,align='>',width=16) + x3 = '{:{align}{width}}'.format('%.9f'%v7,align='>',width=16) + x4 = '{:{align}{width}}'.format('%.9f'%v8,align='>',width=16) + string += (x1+x2+x3+x4+' >'+'\n') + else: + v9,v10 = nefoct[lineN][8]/nefoct[lineN][10],nefoct[lineN][9]/nefoct[lineN][10] + x1 = '{:{align}{width}}'.format('%.9f'%v9,align='>',width=24) + x2 = '{:{align}{width}}'.format('%.9f'%v10,align='>',width=16) + lineN += 1 + string += (x1+x2+'\n') + y2 += 1 + string += ' STOP\n' + + return string + +def wrt_pol(find_file,neflmo,nefpol,no_disp,no_xr): + string = ' POLARIZABLE POINTS\n' + if not no_disp: + end_of_pol = find_file.index("DYNAMIC POLARIZABLE POINTS")-1 + else: + if not no_xr: + end_of_pol = find_file.index("PROJECTION BASIS SET")-1 + else: + end_of_pol = find_file.index("SCREEN2 (FROM VDWSCL= 0.700)")-1 + + lineN = 0 + t = 1 + for i in range(find_file.index("POLARIZABLE POINTS")+1,end_of_pol,1): + #print(lineN, neflmo) + if t%4 == 1: + v1,v2,v3 = neflmo[lineN][0]/neflmo[lineN][3],neflmo[lineN][1]/neflmo[lineN][3],neflmo[lineN][2]/neflmo[lineN][3] + x0 = '{:{align}{width}}'.format('%s'%find_file[i].split()[0],align='<',width=len(find_file[i].split()[0])) + x1 = '{:{align}{width}}'.format('%.10f'%v1,align='>',width=21-len(find_file[i].split()[0])) + x2 = '{:{align}{width}}'.format('%.10f'%v2,align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%v3,align='>',width=15) + string += (x0+x1+x2+x3+'\n') + elif t%4 == 2: + v1,v2 = nefpol[lineN][0]/nefpol[lineN][9],nefpol[lineN][1]/nefpol[lineN][9] + v3,v4 = nefpol[lineN][2]/nefpol[lineN][9],nefpol[lineN][3]/nefpol[lineN][9] + x1 = '{:{align}{width}}'.format('%.10f'%v1,align='>',width=21) + x2 = '{:{align}{width}}'.format('%.10f'%v2,align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%v3,align='>',width=15) + x4 = '{:{align}{width}}'.format('%.10f'%v4,align='>',width=15) + string += (x1+x2+x3+x4+' >'+'\n') + elif t%4 == 3: + v5,v6 = nefpol[lineN][4]/nefpol[lineN][9],nefpol[lineN][5]/nefpol[lineN][9] + v7,v8 = nefpol[lineN][6]/nefpol[lineN][9],nefpol[lineN][7]/nefpol[lineN][9] + x1 = '{:{align}{width}}'.format('%.10f'%v5,align='>',width=21) + x2 = '{:{align}{width}}'.format('%.10f'%v6,align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%v7,align='>',width=15) + x4 = '{:{align}{width}}'.format('%.10f'%v8,align='>',width=15) + string += (x1+x2+x3+x4+' >'+'\n') + else: + v9 = nefpol[lineN][8]/nefpol[lineN][9] + x1 = '{:{align}{width}}'.format('%.10f'%v9,align='>',width=21) + lineN += 1 + string += (x1+'\n') + t+= 1 + string += ' STOP\n' + + return string + +def wrt_disp(find_file,nefdmo,nefdol,no_xr): + string = ' DYNAMIC POLARIZABLE POINTS\n' + if not no_xr: + end_of_dyn = find_file.index("PROJECTION BASIS SET")-1 + else: + end_of_dyn = find_file.index("SCREEN2 (FROM VDWSCL= 0.700)")-1 + + lineN = 0 + t = 1 + for i in range(find_file.index("DYNAMIC POLARIZABLE POINTS")+1,end_of_dyn,1): + if t%4 == 1: + v1,v2,v3 = nefdmo[lineN][0]/nefdmo[lineN][3],nefdmo[lineN][1]/nefdmo[lineN][3],nefdmo[lineN][2]/nefdmo[lineN][3] + x0 = '{:{align}{width}}'.format('%s'%find_file[i].split()[0],align='<',width=len(find_file[i].split()[0])) + x1 = '{:{align}{width}}'.format('%s'%find_file[i].split()[1],align='>',width=5-len(find_file[i].split()[0])) + x2 = '{:{align}{width}}'.format('%.10f'%v1,align='>',width=23-len(x0)-len(x1)) + x3 = '{:{align}{width}}'.format('%.10f'%v2,align='>',width=15) + x4 = '{:{align}{width}}'.format('%.10f'%v3,align='>',width=15) + if int(find_file[i].split()[1]) == 1: + line2 = find_file[i].split('--') + x5 = '{:{align}{width}}'.format('%s'%line2[1],align='>',width=22) + string += (x0+x1+x2+x3+x4+' --'+x5+'\n') + else: + string += (x0+x1+x2+x3+x4+'\n') + elif t%4 == 2: + v1,v2 = nefdol[lineN][0]/nefdol[lineN][9],nefdol[lineN][1]/nefdol[lineN][9] + v3,v4 = nefdol[lineN][2]/nefdol[lineN][9],nefdol[lineN][3]/nefdol[lineN][9] + x1 = '{:{align}{width}}'.format('%.10f'%v1,align='>',width=16) + x2 = '{:{align}{width}}'.format('%.10f'%v2,align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%v3,align='>',width=16) + x4 = '{:{align}{width}}'.format('%.10f'%v4,align='>',width=16) + string += (x1+x2+x3+x4+' >'+'\n') + elif t%4 == 3: + v5,v6 = nefdol[lineN][4]/nefdol[lineN][9],nefdol[lineN][5]/nefdol[lineN][9] + v7,v8 = nefdol[lineN][6]/nefdol[lineN][9],nefdol[lineN][7]/nefdol[lineN][9] + x1 = '{:{align}{width}}'.format('%.10f'%v5,align='>',width=16) + x2 = '{:{align}{width}}'.format('%.10f'%v6,align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%v7,align='>',width=16) + x4 = '{:{align}{width}}'.format('%.10f'%v8,align='>',width=16) + string += (x1+x2+x3+x4+' >'+'\n') + else: + v9 = nefdol[lineN][8]/nefdol[lineN][9] + x1 = '{:{align}{width}}'.format('%.10f'%v9,align='>',width=16) + lineN += 1 + string += (x1+'\n') + t+= 1 + string += ' STOP\n' + + return string + +def wrt_xr(find_file,parm_file,nefcmo,nefcof,xyz,fock_mat): + string = ' PROJECTION BASIS SET\n' + lineN = 0 + c,w = 0,0 + + for i in range(find_file.index("PROJECTION BASIS SET")+1,find_file.index("MULTIPLICITY 1")-1,1): + if "A" in find_file[i]: + x0 = '{:{align}{width}}'.format('%s'%find_file[i].split()[0],align='<',width=len(find_file[i].split()[0])) + x1 = '{:{align}{width}}'.format('%.10f'%xyz[c][0],align='>',width=25-len(find_file[i].split()[0])) + x2 = '{:{align}{width}}'.format('%.10f'%xyz[c][1],align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%xyz[c][2],align='>',width=15) + x4 = '{:{align}{width}}'.format('%s'%find_file[i].split()[4],align='>',width=7) + c += 1 + string += (x0+x1+x2+x3+x4+'\n') + else: + string += parm_file[i] + string += ' STOP\n MULTIPLICITY 1\n STOP\n' + string += parm_file[find_file.index("MULTIPLICITY 1")+2] + + for i in range(find_file.index("MULTIPLICITY 1")+3,find_file.index("FOCK MATRIX ELEMENTS"),1): + x0 = '{:{align}{width}}'.format('%s'%parm_file[i][0:2],align='>',width=2) + x1 = '{:{align}{width}}'.format('%s'%parm_file[i][2:5],align='>',width=3) + seek1,seek2,division = int(parm_file[i][0:2])-1,int(parm_file[i][2:5])*5,len(nefcof[0])-1 + + if seek2-5 < len(nefcof[0])-1: + v = nefcof[seek1][seek2-5]/nefcof[seek1][division] + x2 = '{:{align}{width}}'.format('%.8e'%v,align='>',width=15) + string += x0+x1+x2 + if seek2-4 < len(nefcof[0])-1: + v = nefcof[seek1][seek2-4]/nefcof[seek1][division] + x3 = '{:{align}{width}}'.format('%.8e'%v,align='>',width=15) + string += x3 + if seek2-3 < len(nefcof[0])-1: + v = nefcof[seek1][seek2-3]/nefcof[seek1][division] + x4 = '{:{align}{width}}'.format('%.8e'%v,align='>',width=15) + string += x4 + if seek2-2 < len(nefcof[0])-1: + v = nefcof[seek1][seek2-2]/nefcof[seek1][division] + x5 = '{:{align}{width}}'.format('%.8e'%v,align='>',width=15) + string += x5 + if seek2-1 < len(nefcof[0])-1: + v = nefcof[seek1][seek2-1]/nefcof[seek1][division] + x6 = '{:{align}{width}}'.format('%.8e'%v,align='>',width=15) + string += x6 + string += '\n' + + for i in range(len(fock_mat)): + string += fock_mat[i] + string += ' LMO CENTROIDS\n' + + for i in range(find_file.index("LMO CENTROIDS")+1,find_file.index("SCREEN2 (FROM VDWSCL= 0.700)")-1,1): + v1,v2,v3 = nefcmo[w][0]/nefcmo[w][3],nefcmo[w][1]/nefcmo[w][3],nefcmo[w][2]/nefcmo[w][3] + x1 = '{:{align}{width}}'.format('%s'%find_file[i].split()[0],align='<',width=len(find_file[i].split()[0])) + x2 = '{:{align}{width}}'.format('%.10f'%v1,align='>',width=23-len(find_file[i].split()[0])) + x3 = '{:{align}{width}}'.format('%.10f'%v2,align='>',width=15) + x4 = '{:{align}{width}}'.format('%.10f'%v3,align='>',width=15) + w += 1 + string += (x1+x2+x3+x4+'\n') + string += ' STOP\n' + + return string + +def rotate_parameter(reflines,coord_xyz2,no_pol,no_disp,no_xr,fragname, orig_frag): + parm_file,find_file = read_efp_param(reflines) + # coor_xyz1,bmid_xyz1,ref_monop,ref_coor_dip,ref_bmid_dip,ref_coor_qup,ref_bmid_qup,ref_coor_ocp,ref_bmid_ocp,screen + # Default, electrostatics + default = save_efp_coord_and_ele(parm_file,find_file,no_pol,no_disp,no_xr) + atom_bmid = len(default[0])+len(default[1]) + nefdip,nefqua,nefoct = np.zeros((atom_bmid,4)),np.zeros((atom_bmid,7)),np.zeros((atom_bmid,11)) + bmid_xyz2,connection = save_bond_connection(default[1],coord_xyz2) + + #print(connection) + + if not no_pol: + ref_lmo,ref_pol = save_efp_pol(parm_file,find_file,no_pol,no_disp,no_xr) + lmo_pts = lmo_position(default[0],default[1],ref_lmo,len(default[0]),connection) + #print(ref_lmo) + #print(lmo_pts) + neflmo,nefpol = np.zeros((len(ref_lmo),4)),np.zeros((len(ref_lmo),10)) + if not no_disp: + dyn_lmo,ref_dyn = save_efp_dyn(parm_file,find_file,no_pol,no_disp,no_xr) + temp = [] + for i in range(len(dyn_lmo)/12): + temp.append(dyn_lmo[i]) + dyn_pts = lmo_position(default[0],default[1],np.array(temp),len(default[0]),connection) + nefdmo,nefdol = np.zeros((len(dyn_lmo),4)),np.zeros((len(dyn_lmo),10)) + if not no_xr: + orbital,n_basis,ref_wvf,fock_mat,xr_lmo = save_efp_exr(parm_file,find_file,no_pol,no_disp,no_xr) + xr_pts = lmo_position(default[0],default[1],xr_lmo,len(default[0]),connection) + nefcmo,nefcof = np.zeros((len(xr_lmo),4)),np.zeros((len(xr_lmo),sum(n_basis)+1)) + + #=============================================================================# + # Rotation Starts! # + # Obtain three rotation matrices to transfer parameters during each iteration # + #=============================================================================# + + for i in range(len(connection)): + # Define Three Connected Atoms + P1 = define_three_points(connection[i],default[0],0,1,2,1,2,3) + P2 = define_three_points(connection[i],default[0],2,1,0,1,2,3) + Q1 = define_three_points(connection[i],coord_xyz2,0,1,2,0,1,2) + Q2 = define_three_points(connection[i],coord_xyz2,2,1,0,0,1,2) + + rot_left = compute_rotation_matrix(P1,Q1,False) + rot_right = compute_rotation_matrix(P2,Q2,False) +# rot_cent = compute_rotation_matrix(P1,Q1,True) + + nefdip = rotate_dipole(nefdip,rot_left,default[3][connection[i][0]-1],connection[i][0]-1) + nefdip = rotate_dipole(nefdip,rot_left,default[4][connection[i][3]-1],connection[i][3]-1+len(coord_xyz2)) + nefdip = rotate_dipole(nefdip,rot_left,default[3][connection[i][1]-1],connection[i][1]-1) + nefdip = rotate_dipole(nefdip,rot_right,default[3][connection[i][1]-1],connection[i][1]-1) + nefdip = rotate_dipole(nefdip,rot_right,default[3][connection[i][2]-1],connection[i][2]-1) + nefdip = rotate_dipole(nefdip,rot_right,default[4][connection[i][4]-1],connection[i][4]-1+len(coord_xyz2)) + nefqua = rotate_qupole(nefqua,rot_left,default[5][connection[i][0]-1],connection[i][0]-1) + nefqua = rotate_qupole(nefqua,rot_left,default[6][connection[i][3]-1],connection[i][3]-1+len(coord_xyz2)) + nefqua = rotate_qupole(nefqua,rot_left,default[5][connection[i][1]-1],connection[i][1]-1) + nefqua = rotate_qupole(nefqua,rot_right,default[5][connection[i][1]-1],connection[i][1]-1) + nefqua = rotate_qupole(nefqua,rot_right,default[5][connection[i][2]-1],connection[i][2]-1) + nefqua = rotate_qupole(nefqua,rot_right,default[6][connection[i][4]-1],connection[i][4]-1+len(coord_xyz2)) + nefoct = rotate_ocpole(nefoct,rot_left,default[7][connection[i][0]-1],connection[i][0]-1) + nefoct = rotate_ocpole(nefoct,rot_left,default[8][connection[i][3]-1],connection[i][3]-1+len(coord_xyz2)) + nefoct = rotate_ocpole(nefoct,rot_left,default[7][connection[i][1]-1],connection[i][1]-1) + nefoct = rotate_ocpole(nefoct,rot_right,default[7][connection[i][1]-1],connection[i][1]-1) + nefoct = rotate_ocpole(nefoct,rot_right,default[7][connection[i][2]-1],connection[i][2]-1) + nefoct = rotate_ocpole(nefoct,rot_right,default[8][connection[i][4]-1],connection[i][4]-1+len(coord_xyz2)) + +# nefdip = rotate_dipole(nefdip,rot_left,default[3][connection[i][0]-1],connection[i][0]-1) +# nefdip = rotate_dipole(nefdip,rot_left,default[4][connection[i][3]-1],connection[i][3]-1+len(coord_xyz2)) +# nefdip = rotate_dipole(nefdip,rot_cent,default[3][connection[i][1]-1],connection[i][1]-1) +# nefdip = rotate_dipole(nefdip,rot_right,default[3][connection[i][2]-1],connection[i][2]-1) +# nefdip = rotate_dipole(nefdip,rot_right,default[4][connection[i][4]-1],connection[i][4]-1+len(coord_xyz2)) +# +# nefqua = rotate_qupole(nefqua,rot_left,default[5][connection[i][0]-1],connection[i][0]-1) +# nefqua = rotate_qupole(nefqua,rot_left,default[6][connection[i][3]-1],connection[i][3]-1+len(coord_xyz2)) +# nefqua = rotate_qupole(nefqua,rot_cent,default[5][connection[i][1]-1],connection[i][1]-1) +# nefqua = rotate_qupole(nefqua,rot_right,default[5][connection[i][2]-1],connection[i][2]-1) +# nefqua = rotate_qupole(nefqua,rot_right,default[6][connection[i][4]-1],connection[i][4]-1+len(coord_xyz2)) +# +# nefoct = rotate_ocpole(nefoct,rot_left,default[7][connection[i][0]-1],connection[i][0]-1) +# nefoct = rotate_ocpole(nefoct,rot_left,default[8][connection[i][3]-1],connection[i][3]-1+len(coord_xyz2)) +# nefoct = rotate_ocpole(nefoct,rot_cent,default[7][connection[i][1]-1],connection[i][1]-1) +# nefoct = rotate_ocpole(nefoct,rot_right,default[7][connection[i][2]-1],connection[i][2]-1) +# nefoct = rotate_ocpole(nefoct,rot_right,default[8][connection[i][4]-1],connection[i][4]-1+len(coord_xyz2)) + + temp1,temp2 = [],[] + for j in range(3): + x1,y1,z1 = default[0][connection[i][j]-1][1],default[0][connection[i][j]-1][2],default[0][connection[i][j]-1][3] + x2,y2,z2 = coord_xyz2[connection[i][j]-1][0],coord_xyz2[connection[i][j]-1][1],coord_xyz2[connection[i][j]-1][2] + temp1.append([float(x1),float(y1),float(z1)]) + temp2.append([float(x2),float(y2),float(z2)]) + + if not no_pol: + neflmo,nefpol = rotate_lmo_tensor(neflmo,nefpol,rot_left,temp1[0],temp2[0],lmo_pts[i][0],ref_lmo,ref_pol) + neflmo,nefpol = rotate_lmo_tensor(neflmo,nefpol,rot_left,temp1[1],temp2[1],lmo_pts[i][1],ref_lmo,ref_pol) + neflmo,nefpol = rotate_lmo_tensor(neflmo,nefpol,rot_right,temp1[1],temp2[1],lmo_pts[i][1],ref_lmo,ref_pol) + neflmo,nefpol = rotate_lmo_tensor(neflmo,nefpol,rot_right,temp1[2],temp2[2],lmo_pts[i][2],ref_lmo,ref_pol) + + if not no_disp: + nefdmo,nefdol = rotate_dyn_tensor(nefdmo,nefdol,rot_left,temp1[0],temp2[0],dyn_pts[i][0],dyn_lmo,ref_dyn) + nefdmo,nefdol = rotate_dyn_tensor(nefdmo,nefdol,rot_left,temp1[1],temp2[1],dyn_pts[i][1],dyn_lmo,ref_dyn) + nefdmo,nefdol = rotate_dyn_tensor(nefdmo,nefdol,rot_right,temp1[1],temp2[1],dyn_pts[i][1],dyn_lmo,ref_dyn) + nefdmo,nefdol = rotate_dyn_tensor(nefdmo,nefdol,rot_right,temp1[2],temp2[2],dyn_pts[i][2],dyn_lmo,ref_dyn) + + if not no_xr: + nefcmo,nefcof = rotate_projection_wf(nefcmo,nefcof,rot_left,temp1[0],temp2[0],xr_pts[i][0],xr_lmo,orbital,n_basis,ref_wvf) + nefcmo,nefcof = rotate_projection_wf(nefcmo,nefcof,rot_left,temp1[1],temp2[1],xr_pts[i][1],xr_lmo,orbital,n_basis,ref_wvf) + nefcmo,nefcof = rotate_projection_wf(nefcmo,nefcof,rot_right,temp1[1],temp2[1],xr_pts[i][1],xr_lmo,orbital,n_basis,ref_wvf) + nefcmo,nefcof = rotate_projection_wf(nefcmo,nefcof,rot_right,temp1[2],temp2[2],xr_pts[i][2],xr_lmo,orbital,n_basis,ref_wvf) + +# if not no_pol: +# neflmo,nefpol = rotate_lmo_tensor(neflmo,nefpol,rot_left,temp1[0],temp2[0],lmo_pts[i][0],ref_lmo,ref_pol) +# neflmo,nefpol = rotate_lmo_tensor(neflmo,nefpol,rot_cent,temp1[1],temp2[1],lmo_pts[i][1],ref_lmo,ref_pol) +# neflmo,nefpol = rotate_lmo_tensor(neflmo,nefpol,rot_right,temp1[2],temp2[2],lmo_pts[i][2],ref_lmo,ref_pol) +# +# if not no_disp: +# nefdmo,nefdol = rotate_dyn_tensor(nefdmo,nefdol,rot_left,temp1[0],temp2[0],dyn_pts[i][0],dyn_lmo,ref_dyn) +# nefdmo,nefdol = rotate_dyn_tensor(nefdmo,nefdol,rot_cent,temp1[1],temp2[1],dyn_pts[i][1],dyn_lmo,ref_dyn) +# nefdmo,nefdol = rotate_dyn_tensor(nefdmo,nefdol,rot_right,temp1[2],temp2[2],dyn_pts[i][2],dyn_lmo,ref_dyn) +# +# if not no_xr: +# nefcmo,nefcof = rotate_projection_wf(nefcmo,nefcof,rot_left,temp1[0],temp2[0],xr_pts[i][0],xr_lmo,orbital,n_basis,ref_wvf) +# nefcmo,nefcof = rotate_projection_wf(nefcmo,nefcof,rot_cent,temp1[1],temp2[1],xr_pts[i][1],xr_lmo,orbital,n_basis,ref_wvf) +# nefcmo,nefcof = rotate_projection_wf(nefcmo,nefcof,rot_right,temp1[2],temp2[2],xr_pts[i][2],xr_lmo,orbital,n_basis,ref_wvf) + +# outputfile = 'test1.efp' +# ofile = open(outputfile,'w') + + xyz = [] + for i in range(len(coord_xyz2)): + xyz.append(coord_xyz2[i]) + for i in range(len(bmid_xyz2)): + xyz.append(list(bmid_xyz2[i][1:])) + + new_parm = '' + new_parm += wrt_ele(find_file,parm_file,xyz,nefdip,nefqua,nefoct,no_pol,no_disp,no_xr,fragname,orig_frag) + + if not no_pol and not no_disp and not no_xr: + print("Rotate Electrostatic+Polarization+Dispersion+Exchange-Repulsion") + new_parm += wrt_pol(find_file,neflmo,nefpol,no_disp,no_xr) + new_parm += wrt_disp(find_file,nefdmo,nefdol,no_xr) + new_parm += wrt_xr(find_file,parm_file,nefcmo,nefcof,xyz,fock_mat) + elif not no_pol and not no_disp and no_xr: + print("Rotate Electrostatic+Polarization+Dispersion") + new_parm += wrt_pol(find_file,neflmo,nefpol,no_disp,no_xr) + new_parm += wrt_disp(find_file,nefdmo,nefdol,no_xr) + elif not no_pol and no_disp and not no_xr: + print("Rotate Electrostatic+Polarization+Exchange-Repulsion") + new_parm += wrt_pol(find_file,neflmo,nefpol,no_disp,no_xr) + new_parm += wrt_xr(find_file,parm_file,nefcmo,nefcof,xyz,fock_mat) + elif not no_pol and no_disp and no_xr: + print("Rotate Electrostatic+Polarization") + new_parm += wrt_pol(find_file,neflmo,nefpol,no_disp,no_xr) + elif no_pol and not no_disp and not no_xr: + print("Rotate Electrostatic+Dispersion+Exchange-Repulsion") + new_parm += wrt_disp(find_file,nefdmo,nefdol,no_xr) + new_parm += wrt_xr(find_file,parm_file,nefcmo,nefcof,xyz,fock_mat) + elif no_pol and not no_disp and no_xr: + print("Rotate Electrostatic+Dispersion") + new_parm += wrt_disp(find_file,nefdmo,nefdol,no_xr) + elif no_pol and no_disp and not no_xr: + print("Rotate Electrostatic+Exchange-Repulsion") + new_parm += wrt_xr(find_file,parm_file,nefcmo,nefcof,xyz,fock_mat) + else: + print("Rotate Electrostatic") + + for i in range(len(default[9])): + new_parm += default[9][i] + + return new_parm +#=============================================================# +# combine two efp files ( should be written efficiently !! ) # +#=============================================================# +def find_line_number(parm): + lineN = 0 + for i in range(len(parm.split('\n'))): + line = parm.split('\n')[i].strip() + lineN += 1 + if 'COORDINATES (BOHR)' in line: + coord1 = lineN + elif 'MONOPOLES' in line: + coord2 = lineN-1 + mon1 = lineN + elif 'DIPOLES' in line: + mon2 = lineN-1 + dip1 = lineN + elif 'QUADRUPOLES' in line: + dip2 = lineN-1 + qup1 = lineN + elif 'OCTUPOLES' in line: + qup2 = lineN-1 + ocp1 = lineN + elif 'POLARIZABLE POINTS' in line: + if 'DYNAMIC' not in line: + ocp2 = lineN-1 + pol1 = lineN + else: + pol2 = lineN-1 + dyn1 = lineN + elif 'PROJECTION BASIS SET' in line: + dyn2 = lineN-1 + bas1 = lineN + elif 'MULTIPLICITY' in line: + bas2 = lineN-1 + elif 'PROJECTION WAVEFUNCTION' in line: + wav1 = lineN + lmon,basn = int(line.split()[2]),int(line.split()[3]) + elif 'FOCK MATRIX ELEMENTS' in line: + wav2 = lineN + foc1 = lineN + elif 'LMO CENTROIDS' in line: + foc2 = lineN + lmo1 = lineN + elif 'SCREEN' in line: + if line.split()[0] == 'SCREEN2': + lmo2 = lineN-1 + scr21 = lineN + else: + scr22 = lineN-1 + scr11 = lineN + elif '$END' in line: + scr12 = lineN-1 + else: + continue + + return coord1,coord2,mon1,mon2,dip1,dip2,qup1,qup2,ocp1,ocp2,pol1,pol2,dyn1,dyn2,bas1,bas2,wav1,wav2,foc1,foc2,lmo1,lmo2,scr21,scr22,scr11,scr12,lmon,basn + +def combine_parm(back_parm,side_parm,back_anum): + + b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15,b16,b17,b18,b19,b20,b21,b22,b23,b24,b25,b26,b_lmo,b_bas = find_line_number(back_parm) + s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,s16,s17,s18,s19,s20,s21,s22,s23,s24,s25,s26,s_lmo,s_bas = find_line_number(side_parm) + + parm_out = '' + parm_out += combine_coordinate(back_parm.split('\n')[b1:b2-1],side_parm.split('\n')[s1:s2-1],back_anum) + parm_out += combine_monopole(back_parm.split('\n')[b3:b4-1],side_parm.split('\n')[s3:s4-1],back_anum) + parm_out += combine_dipole(back_parm.split('\n')[b5:b6-1],side_parm.split('\n')[s5:s6-1],back_anum) + parm_out += combine_quadrupole(back_parm.split('\n')[b7:b8-1],side_parm.split('\n')[s7:s8-1],back_anum) + parm_out += combine_octupole(back_parm.split('\n')[b9:b10-1],side_parm.split('\n')[s9:s10-1],back_anum) + static_pol,lmo_cent = combine_polarizability(back_parm.split('\n')[b11:b12-1]+side_parm.split('\n')[s11:s12-1]) + parm_out += static_pol + parm_out += combine_dynamic_polarizability(back_parm.split('\n')[b13:b14-1],side_parm.split('\n')[s13:s14-1]) + parm_out += combine_projection_basis_set(back_parm.split('\n')[b15:b16-1],side_parm.split('\n')[s15:s16-1],back_anum) + parm_out += combine_wave_function(back_parm.split('\n')[b17:b18-1],side_parm.split('\n')[s17:s18-1],b_lmo,b_bas,s_lmo,s_bas) + parm_out += combine_fock_matrix(back_parm.split('\n')[b19:b20-1],side_parm.split('\n')[s19:s20-1],b_lmo,s_lmo) + parm_out += lmo_cent + parm_out += combine_screen(back_parm.split('\n')[b23:b24-1],side_parm.split('\n')[s23:s24-1],back_anum,'SCREEN2') + parm_out += combine_screen(back_parm.split('\n')[b25:b26-1],side_parm.split('\n')[s25:s26-1],back_anum,'SCREEN ') + + ofile = open('test4.efp','w') + ofile.write(parm_out+' $END') + +def rewrite_atom_number(atom,num): + return 'A'+str(int(atom[1:3])+num).zfill(2)+atom[3:] + +def rewrite_bmid_number(bmid,num): + + if len(bmid.split("BO")[1]) == 2: + atom1,atom2 = bmid.split("BO")[1][0:1],bmid.split("BO")[1][1:2] + elif len(bmid.split("BO")[1]) == 3: + atom1,atom2 = bmid.split("BO")[1][0:2],bmid.split("BO")[1][2:3] + else: + if int(bmid.split("BO")[1][0:2]) > int(bmid.split("BO")[1][2:4]): + atom1,atom2 = bmid.split("BO")[1][0:2],bmid.split("BO")[1][2:4] + else: + atom1,atom2 = bmid.split("BO")[1][0:3],bmid.split("BO")[1][3:4] + return "BO"+str(int(atom1)+num)+str(int(atom2)+num) + +def combine_coordinate(backbone,sidechain,backnum): + string1,string2 = '','' + string1 += ' RUNTYP=MAKEFP EFFECTIVE FRAGMENT POTENTIAL DATA FOLLOWS...\n' + string1 += ' FRAGNAMEEFP GENERATED by ZEROTH ORDER APPROXIMATION\n' + string1 += ' $FRAGNAME\n' + string1 += 'EFP DATA FOR FRAGNAME SCFTYP=RHF ... GENERATED WITH BASIS SET=XXX\n' + string1 += ' COORDINATES (BOHR)\n' + for i in range(len(backbone)): + if 'BO' in backbone[i].split()[0]: + bmid = rewrite_bmid_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[1]),align='>',width=23-len(bmid)) + x2 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[2]),align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[3]),align='>',width=15) + x4 = '{:{align}{width}}'.format('%s'%backbone[i].split()[4],align='>',width=12) + x5 = '{:{align}{width}}'.format('%s'%backbone[i].split()[5],align='>',width=5) + string2 += x0+x1+x2+x3+x4+x5+'\n' + else: + atom = rewrite_atom_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[1]),align='>',width=23-len(atom)) + x2 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[2]),align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[3]),align='>',width=15) + x4 = '{:{align}{width}}'.format('%s'%backbone[i].split()[4],align='>',width=12) + x5 = '{:{align}{width}}'.format('%s'%backbone[i].split()[5],align='>',width=5) + string1 += x0+x1+x2+x3+x4+x5+'\n' + for i in range(len(sidechain)): + if 'BO' in sidechain[i].split()[0]: + bmid = rewrite_bmid_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[1]),align='>',width=23-len(bmid)) + x2 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[2]),align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[3]),align='>',width=15) + x4 = '{:{align}{width}}'.format('%s'%sidechain[i].split()[4],align='>',width=12) + x5 = '{:{align}{width}}'.format('%s'%sidechain[i].split()[5],align='>',width=5) + string2 += x0+x1+x2+x3+x4+x5+'\n' + else: + atom = rewrite_atom_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[1]),align='>',width=23-len(atom)) + x2 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[2]),align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[3]),align='>',width=15) + x4 = '{:{align}{width}}'.format('%s'%sidechain[i].split()[4],align='>',width=12) + x5 = '{:{align}{width}}'.format('%s'%sidechain[i].split()[5],align='>',width=5) + string1 += x0+x1+x2+x3+x4+x5+'\n' + + return string1+string2+' STOP\n' + +def combine_monopole(backbone,sidechain,backnum): + chg = 0.0 + string1,string2 = '','' + string1 += ' MONOPOLES\n' + for i in range(len(backbone)): + if 'BO' in backbone[i].split()[0]: + bmid = rewrite_bmid_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[1]),align='>',width=23-len(bmid)) + x2 = '{:{align}{width}}'.format('%.5f'%float(backbone[i].split()[2]),align='>',width=10) + string2 += x0+x1+x2+'\n' + chg += (float(backbone[i].split()[1])+float(backbone[i].split()[2])) + else: + atom = rewrite_atom_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[1]),align='>',width=23-len(atom)) + x2 = '{:{align}{width}}'.format('%.5f'%float(backbone[i].split()[2]),align='>',width=10) + string1 += x0+x1+x2+'\n' + chg += (float(backbone[i].split()[1])+float(backbone[i].split()[2])) + for i in range(len(sidechain)): + if 'BO' in sidechain[i].split()[0]: + bmid = rewrite_bmid_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[1]),align='>',width=23-len(bmid)) + x2 = '{:{align}{width}}'.format('%.5f'%float(sidechain[i].split()[2]),align='>',width=10) + string2 += x0+x1+x2+'\n' + chg += (float(sidechain[i].split()[1])+float(sidechain[i].split()[2])) + else: + atom = rewrite_atom_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[1]),align='>',width=23-len(atom)) + x2 = '{:{align}{width}}'.format('%.5f'%float(sidechain[i].split()[2]),align='>',width=10) + string1 += x0+x1+x2+'\n' + chg += (float(sidechain[i].split()[1])+float(sidechain[i].split()[2])) + print('TOTAL FRAGMENT\'S CHARGE:'+'{:{align}{width}}'.format('%.5f'%float(chg),align='>',width=8)) + return string1+string2+' STOP\n' + +def combine_dipole(backbone,sidechain,backnum): + string1,string2 = '','' + string1 += ' DIPOLES\n' + for i in range(len(backbone)): + if 'BO' in backbone[i].split()[0]: + bmid = rewrite_bmid_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[1]),align='>',width=24-len(bmid)) + x2 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[3]),align='>',width=16) + string2 += x0+x1+x2+x3+'\n' + else: + atom = rewrite_atom_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[1]),align='>',width=24-len(atom)) + x2 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[3]),align='>',width=16) + string1 += x0+x1+x2+x3+'\n' + for i in range(len(sidechain)): + if 'BO' in sidechain[i].split()[0]: + bmid = rewrite_bmid_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[1]),align='>',width=24-len(bmid)) + x2 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[3]),align='>',width=16) + string2 += x0+x1+x2+x3+'\n' + else: + atom = rewrite_atom_number(sidechain[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[1]),align='>',width=24-len(atom)) + x2 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[3]),align='>',width=16) + string1 += x0+x1+x2+x3+'\n' + return string1+string2+' STOP\n' + +def combine_quadrupole(backbone,sidechain,backnum): + string = '' + string += ' QUADRUPOLES\n' + temp1,temp2 = [],[] + temp3,temp4 = [],[] + for i in range(len(backbone)): + if len(backbone[i].split()) == 6: + if 'BO' in backbone[i].split()[0]: + bmid = rewrite_bmid_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[1]),align='>',width=24-len(bmid)) + x2 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[3]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[4]),align='>',width=16) + temp2.append(x0+x1+x2+x3+x4+' >\n') + else: + atom = rewrite_atom_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[1]),align='>',width=24-len(atom)) + x2 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[3]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[4]),align='>',width=16) + temp4.append(x0+x1+x2+x3+x4+' >\n') + else: + if len(temp2) > 0: + x1 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[1]),align='>',width=16) + temp2.append(x1+x2+'\n') + temp1.append(temp2) + temp2 = [] + elif len(temp4) > 0: + x1 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[1]),align='>',width=16) + temp4.append(x1+x2+'\n') + temp3.append(temp4) + temp4 = [] + else: + print('wrong quadrupole printed') + exit() + + for i in range(len(sidechain)): + if len(sidechain[i].split()) == 6: + if 'BO' in sidechain[i].split()[0]: + bmid = rewrite_bmid_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[1]),align='>',width=24-len(bmid)) + x2 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[3]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[4]),align='>',width=16) + temp2.append(x0+x1+x2+x3+x4+' >\n') + else: + atom = rewrite_atom_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[1]),align='>',width=24-len(atom)) + x2 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[3]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[4]),align='>',width=16) + temp4.append(x0+x1+x2+x3+x4+' >\n') + else: + if len(temp2) > 0: + x1 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[1]),align='>',width=16) + temp2.append(x1+x2+'\n') + temp1.append(temp2) + temp2 = [] + elif len(temp4) > 0: + x1 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[1]),align='>',width=16) + temp4.append(x1+x2+'\n') + temp3.append(temp4) + temp4 = [] + else: + print('wrong quadrupole printed') + + qupol = temp3+temp1 + + for i in range(len(qupol)): + string += str(qupol[i][0])+str(qupol[i][1]) + + return string + ' STOP\n' + +def combine_octupole(backbone,sidechain,backnum): + string = ' OCTUPOLES\n' + temp1,temp2 = [],[] + temp3,temp4 = [],[] + for i in range(len(backbone)): + if len(backbone[i].split()) == 6: + if 'BO' in backbone[i].split()[0]: + bmid = rewrite_bmid_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[1]),align='>',width=24-len(bmid)) + x2 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[3]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[4]),align='>',width=16) + temp2.append(x0+x1+x2+x3+x4+' >\n') + else: + atom = rewrite_atom_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[1]),align='>',width=24-len(atom)) + x2 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[3]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[4]),align='>',width=16) + temp4.append(x0+x1+x2+x3+x4+' >\n') + elif len(backbone[i].split()) == 5: + if len(temp2) > 0: + x1 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[1]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[2]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[3]),align='>',width=16) + temp2.append(x1+x2+x3+x4+' >'+'\n') + elif len(temp4) > 0: + x1 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[1]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[2]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[3]),align='>',width=16) + temp4.append(x1+x2+x3+x4+' >'+'\n') + else: + print('wrong octupole printed') + exit() + else: + if len(temp2) > 0: + x1 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[1]),align='>',width=16) + temp2.append(x1+x2+'\n') + temp1.append(temp2) + temp2 = [] + elif len(temp4) > 0: + x1 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[1]),align='>',width=16) + temp4.append(x1+x2+'\n') + temp3.append(temp4) + temp4 = [] + else: + print("wrong octupole printed") + exit() + for i in range(len(sidechain)): + if len(sidechain[i].split()) == 6: + if 'BO' in sidechain[i].split()[0]: + bmid = rewrite_bmid_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[1]),align='>',width=24-len(bmid)) + x2 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[3]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[4]),align='>',width=16) + temp2.append(x0+x1+x2+x3+x4+' >\n') + else: + atom = rewrite_atom_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[1]),align='>',width=24-len(atom)) + x2 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[2]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[3]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[4]),align='>',width=16) + temp4.append(x0+x1+x2+x3+x4+' >\n') + elif len(sidechain[i].split()) == 5: + if len(temp2) > 0: + x1 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[1]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[2]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[3]),align='>',width=16) + temp2.append(x1+x2+x3+x4+' >'+'\n') + elif len(temp4) > 0: + x1 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[1]),align='>',width=16) + x3 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[2]),align='>',width=16) + x4 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[3]),align='>',width=16) + temp4.append(x1+x2+x3+x4+' >'+'\n') + else: + print('wrong octupole printed') + exit() + else: + if len(temp2) > 0: + x1 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[1]),align='>',width=16) + temp2.append(x1+x2+'\n') + temp1.append(temp2) + temp2 = [] + elif len(temp4) > 0: + x1 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[0]),align='>',width=24) + x2 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[1]),align='>',width=16) + temp4.append(x1+x2+'\n') + temp3.append(temp4) + temp4 = [] + else: + print("wrong octupole printed") + exit() + + ocpol = temp3+temp1 + + for i in range(len(ocpol)): + string += str(ocpol[i][0])+str(ocpol[i][1])+str(ocpol[i][2]) + + return string + ' STOP\n' + +def combine_polarizability(pol_tensor): + string1,string2 = ' POLARIZABLE POINTS\n',' LMO CENTROIDS\n' + t,ct = 1,1 + for i in range(len(pol_tensor)): + if t % 4 == 1: + lmop = 'CT'+str(ct) + x0 = '{:{align}{width}}'.format('%s'%lmop,align='<',width=len(lmop)) + x1 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[1]),align='>',width=18-len(lmop)) + x2 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[2]),align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[3]),align='>',width=15) + string1 += x0+x1+x2+x3+'\n' + string2 += x0+x1+x2+x3+'\n' + elif t % 4 == 2: + x1 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[0]),align='>',width=18) + x2 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[1]),align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[2]),align='>',width=15) + x4 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[3]),align='>',width=15) + string1 += x1+x2+x3+x4+' >\n' + elif t%4 == 3: + x1 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[0]),align='>',width=18) + x2 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[1]),align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[2]),align='>',width=15) + x4 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[3]),align='>',width=15) + string1 += x1+x2+x3+x4+' >\n' + else: + x1 = '{:{align}{width}}'.format('%.10f'%float(pol_tensor[i].split()[0]),align='>',width=18) + string1 += x1+'\n' + ct += 1 + t += 1 + string1 += ' STOP\n' + string2 += ' STOP\n' + + return string1,string2 + +def combine_dynamic_polarizability(backbone,sidechain): + string = ' DYNAMIC POLARIZABLE POINTS\n' + + temp1,temp2 = [],[] + for i in range(0,len(backbone),len(backbone)/12): + for j in range(i,i+len(backbone)/12,1): + temp2.append(backbone[j]) + temp1.append(temp2) + temp2 = [] + temp3,temp4 = [],[] + for i in range(0,len(sidechain),len(sidechain)/12): + for j in range(i,i+len(sidechain)/12,1): + temp4.append(sidechain[j].split(' --')[0]) + temp3.append(temp4) + temp4 = [] + + for i in range(12): + ct = 1 + for j in range(len(temp1[i])): + if temp1[i][j].split()[0] == 'CT': + x0 = '{:{align}{width}}'.format('%s'%temp1[i][j].split()[0],align='<',width=len(temp1[i][j].split()[0])) + x1 = '{:{align}{width}}'.format('%s'%str(ct),align='>',width=5-len(temp1[i][j].split()[0])) + x2 = '{:{align}{width}}'.format('%.10f'%float(temp1[i][j].split()[2]),align='>',width=20-len(x0)-len(x1)) + x3 = '{:{align}{width}}'.format('%.10f'%float(temp1[i][j].split()[3]),align='>',width=15) + x4 = '{:{align}{width}}'.format('%.10f'%float(temp1[i][j].split()[4]),align='>',width=15) + ct += 1 + if int(temp1[i][j].split()[1]) == 1: + line2 = temp1[i][j].split('--') + x5 = '{:{align}{width}}'.format('%s'%line2[1],align='>',width=22) + string += (x0+x1+x2+x3+x4+' --'+x5+'\n') + else: + string += (x0+x1+x2+x3+x4+'\n') + else: + string += str(temp1[i][j])+'\n' + for j in range(len(temp3[i])): + if temp3[i][j].split()[0] == 'CT': + x0 = '{:{align}{width}}'.format('%s'%temp3[i][j].split()[0],align='<',width=len(temp3[i][j].split()[0])) + x1 = '{:{align}{width}}'.format('%s'%str(ct),align='>',width=5-len(temp3[i][j].split()[0])) + x2 = '{:{align}{width}}'.format('%.10f'%float(temp3[i][j].split()[2]),align='>',width=20-len(x0)-len(x1)) + x3 = '{:{align}{width}}'.format('%.10f'%float(temp3[i][j].split()[3]),align='>',width=15) + x4 = '{:{align}{width}}'.format('%.10f'%float(temp3[i][j].split()[4]),align='>',width=15) + ct += 1 + string += (x0+x1+x2+x3+x4+'\n') + else: + string += str(temp3[i][j])+'\n' + + return string+' STOP\n' + +def combine_projection_basis_set(backbone,sidechain,backnum): + string = ' PROJECTION BASIS SET\n' + for i in range(len(backbone)): + if len(backbone[i].split()) > 0 and backbone[i].split()[0][0:1] == 'A': + atom = rewrite_atom_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[1]),align='>',width=25-len(atom)) + x2 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[2]),align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%float(backbone[i].split()[3]),align='>',width=15) + x4 = '{:{align}{width}}'.format('%s'%backbone[i].split()[4],align='>',width=7) + string += (x0+x1+x2+x3+x4+'\n') + else: + string += backbone[i]+'\n' + + for i in range(len(sidechain)): + if len(sidechain[i].split()) > 0 and sidechain[i].split()[0][0:1] == 'A': + atom = rewrite_atom_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[1]),align='>',width=25-len(atom)) + x2 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[2]),align='>',width=15) + x3 = '{:{align}{width}}'.format('%.10f'%float(sidechain[i].split()[3]),align='>',width=15) + x4 = '{:{align}{width}}'.format('%s'%sidechain[i].split()[4],align='>',width=7) + string += (x0+x1+x2+x3+x4+'\n') + else: + string += sidechain[i]+'\n' + + return string+' STOP\n MULTIPLICITY 1\n STOP\n' + +def combine_wave_function(backbone,sidechain,b_lmo,b_bas,s_lmo,s_bas): + tot_lmo,tot_bas = b_lmo+s_lmo,b_bas+s_bas + wave_function = np.zeros((tot_lmo,tot_bas)) + string = ' PROJECTION WAVEFUNCTION '+str(tot_lmo)+' '+str(tot_bas)+'\n' + + temp1,temp2 = [],[] + for i in range(len(backbone)): + if backbone[i][5:20] != '': + temp1.append(backbone[i][5:20]) + if backbone[i][20:35] != '': + temp1.append(backbone[i][20:35]) + if backbone[i][35:50] != '': + temp1.append(backbone[i][35:50]) + if backbone[i][50:65] != '': + temp1.append(backbone[i][50:65]) + if backbone[i][65:80] != '': + temp1.append(backbone[i][65:80]) + for i in range(len(sidechain)): + if sidechain[i][5:20] != '': + temp2.append(sidechain[i][5:20]) + if sidechain[i][20:35] != '': + temp2.append(sidechain[i][20:35]) + if sidechain[i][35:50] != '': + temp2.append(sidechain[i][35:50]) + if sidechain[i][50:65] != '': + temp2.append(sidechain[i][50:65]) + if sidechain[i][65:80] != '': + temp2.append(sidechain[i][65:80]) + + lineN = 0 + for i in range(b_lmo): + for j in range(b_bas): + wave_function[i][j] = temp1[lineN] + lineN += 1 + lineN = 0 + for i in range(b_lmo,tot_lmo,1): + for j in range(b_bas,tot_bas,1): + wave_function[i][j] = temp2[lineN] + lineN += 1 + + for i in range(len(wave_function)): + lineN = 1 + x0 = '{:{align}{width}}'.format('%s'%str(i+1),align='>',width=2) + x1 = '{:{align}{width}}'.format('%s'%str(lineN),align='>',width=3) + for j in range(len(wave_function[i])): + if j != 0 and j % 5 == 0: + string += x0+x1+'\n' + lineN += 1 + x1 = '{:{align}{width}}'.format('%s'%str(lineN),align='>',width=3) + x1 += '{:{align}{width}}'.format('%.8e'%wave_function[i][j],align='>',width=15) + else: + x1 += '{:{align}{width}}'.format('%.8e'%wave_function[i][j],align='>',width=15) + string += x0+x1+'\n' + + return string + +def combine_fock_matrix(backbone,sidechain,b_lmo,s_lmo): + tot_lmo = b_lmo+s_lmo + temp1,temp2 = np.zeros((b_lmo,b_lmo)),[] + for i in range(len(backbone)): + for j in range(len(backbone[i].split())): + if backbone[i].split()[j] == '>': + continue + else: + temp2.append(float(backbone[i].split()[j])) + for i in range(b_lmo+1): + length = 0 + (i*(i-1)/2) + for j in range(i): + temp1[i-1][j] = temp2[j+length] + temp1[j][i-1] = temp2[j+length] + + + temp3,temp4 = np.zeros((s_lmo,s_lmo)),[] + for i in range(len(sidechain)): + for j in range(len(sidechain[i].split())): + if sidechain[i].split()[j] == '>': + continue + else: + temp4.append(float(sidechain[i].split()[j])) + for i in range(s_lmo+1): + length = 0 + (i*(i-1)/2) + for j in range(i): + temp3[i-1][j] = temp4[j+length] + temp3[j][i-1] = temp4[j+length] + + fock_mat = np.zeros((tot_lmo,tot_lmo)) + for i in range(len(temp1)): + for j in range(len(temp1[i])): + fock_mat[i][j] = temp1[i][j] + for i in range(len(temp3)): + for j in range(len(temp3[i])): + fock_mat[i+len(temp1)][j+len(temp1[0])] = temp3[i][j] + + + lineN,string = 0,'' + string += ' FOCK MATRIX ELEMENTS\n' + for i in range(len(fock_mat)): + for j in range(i+1): + x1 = '{:{align}{width}}'.format('%.10f'%float(fock_mat[i][j]),align='>',width=16) + lineN += 1 + string += x1 + if lineN == 4: + string += ' >\n' + lineN = 0 + + return string+'\n' + +def combine_screen(backbone,sidechain,backnum,scr): + string1 = scr+' (FROM VDWSCL= 0.700)\n' + string2 = '' + + for i in range(len(backbone)): + if 'BO' in backbone[i].split()[0]: + bmid = rewrite_bmid_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%' '+bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[1]),align='>',width=23-len(bmid)) + x2 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[2]),align='>',width=14) + string2 += x0+x1+x2+'\n' + else: + atom = rewrite_atom_number(backbone[i].split()[0],0) + x0 = '{:{align}{width}}'.format('%s'%' '+atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[1]),align='>',width=23-len(atom)) + x2 = '{:{align}{width}}'.format('%.9f'%float(backbone[i].split()[2]),align='>',width=14) + string1 += x0+x1+x2+'\n' + for i in range(len(sidechain)): + if 'BO' in sidechain[i].split()[0]: + bmid = rewrite_bmid_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%' '+bmid,align='<',width=len(bmid)) + x1 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[1]),align='>',width=23-len(bmid)) + x2 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[2]),align='>',width=14) + string2 += x0+x1+x2+'\n' + else: + atom = rewrite_atom_number(sidechain[i].split()[0],backnum) + x0 = '{:{align}{width}}'.format('%s'%' '+atom,align='<',width=len(atom)) + x1 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[1]),align='>',width=23-len(atom)) + x2 = '{:{align}{width}}'.format('%.9f'%float(sidechain[i].split()[2]),align='>',width=14) + string1 += x0+x1+x2+'\n' + + return string1+string2+' STOP\n' + +def main(): + parser = argparse.ArgumentParser( + usage ='%(prog)s [options] conf_a.efp conf_b.inp\nDefalut: Rotate Everything!!!!', + formatter_class=argparse.RawDescriptionHelpFormatter) +# parser.add_argument('-v', '--version', action='version', version='Flexible EFP ' + __version__) + parser.add_argument('conf_a', metavar='conf_a', type=str, help='Conformation in .efp format (backbone)') + parser.add_argument('conf_b', metavar='conf_b', type=str, help='Conformation in .inp format (Unknown)') + parser.add_argument('-p', '--no-pol', action='store_true', help='Turn off the rotation of polarization') + parser.add_argument('-d', '--no-disp', action='store_true', help='Turn off the rotation of dispersion') + parser.add_argument('-xr', '--no-xr', action='store_true', help='Turn off the rotation of exchange repulsion') + args = parser.parse_args() + + ifile1 = open(args.conf_a,'r') + ifile2 = open(args.conf_b,'r') + reflines1 = ifile1.readlines() + reflines2 = ifile2.readlines() + coord_xyz2 = read_new_struc(reflines2) + + frag = args.conf_b.split('.inp')[0] + orig_frag = args.conf_a + frag_parm = rotate_parameter(reflines1,np.array(coord_xyz2),args.no_pol,args.no_disp,args.no_xr,frag, orig_frag) + + ofile1 = open(frag+'.efp','w') + ofile1.write(frag_parm) + + sys.exit() + # additional + #combine_parm(back_parm,side_parm,len(back_xyz2)) + +if __name__ == "__main__": + main()