diff --git a/examples/DNA_analysis.py b/examples/DNA_analysis.py deleted file mode 100644 index 8f250d5..0000000 --- a/examples/DNA_analysis.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python3 -import os -import argparse -import mdtraj as md -import pandas as pd -import simtk.openmm -import open3SPN2 -import openawsem -from functools import partial - -parser = argparse.ArgumentParser() -parser.add_argument("--protein", help="The name of the protein", default="./clean.pdb") -parser.add_argument("-t", "--trajectory", type=str, default="./output.dcd") -parser.add_argument("-o", "--output", type=str, default=None, help="The Name of file that show your energy.") -args = parser.parse_args() - -trajectoryPath = os.path.abspath(args.trajectory) -if args.output is None: - outFile = os.path.join(os.path.dirname(trajectoryPath), "info.dat") -else: - outFile = os.path.join(os.path.dirname(trajectoryPath), args.output) - -# fix=open3SPN2.fixPDB(args.protein) -fix=open3SPN2.fixPDB(args.protein) - -#Create a table containing both the proteins and the DNA -complex_table=open3SPN2.pdb2table(fix) - -#Generate a coarse-grained model of the Protein molecules -protein_atoms=openawsem.Protein.CoarseGrain(complex_table) - -#Create the merged system -pdb=simtk.openmm.app.PDBFile(args.protein) -top=pdb.topology -coord=pdb.positions -forcefield=simtk.openmm.app.ForceField(openawsem.xml,open3SPN2.xml) -s=forcefield.createSystem(top) - -#Create the DNA and Protein Objects -dna=open3SPN2.DNA.fromCoarsePDB(args.protein) -with open('protein.seq') as ps: - protein_seq=ps.readlines()[0] -protein=openawsem.Protein.fromCoarsePDB(args.protein, - sequence=protein_seq) -dna.periodic=False -protein.periodic=False - -#Initialize the force dictionary -forces={} -for i in range(s.getNumForces()): - force = s.getForce(i) - force_name="CMMotionRemover" - -#Add 3SPN2 forces -for force_name in open3SPN2.forces: - # print(force_name) - force = open3SPN2.forces[force_name](dna) - if force_name in ['BasePair','CrossStacking']: - force.addForce(s) - else: - s.addForce(force) - forces[force_name] = force - -#Add AWSEM forces -ft=openawsem.functionTerms -openAWSEMforces = dict(Connectivity=ft.basicTerms.con_term, - Chain=ft.basicTerms.chain_term, - Chi=ft.basicTerms.chi_term, - Excl=ft.basicTerms.excl_term, - rama=ft.basicTerms.rama_term, - rama_pro=ft.basicTerms.rama_proline_term, - contact=ft.contactTerms.contact_term, - frag = partial(ft.templateTerms.fragment_memory_term, - frag_file_list_file = "./single_frags.mem", - npy_frag_table = "./single_frags.npy", - UseSavedFragTable = False, - k_fm = 0.04184/3), - beta1 = ft.hydrogenBondTerms.beta_term_1, - beta2 = ft.hydrogenBondTerms.beta_term_2, - beta3 = ft.hydrogenBondTerms.beta_term_3, - pap1 = ft.hydrogenBondTerms.pap_term_1, - pap2 = ft.hydrogenBondTerms.pap_term_2, - qval = partial(ft.biasTerms.q_value, - reference_pdb_file = "crystal-structure.pdb") - ) -protein.setup_virtual_sites(s) - -#Add DNA-protein interaction forces -for force_name in open3SPN2.protein_dna_forces: - # print(force_name) - force = open3SPN2.protein_dna_forces[force_name](dna,protein) - s.addForce(force) - forces[force_name] = force - -#Fix exclussions -for force_name in openAWSEMforces: - # print(force_name) - if force_name in ['contact']: - force = openAWSEMforces[force_name](protein, - withExclusion=False, - periodic=False) - # print(force.getNumExclusions()) - open3SPN2.addNonBondedExclusions(dna,force) - # print(force.getNumExclusions()) - elif force_name in ['Excl']: - force = openAWSEMforces[force_name](protein) - # print(force.getNumExclusions()) - open3SPN2.addNonBondedExclusions(dna,force) - # print(force.getNumExclusions()) - else: - force = openAWSEMforces[force_name](protein) - s.addForce(force) - forces[force_name] = force - -#Initialize the simulation -temperature=300 * simtk.openmm.unit.kelvin -platform_name='OpenCL' #'Reference','CPU','CUDA', 'OpenCL' -# platform_name='Reference' -integrator = simtk.openmm.LangevinIntegrator(temperature, - 1 / simtk.openmm.unit.picosecond, - 2 * simtk.openmm.unit.femtoseconds) -platform = simtk.openmm.Platform.getPlatformByName(platform_name) -simulation = simtk.openmm.app.Simulation(top,s, integrator, platform) -simulation.context.setPositions(coord) -energy_unit=simtk.openmm.unit.kilojoule_per_mole - -trajectory = md.load(args.trajectory, top=args.protein) - -energy_data = [] -for frame in trajectory: - simulation.context.setPositions(frame.xyz[0]) - #Obtain total energy - state = simulation.context.getState(getEnergy=True) - energy = state.getPotentialEnergy().value_in_unit(energy_unit) - - # Collect energies - energies = {} - - for force_name, force in forces.items(): - group = force.getForceGroup() - state = simulation.context.getState(getEnergy=True, groups=2**group) - energies[force_name] = state.getPotentialEnergy().value_in_unit(energy_unit) - - energy_data.append({"TotalEnergy": energy, **energies}) - -energy_df = pd.DataFrame(energy_data) -showAll = {"TotalEnergy": energy, **energies} - -# write energy_df into info.dat -with open(outFile, "w") as out: - line = " ".join(["{0:<8s}".format(i) for i in ["Steps"] + list(showAll.keys())]) - print(line) - out.write(line+"\n") - - for step, e in enumerate(energy_data): - line = " ".join([f"{step:<8}"] + ["{0:<8.2f}".format(i) for i in e.values()]) - print(line) - out.write(line+"\n") \ No newline at end of file diff --git a/open3SPN2/__init__.py b/open3SPN2/__init__.py index dea601c..52b7696 100644 --- a/open3SPN2/__init__.py +++ b/open3SPN2/__init__.py @@ -1,4 +1,5 @@ from open3SPN2.ff3SPN2 import * +from .optional_forces import optional_forces from . import _version __version__ = _version.get_versions()['version'] diff --git a/open3SPN2/compute_twist.py b/open3SPN2/compute_twist.py new file mode 100644 index 0000000..7fffb15 --- /dev/null +++ b/open3SPN2/compute_twist.py @@ -0,0 +1,118 @@ +#define these functions + +import numpy as np +import math + +def vector(p1, p2): + return [p2[0]-p1[0], p2[1]-p1[1], p2[2]-p1[2]] + +def vadd(p1,p2): + return [p2[0]/2+p1[0]/2, p2[1]/2+p1[1]/2, p2[2]/2+p1[2]/2] + +def vmulti(p1,k): + return [p1[0]*k,p1[1]*k,p1[2]*k] + +def vabs(a): + return math.sqrt(pow(float(a[0]),2)+pow(float(a[1]),2)+pow(float(a[2]),2)) + +def v_product(p1,p2): + return p1[0]*p2[0]+p1[1]*p2[1]+p1[2]*p2[2] + +def vcross_product(a, b): + cx = a[1]*b[2]-a[2]*b[1] + cy = a[2]*b[0]-a[0]*b[2] + cz = a[0]*b[1]-a[1]*b[0] + return [cx, cy, cz] + +#Various methods of calculations of twist. These are similar, using slightly different methods. But they should yield the same results. +def Xun_twist_OG(b_atoms): #original Xun's code + dna_length = int(len(b_atoms)/2) + b1_atoms = b_atoms[0:dna_length] + b2_atoms = b_atoms[dna_length:dna_length*2] + Twist = "" + for i in range(dna_length-1): + b_atom_1 = b_atoms[i] + b_atom_2 = b_atoms[-1-i] + b_atomn_1 = b_atoms[i+1] + b_atomn_2 = b_atoms[-1-i-1] + b_center = vadd(b_atom_1,b_atom_2) + b_centern = vadd(b_atomn_1,b_atomn_2) + zmst=vector(b_center,b_centern) + ys = vector(b_atom_1,b_atom_2) + ysn = vector(b_atomn_1,b_atomn_2) + yr = vector(ys,vmulti(zmst,v_product(ys,zmst))) + yrn = vector(ysn,vmulti(zmst,v_product(ysn,zmst))) + zmsto = vcross_product(yr,yrn) + ozmst = v_product(zmst,zmsto) + #print v_product(yr,yrn)/(vabs(yr)*vabs(yrn)) + twist = math.acos(v_product(yr,yrn)/(vabs(yr)*vabs(yrn)))/3.15159*180 + if twist > 180: + twist = twist - 180 + elif twist < -180: + twist = twist + 180 + if ozmst > 0: + twist = twist + else: + twist = 0 - twist + Twist += str(twist) + " " + return Twist + +def Xun_twist(Vec_1, Vec_2, Vec_3, Vec_4): #Steven's implementation of Xun's code!! + #WARNING: Sign is not taken into account but under in vivo conditions, should be but an edge case! + b_atom_1 = [float(x)/vabs(Vec_1[0]) for x in Vec_1[0]] + b_atom_2 = [float(x)/vabs(Vec_2[0]) for x in Vec_2[0]] + b_atomn_1 = [float(x)/vabs(Vec_3[0]) for x in Vec_3[0]] + b_atomn_2 = [float(x)/vabs(Vec_4[0]) for x in Vec_4[0]] + b_center = vadd(b_atom_1,b_atom_2) + b_centern = vadd(b_atomn_1,b_atomn_2) + zmst=vector(b_center,b_centern) + ys = vector(b_atom_1,b_atom_2) + ysn = vector(b_atomn_1,b_atomn_2) + yr = vector(ys,vmulti(zmst,v_product(ys,zmst))) + yrn = vector(ysn,vmulti(zmst,v_product(ysn,zmst))) + #zmsto = vcross_product(yr,yrn) + #ozmst = v_product(zmst,zmsto) + #print v_product(yr,yrn)/(vabs(yr)*vabs(yrn)) + twist = math.acos(v_product(yr,yrn)/(vabs(yr)*vabs(yrn))) #/3.15159*180 #Conversion from radians to degrees? + return twist + +def Steven_twist(Vec_1, Vec_2, Vec_3, Vec_4): #original Steven's code + x1, y1, z1 = Vec_1[0].astype(float) + x2, y2, z2 = Vec_2[0].astype(float) + x3, y3, z3 = Vec_3[0].astype(float) + x4, y4, z4 = Vec_4[0].astype(float) + + # Calculate vector A, B, and M + Ax, Ay, Az = x2 - x1, y2 - y1, z2 - z1 + Bx, By, Bz = x4 - x3, y4 - y3, z4 - z3 + Mx, My, Mz = (x4 + x3)/2 - (x1 + x2)/2, (y4 + y3)/2 - (y1 + y2)/2, (z4 + z3)/2 - (z1 + z2)/2 + + # Calculate vector C and D + Cx, Cy, Cz = Ay*Mz - Az*My, Az*Mx - Ax*Mz, Ax*My - Ay*Mx + Dx, Dy, Dz = By*Mz - Bz*My, Bz*Mx - Bx*Mz, Bx*My - By*Mx + + # Calculate magnitudes of vectors C and D + Cm = np.sqrt(Cx**2 + Cy**2 + Cz**2) + Dm = np.sqrt(Dx**2 + Dy**2 + Dz**2) + + # Calculate dot product and theta + dot = (Cx*Dx + Cy*Dy + Cz*Dz) / (Cm * Dm) + theta = np.arccos(np.clip(dot, -1, 1)) + + # Define Ex, Ey, and Ez + Ex, Ey, Ez = Cy*Dz-Cz*Dy, Cz*Dx-Cx*Dz, Cx*Dy-Cy*Dx + dot_sign = Ex*Mx+Ey*My+Ez*Mz + + # Calculate the angle according to the provided formula + angle = theta - 2*(np.sign(dot_sign) < 0)*(theta - np.pi) + + return angle + +def diff_twist(Vec_1, Vec_2, Vec_3, Vec_4): + return Steven_twist(Vec_1, Vec_2, Vec_3, Vec_4) - Xun_twist(Vec_1, Vec_2, Vec_3, Vec_4) + +def test_twist(Vec_1, Vec_2, Vec_3, Vec_4): + return Steven_twist(Vec_1, Vec_2, Vec_3, Vec_4) + +if __name__=='__main__': + test_twist() diff --git a/open3SPN2/force/__init__.py b/open3SPN2/force/__init__.py index cc7ef61..91f9dbe 100644 --- a/open3SPN2/force/__init__.py +++ b/open3SPN2/force/__init__.py @@ -1,5 +1,13 @@ -#from .template import DNAForce -#from .template import ProteinDNAForce +"""Core 3SPN2 + protein-DNA force-field terms. + +Only the physical force-field forces are exported here; these are the forces +auto-added by ``System.add3SPN2forces`` / ``addProteinDNAforces``. + +Opt-in biases and collective-variable readouts live in +:mod:`open3SPN2.force.bias` and :mod:`open3SPN2.force.collective_variables` and +are reached through :mod:`open3SPN2.optional_forces` (or imported directly from +those submodules), so an error in one of them cannot break ``import open3SPN2``. +""" from .dna import Bond from .dna import Angle @@ -13,5 +21,3 @@ from .protein_dna import ExclusionProteinDNA from .protein_dna import ElectrostaticsProteinDNA -from .protein_dna import AMHgoProteinDNA -from .protein_dna import String_length_ProteinDNA diff --git a/open3SPN2/force/bias.py b/open3SPN2/force/bias.py new file mode 100644 index 0000000..29a6de2 --- /dev/null +++ b/open3SPN2/force/bias.py @@ -0,0 +1,278 @@ +"""Bias forces for protein-DNA systems. + +These forces add energy to steer or restrain a simulation (position restraints, +string/umbrella pulling, harmonic biases on a collective variable). They are not +part of the physical force field and are never auto-added; a user opts in by +adding them explicitly. + +The identity-energy *readout* versions (report a value without biasing the +system) live in :mod:`open3SPN2.force.collective_variables`. +""" + +import logging + +import numpy as np +import openmm +import openmm.unit as unit + +from .template import DNAForce, ProteinDNAForce +from .dna import select_residue_atom_indices +from .protein_dna import (ElectrostaticsProteinDNA, select_string_groups, + _proteinResidues, _dnaResidues) +from . import force_groups + +logger = logging.getLogger(__name__) + + +class PositionRestraint(DNAForce): + """Harmonic restraint on the centroid of selected atoms. + + Energy ``0.5 * k * |centroid - r0|**2`` pulls the geometric centroid of the + selected atoms toward ``r0 = (x0, y0, z0)``. The centroid is the unweighted + mean ``sum(x_i)/N``, implemented as three ``CustomExternalForce("weight*x")`` + collective variables with ``weight = 1/N``. + + Parameters + ---------- + k : openmm.unit.Quantity + Force constant, in energy / length**2 (e.g. kJ/mol/nm**2). + x0, y0, z0 : openmm.unit.Quantity + Target position of the centroid. + appliedToResidues : sequence of int or None + ``resSeq`` values of the residues whose atoms are restrained; ``None`` + (default) uses the centroid of every atom. + """ + + def __init__(self, dna, k=1000 * unit.kilojoule_per_mole / unit.nanometer ** 2, + x0=1.0 * unit.nanometer, y0=1.0 * unit.nanometer, z0=1.0 * unit.nanometer, + appliedToResidues=None, force_group=force_groups.MEMBRANE, OpenCLPatch=True): + self.force_group = force_group + self.k = k + self.x0 = x0 + self.y0 = y0 + self.z0 = z0 + self.appliedToResidues = appliedToResidues + super().__init__(dna, OpenCLPatch=OpenCLPatch) + + def reset(self): + k = self.k.value_in_unit(unit.kilojoule_per_mole / unit.nanometer ** 2) + x0 = self.x0.value_in_unit(unit.nanometer) + y0 = self.y0.value_in_unit(unit.nanometer) + z0 = self.z0.value_in_unit(unit.nanometer) + + cx = openmm.CustomExternalForce("weight * x") + cy = openmm.CustomExternalForce("weight * y") + cz = openmm.CustomExternalForce("weight * z") + for cv in (cx, cy, cz): + cv.addPerParticleParameter("weight") + + harmonic = openmm.CustomCVForce( + f"0.5*{k}*((cx-({x0}))^2 + (cy-({y0}))^2 + (cz-({z0}))^2)" + ) + harmonic.addCollectiveVariable("cx", cx) + harmonic.addCollectiveVariable("cy", cy) + harmonic.addCollectiveVariable("cz", cz) + harmonic.setForceGroup(self.force_group) + self.force = harmonic + + def defineInteraction(self): + indices = select_residue_atom_indices(self.dna, self.appliedToResidues) + if len(indices) == 0: + raise ValueError("No atoms selected for PositionRestraint; check appliedToResidues.") + weight = 1.0 / len(indices) + for cv in range(3): + collective_variable = self.force.getCollectiveVariable(cv) + for i in indices: + collective_variable.addParticle(i, [weight]) + + +class StringProteinDNA(ProteinDNAForce): + """Harmonic string/umbrella potential between a protein and a DNA segment. + + Energy ``0.5 * k_string_PD * (distance(g1, g2) - r0)**2`` where ``g1`` is the + centroid of the protein ``CA`` atoms and ``g2`` the centroid of the DNA ``S`` + atoms. The chain arguments accept one or many chains (``'A'``, ``'AB'`` or + ``['A', 'B']``). + + Parameters + ---------- + r0 : float + Equilibrium centroid-centroid distance, in nanometers. + k_string_PD : float + Force constant, in kJ/mol/nm**2. + """ + + def __init__(self, dna, protein, r0, chain_protein='A', chain_DNA='B', + k_string_PD=10 * 4.184, protein_seg=False, group=None, + force_group=force_groups.PULLING): + self.k_string_PD = k_string_PD + self.chain_protein = chain_protein + self.chain_DNA = chain_DNA + self.r0 = r0 + self.protein_seg = protein_seg + self.group = group or [] + self.force_group = force_group + super().__init__(dna, protein) + + def reset(self): + stringForce = openmm.CustomCentroidBondForce( + 2, f"0.5*{self.k_string_PD}*(distance(g1,g2)-{self.r0})^2") + stringForce.setForceGroup(self.force_group) + self.force = stringForce + logger.debug("StringProteinDNA: r0=%s, k_string_PD=%s", self.r0, self.k_string_PD) + + def defineInteraction(self): + protein_index, dna_index = select_string_groups( + self.dna, self.chain_protein, self.chain_DNA, self.protein_seg, self.group) + self.force.addGroup(protein_index) + self.force.addGroup(dna_index) + self.force.addBond([0, 1]) + + +class ElectrostaticBias(ProteinDNAForce): + """Harmonic bias on the protein-DNA electrostatic energy. + + Adds ``0.5 * k_ebias * ((E_elec - center)/4.184)**2`` where ``E_elec`` is the + protein-DNA Debye-Huckel electrostatic energy (from + :class:`open3SPN2.force.protein_dna.ElectrostaticsProteinDNA`) used as a + collective variable. + + Units: ``E_elec`` is reported by OpenMM in kJ/mol, so ``center`` is in kJ/mol. + The ``/4.184`` converts the deviation ``(E_elec - center)`` into kcal/mol, so + ``k_ebias`` has units of kJ/mol per (kcal/mol)**2. + """ + + def __init__(self, dna, protein, k_ebias, center, k_elec, ldby, + cutoff_distance=None, force_group=force_groups.PROTEIN_DNA_BIAS): + self.k_ebias = k_ebias + self.center = center + self.k_elec = k_elec + self.ldby = ldby + self.force_group = force_group + self.cutoff_distance = cutoff_distance + super().__init__(dna, protein) + + def reset(self): + ebiasForce = openmm.CustomCVForce("0.5*k_ebias*((E_elec-center)/4.184)^2") + E_elec = ElectrostaticsProteinDNA(self.dna, self.protein, k=self.k_elec, + ldby=self.ldby, cutoff_distance=self.cutoff_distance) + ebiasForce.addCollectiveVariable("E_elec", E_elec.force) # kJ/mol + ebiasForce.addGlobalParameter("k_ebias", self.k_ebias) + ebiasForce.addGlobalParameter("center", self.center) # kJ/mol + ebiasForce.setForceGroup(self.force_group) + self.force = ebiasForce + + def defineInteraction(self): + logger.debug("ElectrostaticBias: center=%s, k_ebias=%s, k_elec=%s, ldby=%s", + self.center, self.k_ebias, self.k_elec, self.ldby) + + +class BasePairProteinHarmonicBias(ProteinDNAForce): + """Harmonic bias that holds a protein point near a target base-pair index. + + Two centroid groups span a stretch of DNA (``base_pair_left`` / + ``base_pair_right``, ``base_pair_sep`` base pairs apart) and a third is a + protein point. The protein's projection onto the left->right vector, + expressed as a base-pair offset ``i``, is harmonically restrained with energy + ``0.5 * k * i**2`` (``i = 0`` at the midpoint). + + The readout version is + :class:`open3SPN2.force.collective_variables.BasePairProteinPosition`. + """ + + def __init__(self, dna, protein, base_pair_left_indicies, base_pair_right_indicies, + protein_indices, base_pair_sep=4, k=4.184, + force_group=force_groups.PROTEIN_DNA_BIAS): + self.k = k + self.base_pair_left_indicies = base_pair_left_indicies + self.base_pair_right_indicies = base_pair_right_indicies + self.protein_indices = protein_indices + self.base_pair_sep = base_pair_sep + self.force_group = force_group + super().__init__(dna, protein) + + def reset(self): + # k is in kJ/mol per base-pair**2; i is the deviation in base pairs. + energy = f"0.5*{self.k}*(i)^2;" + # ratio (0..1 along left->right) -> base-pair offset; ratio 0.5 -> i = 0. + ratio_bp = f"i={self.base_pair_sep}*(ratio-0.5);" + # ratio = (VA . VD) / (VD . VD): projection of the protein vector VA onto + # the base-pair-separation vector VD, normalized by |VD|^2. + ratio_dots = ("ratio=VAVDdots/VDVDdots;" + "VAVDdots=VAx*VDx+VAy*VDy+VAz*VDz;" + "VDVDdots=VDx*VDx+VDy*VDy+VDz*VDz;") + vectors = ("VAx=Px-BPLx;VAy=Py-BPLy;VAz=Pz-BPLz;" + "VDx=BPRx-BPLx;VDy=BPRy-BPLy;VDz=BPRz-BPLz;") + particles = "BPLx=x1;BPLy=y1;BPLz=z1;BPRx=x2;BPRy=y2;BPRz=z2;Px=x3;Py=y3;Pz=z3" + expression = f"{energy}{ratio_bp}{ratio_dots}{vectors}{particles}" + + force = openmm.CustomCentroidBondForce(3, expression) + force.addGroup(self.base_pair_left_indicies) + force.addGroup(self.base_pair_right_indicies) + force.addGroup(self.protein_indices) + force.addBond([0, 1, 2]) + force.setForceGroup(self.force_group) + self.force = force + + def defineInteraction(self): + pass + + +class AMHgoProteinDNA(ProteinDNAForce): + """Structure-based (AMH-Go) contact potential between a protein and DNA. (Xinyu) + + Adds a Gaussian well at each native contact listed in ``contact_file`` + (columns: protein resSeq, DNA resSeq, native distance in angstrom, optional + weight) between a protein CB (or CA) atom and a DNA base atom. The well depth + at the native distance is ``k_3spn2 * k_amhgo_PD * gamma_ij``. + """ + def __init__(self, dna, protein, chain_protein='A', chain_DNA='B', k_amhgo_PD=1*unit.kilocalorie_per_mole, + k_3spn2=1.0, sigma_sq=0.05*unit.nanometers**2, aaweight=False, globalct=True, cutoff=1.8, + contact_file="contact_protein_DNA.dat", force_group=force_groups.AMH_GO): + self.force_group = force_group + self.k_amhgo_PD = k_amhgo_PD + self.k_3spn2 = k_3spn2 + self.sigma_sq = sigma_sq + self.chain_protein = chain_protein + self.chain_DNA = chain_DNA + self.aaweight = aaweight + self.cutoff = cutoff + self.globalct = globalct + self.contact_file = contact_file + super().__init__(dna, protein) + + def reset(self): + cutoff = self.cutoff + k_3spn2 = self.k_3spn2 + if self.globalct: + amhgoForce = openmm.CustomBondForce(f"-k_amhgo_PD*gamma_ij*exp(-(r-r_ijN)^2/(2*sigma_sq))*step({cutoff}-r)") + else: + amhgoForce = openmm.CustomBondForce(f"-k_amhgo_PD*gamma_ij*exp(-(r-r_ijN)^2/(2*sigma_sq))*step(r_ijN+{cutoff}-r)") + amhgoForce.addGlobalParameter("k_amhgo_PD", k_3spn2 * self.k_amhgo_PD) + amhgoForce.addGlobalParameter("sigma_sq", self.sigma_sq) + amhgoForce.addPerBondParameter("gamma_ij") + amhgoForce.addPerBondParameter("r_ijN") + amhgoForce.setUsesPeriodicBoundaryConditions(self.periodic) + amhgoForce.setForceGroup(self.force_group) # one cutoff per force group + self.force = amhgoForce + + def defineInteraction(self): + atoms = self.dna.atoms.copy() + atoms['index'] = atoms.index + atoms.index = zip(atoms['chainID'], atoms['resSeq'], atoms['name']) + + contact_list = np.loadtxt(self.contact_file) + for i in range(len(contact_list)): + if self.aaweight: + gamma_ij = contact_list[i][3] + else: + gamma_ij = 1.0 + if (self.chain_protein, int(contact_list[i][0]), 'CB') in atoms.index: + CB_protein = atoms[(atoms['chainID'] == self.chain_protein) & (atoms['resSeq'] == int(contact_list[i][0])) & (atoms['name'] == 'CB') & atoms['resname'].isin(_proteinResidues)].copy() + else: + CB_protein = atoms[(atoms['chainID'] == self.chain_protein) & (atoms['resSeq'] == int(contact_list[i][0])) & (atoms['name'] == 'CA') & atoms['resname'].isin(_proteinResidues)].copy() + base_DNA = atoms[(atoms['chainID'] == self.chain_DNA) & (atoms['resSeq'] == int(contact_list[i][1])) & (atoms['name'].isin(['A', 'T', 'G', 'C'])) & atoms['resname'].isin(_dnaResidues)].copy() + r_ijN = contact_list[i][2] / 10.0 * unit.nanometers + self.force.addBond(int(CB_protein['index'].values[0]), int(base_DNA['index'].values[0]), [gamma_ij, r_ijN]) + logger.debug('AMHgo contact: %s %s %s', int(CB_protein['index'].values[0]), + int(base_DNA['index'].values[0]), [gamma_ij, r_ijN]) diff --git a/open3SPN2/force/collective_variables.py b/open3SPN2/force/collective_variables.py new file mode 100644 index 0000000..8489332 --- /dev/null +++ b/open3SPN2/force/collective_variables.py @@ -0,0 +1,150 @@ +"""Collective-variable readouts for protein-DNA systems. + +Each class reports a geometric quantity (a distance, a position, a base-pair +offset) as an OpenMM force whose "energy" is that quantity -- there is no biasing +of the dynamics. They are meant to be read per force group (or embedded in a +``CustomCVForce``) for analysis or umbrella sampling, and like the biases in +:mod:`open3SPN2.force.bias` they are opt-in, never auto-added. + +Because the reported value is carried as an identity energy, these forces use +``force_groups.MEASUREMENT`` (outside ``force_groups.TOTAL_ENERGY``) so they do +not contaminate the total potential energy. +""" + +import logging + +import openmm +import openmm.unit as unit + +from .template import DNAForce, ProteinDNAForce +from .dna import select_residue_atom_indices +from .protein_dna import select_string_groups +from . import force_groups + +logger = logging.getLogger(__name__) + + +class DistanceFromPoint(DNAForce): + """Report the distance from the centroid of selected atoms to a point. + + Reports ``|centroid - r0|`` (Euclidean distance, nm), where the centroid is + the unweighted mean of the selected atoms and ``r0 = (x0, y0, z0)``. Readout + companion of :class:`open3SPN2.force.bias.PositionRestraint`. + + Parameters + ---------- + appliedToResidues : sequence of int or None + ``resSeq`` values of the residues to include; ``None`` uses every atom. + """ + + def __init__(self, dna, x0=1.0 * unit.nanometer, y0=1.0 * unit.nanometer, + z0=1.0 * unit.nanometer, appliedToResidues=None, + force_group=force_groups.MEASUREMENT): + self.force_group = force_group + self.x0 = x0 + self.y0 = y0 + self.z0 = z0 + self.appliedToResidues = appliedToResidues + super().__init__(dna) + + def reset(self): + x0 = self.x0.value_in_unit(unit.nanometer) + y0 = self.y0.value_in_unit(unit.nanometer) + z0 = self.z0.value_in_unit(unit.nanometer) + + cx = openmm.CustomExternalForce("weight * x") + cy = openmm.CustomExternalForce("weight * y") + cz = openmm.CustomExternalForce("weight * z") + for cv in (cx, cy, cz): + cv.addPerParticleParameter("weight") + + distance = openmm.CustomCVForce( + f"sqrt((cx-({x0}))^2 + (cy-({y0}))^2 + (cz-({z0}))^2)" + ) + distance.addCollectiveVariable("cx", cx) + distance.addCollectiveVariable("cy", cy) + distance.addCollectiveVariable("cz", cz) + distance.setForceGroup(self.force_group) + self.force = distance + + def defineInteraction(self): + indices = select_residue_atom_indices(self.dna, self.appliedToResidues) + if len(indices) == 0: + raise ValueError("No atoms selected for DistanceFromPoint; check appliedToResidues.") + weight = 1.0 / len(indices) + for cv in range(3): + collective_variable = self.force.getCollectiveVariable(cv) + for i in indices: + collective_variable.addParticle(i, [weight]) + + +class StringLength(ProteinDNAForce): + """Report the centroid-centroid distance of a protein-DNA string. + + Reports ``distance(g1, g2)`` between the protein ``CA`` centroid and the DNA + ``S`` centroid. Readout companion of + :class:`open3SPN2.force.bias.StringProteinDNA`; the chain arguments accept one + or many chains. + """ + + def __init__(self, dna, protein, chain_protein='A', chain_DNA='B', + protein_seg=False, group=None, force_group=force_groups.MEASUREMENT): + self.chain_protein = chain_protein + self.chain_DNA = chain_DNA + self.protein_seg = protein_seg + self.group = group or [] + self.force_group = force_group + super().__init__(dna, protein) + + def reset(self): + force = openmm.CustomCentroidBondForce(2, "distance(g1,g2)") + force.setForceGroup(self.force_group) + self.force = force + + def defineInteraction(self): + protein_index, dna_index = select_string_groups( + self.dna, self.chain_protein, self.chain_DNA, self.protein_seg, self.group) + self.force.addGroup(protein_index) + self.force.addGroup(dna_index) + self.force.addBond([0, 1]) + + +class BasePairProteinPosition(ProteinDNAForce): + """Report a protein point's position along a DNA stretch, in base pairs. + + Reports the base-pair offset ``i`` of the protein point's projection onto the + left->right base-pair vector (``i = 0`` at the midpoint). Readout companion of + :class:`open3SPN2.force.bias.BasePairProteinHarmonicBias`. + """ + + def __init__(self, dna, protein, base_pair_left_indicies, base_pair_right_indicies, + protein_indices, base_pair_sep=4, force_group=force_groups.MEASUREMENT): + self.base_pair_left_indicies = base_pair_left_indicies + self.base_pair_right_indicies = base_pair_right_indicies + self.protein_indices = protein_indices + self.base_pair_sep = base_pair_sep + self.force_group = force_group + super().__init__(dna, protein) + + def reset(self): + # Reported value: i = base-pair offset of the protein projection. + energy = "i;" + ratio_bp = f"i={self.base_pair_sep}*(ratio-0.5);" + ratio_dots = ("ratio=VAVDdots/VDVDdots;" + "VAVDdots=VAx*VDx+VAy*VDy+VAz*VDz;" + "VDVDdots=VDx*VDx+VDy*VDy+VDz*VDz;") + vectors = ("VAx=Px-BPLx;VAy=Py-BPLy;VAz=Pz-BPLz;" + "VDx=BPRx-BPLx;VDy=BPRy-BPLy;VDz=BPRz-BPLz;") + particles = "BPLx=x1;BPLy=y1;BPLz=z1;BPRx=x2;BPRy=y2;BPRz=z2;Px=x3;Py=y3;Pz=z3" + expression = f"{energy}{ratio_bp}{ratio_dots}{vectors}{particles}" + + force = openmm.CustomCentroidBondForce(3, expression) + force.addGroup(self.base_pair_left_indicies) + force.addGroup(self.base_pair_right_indicies) + force.addGroup(self.protein_indices) + force.addBond([0, 1, 2]) + force.setForceGroup(self.force_group) + self.force = force + + def defineInteraction(self): + pass diff --git a/open3SPN2/force/dna.py b/open3SPN2/force/dna.py index cf3b61e..4834729 100644 --- a/open3SPN2/force/dna.py +++ b/open3SPN2/force/dna.py @@ -1,17 +1,21 @@ import numpy as np import pandas import itertools - +import logging import openmm import openmm.unit as unit from .template import DNAForce +from . import force_groups + +logger = logging.getLogger(__name__) + _af = 1 * unit.degree / unit.radian # angle scaling factor _dnaResidues = ['DA', 'DC', 'DT', 'DG'] _complement = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'} class Bond(DNAForce, openmm.CustomBondForce): - def __init__(self, dna, force_group=6, OpenCLPatch=True): + def __init__(self, dna, force_group=force_groups.BOND, OpenCLPatch=True): self.force_group = force_group super().__init__(dna, OpenCLPatch=OpenCLPatch) @@ -45,7 +49,7 @@ def defineInteraction(self): class Angle(DNAForce, openmm.HarmonicAngleForce): - def __init__(self, dna, force_group=7, OpenCLPatch=True): + def __init__(self, dna, force_group=force_groups.ANGLE, OpenCLPatch=True): self.force_group = force_group super().__init__(dna, OpenCLPatch=OpenCLPatch) @@ -62,7 +66,7 @@ def defineInteraction(self): self.force.addAngle(int(a['aai']), int(a['aaj']), int(a['aak']), *parameters) class Stacking(DNAForce, openmm.CustomCompoundBondForce): - def __init__(self, dna, force_group=8, OpenCLPatch=True): + def __init__(self, dna, force_group=force_groups.STACKING, OpenCLPatch=True): self.force_group = force_group super().__init__(dna, OpenCLPatch=OpenCLPatch) @@ -97,7 +101,7 @@ def defineInteraction(self): self.force.addBond([a['aai'], a['aaj'], a['aak']], parameters) class Dihedral(DNAForce, openmm.CustomTorsionForce): - def __init__(self, dna, force_group=9, OpenCLPatch=True): + def __init__(self, dna, force_group=force_groups.DIHEDRAL, OpenCLPatch=True): self.force_group = force_group super().__init__(dna, OpenCLPatch=OpenCLPatch) @@ -128,7 +132,7 @@ def defineInteraction(self): class BasePair(DNAForce, openmm.CustomHbondForce): - def __init__(self, dna, force_group=10, OpenCLPatch=True): + def __init__(self, dna, force_group=force_groups.BASE_PAIR, OpenCLPatch=True): self.force_group = force_group super().__init__(dna, OpenCLPatch=OpenCLPatch) @@ -189,7 +193,7 @@ def defineInteraction(self): except KeyError: for c, r, n in D1.index: if (c, r, 'S') not in atoms.index: - print(f'Residue {c}:{r} does not have a Sugar atom (S)') + logger.warning('Residue %s:%s does not have a Sugar atom (S)', c, r) raise KeyError try: @@ -197,7 +201,7 @@ def defineInteraction(self): except KeyError: for c, r, n in A1.index: if (c, r, 'S') not in atoms.index: - print(f'Residue {c}:{r} does not have a Sugar atom (S)') + logger.warning('Residue %s:%s does not have a Sugar atom (S)', c, r) raise KeyError D1_list = list(D1['index']) @@ -243,7 +247,7 @@ def addForce(self, system): class CrossStacking(DNAForce): - def __init__(self, dna, force_group=11, OpenCLPatch=True): + def __init__(self, dna, force_group=force_groups.CROSS_STACKING, OpenCLPatch=True): self.force_group = force_group super().__init__(dna, OpenCLPatch=OpenCLPatch) @@ -440,8 +444,21 @@ def addNonBondedExclusions(dna, force, OpenCLPatch=True): force.addExclusion(i, j) +def select_residue_atom_indices(dna, appliedToResidues=None): + """System particle indices of the atoms in the requested residues. + + ``appliedToResidues`` is a list of ``resSeq`` values; ``None`` selects every + atom. Used by the centroid-based restraint and readout forces. + """ + atoms = dna.atoms + if appliedToResidues is None: + return [int(i) for i in atoms.index] + mask = atoms['resSeq'].isin(list(appliedToResidues)) + return [int(i) for i in atoms.index[mask]] + + class Exclusion(DNAForce, openmm.CustomNonbondedForce): - def __init__(self, dna, force_group = 12, OpenCLPatch=True): + def __init__(self, dna, force_group = force_groups.EXCLUSION_DNA, OpenCLPatch=True): self.force_group = force_group super().__init__(dna, OpenCLPatch=OpenCLPatch) @@ -487,10 +504,12 @@ def defineInteraction(self): class Electrostatics(DNAForce, openmm.CustomNonbondedForce): - def __init__(self, dna, force_group=13, temperature=300*unit.kelvin, salt_concentration=100*unit.millimolar, OpenCLPatch=True): + def __init__(self, dna, force_group=force_groups.ELECTROSTATICS_DNA, temperature=300*unit.kelvin, salt_concentration=100*unit.millimolar, ldby = None, cutoff_distance = None, OpenCLPatch=True): self.force_group = force_group self.T = temperature self.C = salt_concentration + self.ldby = ldby + self.cutoff_distance = cutoff_distance super().__init__(dna, OpenCLPatch=OpenCLPatch) def reset(self): @@ -506,11 +525,23 @@ def reset(self): ec = 1.60217653E-19 * unit.coulomb # proton charge pv = 8.8541878176E-12 * unit.farad / unit.meter # dielectric permittivity of vacuum - ldby = np.sqrt(dielectric * pv * kb * T / (2.0 * Na * ec ** 2 * C)) + if self.ldby is None: + ldby = np.sqrt(dielectric * pv * kb * T / (2.0 * Na * ec ** 2 * C)) + else: + ldby = self.ldby + ldby = ldby.in_units_of(unit.nanometer) + + if self.cutoff_distance == None: + cutoff_distance = 5 * unit.nanometer # for backward compatibility + else: + cutoff_distance = self.cutoff_distance + + cutoff_nm = cutoff_distance.value_in_unit(unit.nanometer) + denominator = 4 * np.pi * pv * dielectric / (Na * ec ** 2) denominator = denominator.in_units_of(unit.kilocalorie_per_mole**-1 * unit.nanometer**-1) - #print(ldby, denominator) + logger.debug('DNA electrostatics: ldby=%s, denominator=%s', ldby, denominator) electrostaticForce = openmm.CustomNonbondedForce("""energy; energy=q1*q2*exp(-r/dh_length)/denominator/r;""") @@ -518,7 +549,9 @@ def reset(self): electrostaticForce.addGlobalParameter('dh_length', ldby) electrostaticForce.addGlobalParameter('denominator', denominator) - electrostaticForce.setCutoffDistance(5) + electrostaticForce.setCutoffDistance(cutoff_nm) + logger.debug('DNA screening length %s nm', ldby) + logger.debug('DNA electrostatic cutoff %s nm', electrostaticForce.getCutoffDistance()) if self.periodic: electrostaticForce.setNonbondedMethod(electrostaticForce.CutoffPeriodic) else: @@ -546,4 +579,5 @@ def defineInteraction(self): self.force.addParticle(parameters) # add neighbor exclusion - addNonBondedExclusions(self.dna, self.force, self.OpenCLPatch) \ No newline at end of file + addNonBondedExclusions(self.dna, self.force, self.OpenCLPatch) + diff --git a/open3SPN2/force/force_groups.py b/open3SPN2/force/force_groups.py new file mode 100644 index 0000000..0ce4562 --- /dev/null +++ b/open3SPN2/force/force_groups.py @@ -0,0 +1,51 @@ +"""Default OpenMM force-group numbers used across open3SPN2. + +Every force accepts a ``force_group`` argument; the values here are only the +defaults. Centralizing them keeps the analysis energy decomposition in sync with +the force classes and documents which of the 32 slots are in use. + +``TOTAL_ENERGY`` (groups 5-31) are summed to report the total potential energy. +Measurement-only readouts use ``MEASUREMENT`` (below 5) so they do not contribute +to that total. A ``CustomNonbondedForce`` carries one cutoff distance per force +group, so two non-bonded forces with different cutoffs must use different groups. +""" + +# Measurement / analysis readouts (excluded from the energy total) +Q = 1 +Rg = 2 +MEASUREMENT = 4 + +# DNA (3SPN2) terms +BOND = 6 +ANGLE = 7 +STACKING = 8 +DIHEDRAL = 9 +BASE_PAIR = 10 +CROSS_STACKING = 11 +EXCLUSION_DNA = 12 +ELECTROSTATICS_DNA = 13 + +# Protein-DNA terms +EXCLUSION_PROTEIN_DNA = 14 +ELECTROSTATICS_PROTEIN_DNA = 15 +PROTEIN_DNA_BIAS = 16 + +# AWSEM protein terms +BURIAL = 17 +HELIX = 18 +PULLING = 19 +BACKBONE = 20 +RAMA = 21 +CONTACT = 22 +FRAGMENT = 23 +MEMBRANE = 24 +ER = 25 +TBM_Q = 26 +BETA = 27 +PAP = 28 +HELICAL = 29 +DEBYE_HUCKEL = 30 +AMH_GO = 31 + +# Force groups summed to give the total potential energy. +TOTAL_ENERGY = range(5, 32) diff --git a/open3SPN2/force/protein_dna.py b/open3SPN2/force/protein_dna.py index d3e4f60..cbab0a7 100644 --- a/open3SPN2/force/protein_dna.py +++ b/open3SPN2/force/protein_dna.py @@ -1,3 +1,5 @@ +import logging + import numpy as np import openmm @@ -5,15 +7,52 @@ import pandas from .template import ProteinDNAForce from .dna import addNonBondedExclusions +from . import force_groups + +logger = logging.getLogger(__name__) + _af = 1 * unit.degree / unit.radian # angle scaling factor _dnaResidues = ['DA', 'DC', 'DT', 'DG'] _proteinResidues = ['IPR', 'IGL', 'NGP'] + +def select_string_groups(dna, chain_protein='A', chain_DNA='B', protein_seg=False, group=None): + """Pick the two centroid groups for a protein-DNA string/pulling potential. + + Returns ``(protein_CA_indices, dna_S_indices)``: the protein alpha-carbon + (``CA``) atoms and the DNA sugar (``S``) atoms that define the ends of the + pulling vector. ``chain_protein`` / ``chain_DNA`` accept a single chain id + (``'A'``) or several (``'AB'`` or ``['A', 'B']``); every matching chain is + pooled into one group. When ``protein_seg`` is true only the ``CA`` atoms at + the positions listed in ``group`` (indices into the selected ``CA`` list) are + used. + """ + atoms = dna.atoms.copy() + atoms['index'] = atoms.index + CA_atoms = atoms[ + atoms['chainID'].isin(list(chain_protein)) & + (atoms['name'] == 'CA') & + atoms['resname'].isin(_proteinResidues) + ] + S_atoms = atoms[ + atoms['chainID'].isin(list(chain_DNA)) & + (atoms['name'] == 'S') & + atoms['resname'].isin(_dnaResidues) + ] + CA_index = [int(i) for i in CA_atoms['index']] + S_index = [int(i) for i in S_atoms['index']] + if protein_seg: + CA_index = [CA_index[x] for x in (group or [])] + return CA_index, S_index + + class ExclusionProteinDNA(ProteinDNAForce): """ Protein-DNA exclusion potential""" - def __init__(self, dna, protein, k=1, force_group=14): + def __init__(self, dna, protein, k=1, radius_override = None, cutoff = 1.55, force_group=force_groups.EXCLUSION_PROTEIN_DNA): self.k = k self.force_group = force_group + self.radius_override = radius_override + self.cutoff = cutoff #cutoff is in nm super().__init__(dna, protein) def reset(self): @@ -27,13 +66,14 @@ def reset(self): exclusionForce.addPerParticleParameter('epsilon') exclusionForce.addPerParticleParameter('sigma') exclusionForce.addPerParticleParameter('cutoff') - exclusionForce.setCutoffDistance(1.55) + exclusionForce.setCutoffDistance(self.cutoff) # exclusionForce.setUseLongRangeCorrection(True) exclusionForce.setForceGroup(self.force_group) # There can not be multiple cutoff distance on the same force group if self.periodic: exclusionForce.setNonbondedMethod(exclusionForce.CutoffPeriodic) else: exclusionForce.setNonbondedMethod(exclusionForce.CutoffNonPeriodic) + logger.debug('protein-DNA exclusion cutoff %s', exclusionForce.getCutoffDistance()) self.force = exclusionForce def defineInteraction(self): @@ -61,18 +101,28 @@ def defineInteraction(self): for i, atom in atoms.iterrows(): if atom.is_dna: param = particle_definition.loc['DNA' + atom['name']] - parameters = [param.epsilon, - param.radius, - param.cutoff] + if self.radius_override == None: + parameters = [param.epsilon, + param.radius, + param.cutoff] + else: + parameters = [param.epsilon, + self.radius_override, + param.cutoff] DNA_list += [i] elif atom.is_protein: param = particle_definition.loc['Protein' + atom['name']] - parameters = [param.epsilon, - param.radius, - param.cutoff] + if self.radius_override == None: + parameters = [param.epsilon, + param.radius, + param.cutoff] + else: + parameters = [param.epsilon, + self.radius_override, + param.cutoff] protein_list += [i] else: - print(f'Residue {i} not included in protein-DNA interactions') + logger.warning('Residue %s not included in protein-DNA interactions', i) parameters = [0, .1,.1] atoms.loc[i, ['epsilon', 'radius', 'cutoff']] = parameters self.atoms = atoms @@ -85,9 +135,11 @@ def defineInteraction(self): class ElectrostaticsProteinDNA(ProteinDNAForce): """DNA-protein and protein-protein electrostatics.""" - def __init__(self, dna, protein, k=1, force_group=15): + def __init__(self, dna, protein, k=1, ldby = 1.2 * unit.nanometer, cutoff_distance = None, force_group=force_groups.ELECTROSTATICS_PROTEIN_DNA): self.k = k self.force_group = force_group + self.ldby = ldby + self.cutoff_distance = cutoff_distance super().__init__(dna, protein) def reset(self): @@ -98,11 +150,12 @@ def reset(self): ec = 1.60217653E-19 * unit.coulomb # proton charge pv = 8.8541878176E-12 * unit.farad / unit.meter # dielectric permittivity of vacuum - ldby = 1.2 * unit.nanometer # np.sqrt(dielectric * pv * kb * T / (2.0 * Na * ec ** 2 * C)) + #ldby = 1.2 * unit.nanometer # np.sqrt(dielectric * pv * kb * T / (2.0 * Na * ec ** 2 * C)) denominator = 4 * np.pi * pv * dielectric / (Na * ec ** 2) denominator = denominator.in_units_of(unit.kilocalorie_per_mole**-1 * unit.nanometer**-1) #print(ldby, denominator) k = self.k + ldby = self.ldby electrostaticForce = openmm.CustomNonbondedForce(f"""k_electro_protein_DNA*energy; energy=q1*q2*exp(-r/inter_dh_length)/inter_denominator/r;""") electrostaticForce.addPerParticleParameter('q') @@ -110,7 +163,16 @@ def reset(self): electrostaticForce.addGlobalParameter('inter_dh_length', ldby) electrostaticForce.addGlobalParameter('inter_denominator', denominator) - electrostaticForce.setCutoffDistance(4) + if self.cutoff_distance == None: + cutoff_distance = 4 * unit.nanometer # for backward compatibility + else: + cutoff_distance = self.cutoff_distance + + cutoff_nm = cutoff_distance.value_in_unit(unit.nanometer) + + electrostaticForce.setCutoffDistance(cutoff_nm) + logger.debug('protein-DNA screening length %s nm', ldby) + logger.debug('protein-DNA electrostatic cutoff %s nm', electrostaticForce.getCutoffDistance()) if self.periodic: electrostaticForce.setNonbondedMethod(electrostaticForce.CutoffPeriodic) else: @@ -160,7 +222,7 @@ def defineInteraction(self): protein_list += [i] #print(atom.chainID, atom.resSeq, atom.resname, atom['name'], charge) else: - print(f'Residue {i} not included in protein-DNA electrostatics') + logger.warning('Residue %s not included in protein-DNA electrostatics', i) parameters = [0] # No charge if it is not DNA # print (i,parameters) self.force.addParticle(parameters) @@ -170,160 +232,3 @@ def defineInteraction(self): # addExclusions addNonBondedExclusions(self.dna, self.force) - -class AMHgoProteinDNA(ProteinDNAForce): - """ Protein-DNA amhgo potential""" - def __init__(self, dna, protein, chain_protein='A', chain_DNA='B', k_amhgo_PD=1*unit.kilocalorie_per_mole, sigma_sq=0.05*unit.nanometers**2, aaweight=False, globalct=True, cutoff=1.8, force_group=16): - self.force_group = force_group - self.k_amhgo_PD = k_amhgo_PD - self.sigma_sq= sigma_sq - self.chain_protein = chain_protein - self.chain_DNA = chain_DNA - self.aaweight = aaweight - self.cutoff = cutoff - self.globalct = globalct - super().__init__(dna, protein) - - def reset(self): - cutoff = self.cutoff - k_3spn2 = self.k_3spn2 - if self.globalct: - amhgoForce = openmm.CustomBondForce(f"-k_amhgo_PD*gamma_ij*exp(-(r-r_ijN)^2/(2*sigma_sq))*step({cutoff}-r)") - else: - amhgoForce = openmm.CustomBondForce(f"-k_amhgo_PD*gamma_ij*exp(-(r-r_ijN)^2/(2*sigma_sq))*step(r_ijN+{cutoff}-r)") - amhgoForce.addGlobalParameter("k_amhgo_PD", k_3spn2*self.k_amhgo_PD) - amhgoForce.addGlobalParameter("sigma_sq", self.sigma_sq) - amhgoForce.addPerBondParameter("gamma_ij") - amhgoForce.addPerBondParameter("r_ijN") - amhgoForce.setUsesPeriodicBoundaryConditions(self.periodic) - amhgoForce.setForceGroup(self.force_group) # There can not be multiple cutoff distance on the same force group - self.force = amhgoForce - - def defineInteraction(self): - atoms = self.dna.atoms.copy() - atoms['index'] = atoms.index - atoms.index = zip(atoms['chainID'], atoms['resSeq'], atoms['name']) - - contact_list = np.loadtxt("contact_protein_DNA.dat") - for i in range(len(contact_list)): - if self.aaweight: - gamma_ij = contact_list[i][3] - else: - gamma_ij = 1.0 - if (self.chain_protein, int(contact_list[i][0]), 'CB') in atoms.index: - CB_protein = atoms[(atoms['chainID'] == self.chain_protein) & (atoms['resSeq'] == int(contact_list[i][0])) & (atoms['name'] == 'CB') & atoms['resname'].isin(_proteinResidues)].copy() - else: - CB_protein = atoms[(atoms['chainID'] == self.chain_protein) & (atoms['resSeq'] == int(contact_list[i][0])) & (atoms['name'] == 'CA') & atoms['resname'].isin(_proteinResidues)].copy() - base_DNA = atoms[(atoms['chainID'] == self.chain_DNA) & (atoms['resSeq'] == int(contact_list[i][1])) & (atoms['name'].isin(['A', 'T', 'G', 'C'])) & atoms['resname'].isin(_dnaResidues)].copy() - r_ijN = contact_list[i][2]/10.0*unit.nanometers - self.force.addBond(int(CB_protein['index'].values[0]), int(base_DNA['index'].values[0]), [gamma_ij, r_ijN]) - print(int(CB_protein['index'].values[0]), int(base_DNA['index'].values[0]), [gamma_ij, r_ijN]) - - -#class AMHgoProteinDNA(ProteinDNAForce): -# """ Protein-DNA amhgo potential (Xinyu)""" -# def __init__(self, dna, protein, chain_protein='A', chain_DNA='B', k_amhgo_PD=1*unit.kilocalorie_per_mole, -# sigma_sq=0.05*unit.nanometers**2, aaweight=False, cutoff=1.8, force_group=16): -# self.force_group = force_group -# self.k_amhgo_PD = k_amhgo_PD -# self.sigma_sq= sigma_sq -# self.chain_protein = chain_protein -# self.chain_DNA = chain_DNA -# self.aaweight = aaweight -# self.cutoff = cutoff -# super().__init__(dna, protein) -# -# def reset(self): -# cutoff = self.cutoff -# amhgoForce = openmm.CustomBondForce(f"-k_amhgo_PD*gamma_ij*exp(-(r-r_ijN)^2/(2*sigma_sq))*step({cutoff}-r)") -# amhgoForce.addGlobalParameter("k_amhgo_PD", self.k_amhgo_PD) -# amhgoForce.addGlobalParameter("sigma_sq", self.sigma_sq) -# amhgoForce.addPerBondParameter("gamma_ij") -# amhgoForce.addPerBondParameter("r_ijN") -# amhgoForce.setUsesPeriodicBoundaryConditions(self.periodic) -# amhgoForce.setForceGroup(self.force_group) # There can not be multiple cutoff distance on the same force group -# self.force = amhgoForce -# -# def defineInteraction(self): -# atoms = self.dna.atoms.copy() -# atoms['index'] = atoms.index -# atoms.index = zip(atoms['chainID'], atoms['resSeq'], atoms['name']) -# -# contact_list = np.loadtxt("contact_protein_DNA.dat") -# for i in range(len(contact_list)): -# if self.aaweight: gamma_ij = contact_list[i][3] -# else: gamma_ij = 1.0 -# if (self.chain_protein, int(contact_list[i][0]), 'CB') in atoms.index: -# CB_protein = atoms[(atoms['chainID'] == self.chain_protein) & (atoms['resSeq'] == int(contact_list[i][0])) & -# (atoms['name'] == 'CB') & atoms['resname'].isin(_proteinResidues)].copy() -# else: -# CB_protein = atoms[(atoms['chainID'] == self.chain_protein) & (atoms['resSeq'] == int(contact_list[i][0])) & -# (atoms['name'] == 'CA') & atoms['resname'].isin(_proteinResidues)].copy() -# base_DNA = atoms[(atoms['chainID'] == self.chain_DNA) & (atoms['resSeq'] == int(contact_list[i][1])) & (atoms['name'].isin(['A', 'T', 'G', 'C'])) & atoms['resname'].isin(_dnaResidues)].copy() -# r_ijN = contact_list[i][2]/10.0*unit.nanometers -# self.force.addBond(int(CB_protein['index'].values[0]), int(base_DNA['index'].values[0]), [gamma_ij, r_ijN]) -# print(int(CB_protein['index'].values[0]), int(base_DNA['index'].values[0]), [gamma_ij, r_ijN]) - -class StringProteinDNA(ProteinDNAForce): - """ Protein-DNA string potential (Xinyu)""" - def __init__(self, dna, protein, r0, chain_protein='A', chain_DNA='B', k_string_PD=10*4.184, protein_seg=False, group=[]): - self.k_string_PD = k_string_PD - self.chain_protein = chain_protein - self.chain_DNA = chain_DNA - self.r0 = r0 - self.protein_seg = protein_seg - self.group = group - super().__init__(dna, protein) - - def reset(self): - r0=self.r0 - k_string_PD=self.k_string_PD - stringForce = openmm.CustomCentroidBondForce(2, f"0.5*{k_string_PD}*(distance(g1,g2)-{r0})^2") - self.force = stringForce - print("String_PD bias on: r0, k_string = ", r0, k_string_PD) - - def defineInteraction(self): - atoms = self.dna.atoms.copy() - atoms['index'] = atoms.index - CA_atoms = atoms[(atoms['chainID'] == self.chain_protein) & (atoms['name'] == 'CA') & atoms['resname'].isin(_proteinResidues)].copy() - S_atoms = atoms[(atoms['chainID'] == self.chain_DNA) & (atoms['name'] == 'S') & atoms['resname'].isin(_dnaResidues)].copy() - CA_index = [int(atom.index) for atom in CA_atoms.itertuples()] - if self.protein_seg: self.force.addGroup([CA_index[x] for x in self.group]) - else: self.force.addGroup(CA_index) - self.force.addGroup([int(atom.index) for atom in S_atoms.itertuples()]) - bondGroups = [0, 1] - print(self.force.getGroupParameters(0)) - print(self.force.getGroupParameters(1)) - - self.force.addBond(bondGroups) - - -class String_length_ProteinDNA(ProteinDNAForce): - """ Protein-DNA string potential (Xinyu)""" - def __init__(self, dna, protein, chain_protein='A', chain_DNA='B', protein_seg=False, group=[], force_group=17): - self.force_group = force_group - self.chain_protein = chain_protein - self.chain_DNA = chain_DNA - self.protein_seg = protein_seg - self.group = group - super().__init__(dna, protein) - - def reset(self): - length = openmm.CustomCentroidBondForce(2, "distance(g1,g2)") - length.setForceGroup(self.force_group) - self.force = length - - def defineInteraction(self): - atoms = self.dna.atoms.copy() - atoms['index'] = atoms.index - CA_atoms = atoms[(atoms['chainID'] == self.chain_protein) & (atoms['name'] == 'CA') & atoms['resname'].isin(_proteinResidues)].copy() - S_atoms = atoms[(atoms['chainID'] == self.chain_DNA) & (atoms['name'] == 'S') & atoms['resname'].isin(_dnaResidues)].copy() - CA_index = [int(atom.index) for atom in CA_atoms.itertuples()] - if self.protein_seg: self.force.addGroup([CA_index[x] for x in self.group]) - else: self.force.addGroup(CA_index) - self.force.addGroup([int(atom.index) for atom in S_atoms.itertuples()]) - bondGroups = [0, 1] - print(self.force.getGroupParameters(0)) - print(self.force.getGroupParameters(1)) - - self.force.addBond(bondGroups) \ No newline at end of file diff --git a/open3SPN2/optional_forces.py b/open3SPN2/optional_forces.py new file mode 100644 index 0000000..220948d --- /dev/null +++ b/open3SPN2/optional_forces.py @@ -0,0 +1,75 @@ +"""Opt-in bias and collective-variable forces, resolved lazily. + +These forces are not part of the core force field and are never auto-added. They +are resolved on demand through the :data:`optional_forces` mapping so that an +import error in one of the optional modules cannot break ``import open3SPN2``: +the error surfaces only when that specific force is requested. + +Examples +-------- +>>> import open3SPN2 +>>> open3SPN2.optional_forces.available() +['BasePairProteinHarmonicBias', 'BasePairProteinPosition', 'DistanceFromPoint', ...] +>>> PositionRestraint = open3SPN2.optional_forces['PositionRestraint'] + +Equivalently, the force can be imported straight from its submodule: + +>>> from open3SPN2.force.bias import PositionRestraint +""" + +import importlib + +try: + from collections.abc import Mapping +except ImportError: # pragma: no cover - Python 2 fallback + from collections import Mapping + +# force name -> (submodule, attribute) +_REGISTRY = { + # biases (apply energy to steer/restrain the system) + "PositionRestraint": ("open3SPN2.force.bias", "PositionRestraint"), + "StringProteinDNA": ("open3SPN2.force.bias", "StringProteinDNA"), + "ElectrostaticBias": ("open3SPN2.force.bias", "ElectrostaticBias"), + "BasePairProteinHarmonicBias": ("open3SPN2.force.bias", "BasePairProteinHarmonicBias"), + "AMHgoProteinDNA": ("open3SPN2.force.bias", "AMHgoProteinDNA"), + # collective-variable readouts (identity energy) + "DistanceFromPoint": ("open3SPN2.force.collective_variables", "DistanceFromPoint"), + "StringLength": ("open3SPN2.force.collective_variables", "StringLength"), + "BasePairProteinPosition": ("open3SPN2.force.collective_variables", "BasePairProteinPosition"), +} + + +class _LazyForceRegistry(Mapping): + """Mapping of force name -> class, importing the submodule on first access.""" + + def __init__(self, registry): + self._registry = dict(registry) + + def __getitem__(self, name): + if name not in self._registry: + raise KeyError(name) + module_name, attribute = self._registry[name] + return getattr(importlib.import_module(module_name), attribute) + + def __getattr__(self, name): + # Attribute-style access (optional_forces.PositionRestraint); never + # intercept private/dunder lookups, which are not force names. + if name.startswith('_'): + raise AttributeError(name) + try: + return self[name] + except KeyError: + raise AttributeError(name) + + def __iter__(self): + return iter(self._registry) + + def __len__(self): + return len(self._registry) + + def available(self): + """Names of the available optional forces (does not import anything).""" + return sorted(self._registry) + + +optional_forces = _LazyForceRegistry(_REGISTRY) diff --git a/open3SPN2/scripts/compute_twist.py b/open3SPN2/scripts/compute_twist.py new file mode 100644 index 0000000..7fffb15 --- /dev/null +++ b/open3SPN2/scripts/compute_twist.py @@ -0,0 +1,118 @@ +#define these functions + +import numpy as np +import math + +def vector(p1, p2): + return [p2[0]-p1[0], p2[1]-p1[1], p2[2]-p1[2]] + +def vadd(p1,p2): + return [p2[0]/2+p1[0]/2, p2[1]/2+p1[1]/2, p2[2]/2+p1[2]/2] + +def vmulti(p1,k): + return [p1[0]*k,p1[1]*k,p1[2]*k] + +def vabs(a): + return math.sqrt(pow(float(a[0]),2)+pow(float(a[1]),2)+pow(float(a[2]),2)) + +def v_product(p1,p2): + return p1[0]*p2[0]+p1[1]*p2[1]+p1[2]*p2[2] + +def vcross_product(a, b): + cx = a[1]*b[2]-a[2]*b[1] + cy = a[2]*b[0]-a[0]*b[2] + cz = a[0]*b[1]-a[1]*b[0] + return [cx, cy, cz] + +#Various methods of calculations of twist. These are similar, using slightly different methods. But they should yield the same results. +def Xun_twist_OG(b_atoms): #original Xun's code + dna_length = int(len(b_atoms)/2) + b1_atoms = b_atoms[0:dna_length] + b2_atoms = b_atoms[dna_length:dna_length*2] + Twist = "" + for i in range(dna_length-1): + b_atom_1 = b_atoms[i] + b_atom_2 = b_atoms[-1-i] + b_atomn_1 = b_atoms[i+1] + b_atomn_2 = b_atoms[-1-i-1] + b_center = vadd(b_atom_1,b_atom_2) + b_centern = vadd(b_atomn_1,b_atomn_2) + zmst=vector(b_center,b_centern) + ys = vector(b_atom_1,b_atom_2) + ysn = vector(b_atomn_1,b_atomn_2) + yr = vector(ys,vmulti(zmst,v_product(ys,zmst))) + yrn = vector(ysn,vmulti(zmst,v_product(ysn,zmst))) + zmsto = vcross_product(yr,yrn) + ozmst = v_product(zmst,zmsto) + #print v_product(yr,yrn)/(vabs(yr)*vabs(yrn)) + twist = math.acos(v_product(yr,yrn)/(vabs(yr)*vabs(yrn)))/3.15159*180 + if twist > 180: + twist = twist - 180 + elif twist < -180: + twist = twist + 180 + if ozmst > 0: + twist = twist + else: + twist = 0 - twist + Twist += str(twist) + " " + return Twist + +def Xun_twist(Vec_1, Vec_2, Vec_3, Vec_4): #Steven's implementation of Xun's code!! + #WARNING: Sign is not taken into account but under in vivo conditions, should be but an edge case! + b_atom_1 = [float(x)/vabs(Vec_1[0]) for x in Vec_1[0]] + b_atom_2 = [float(x)/vabs(Vec_2[0]) for x in Vec_2[0]] + b_atomn_1 = [float(x)/vabs(Vec_3[0]) for x in Vec_3[0]] + b_atomn_2 = [float(x)/vabs(Vec_4[0]) for x in Vec_4[0]] + b_center = vadd(b_atom_1,b_atom_2) + b_centern = vadd(b_atomn_1,b_atomn_2) + zmst=vector(b_center,b_centern) + ys = vector(b_atom_1,b_atom_2) + ysn = vector(b_atomn_1,b_atomn_2) + yr = vector(ys,vmulti(zmst,v_product(ys,zmst))) + yrn = vector(ysn,vmulti(zmst,v_product(ysn,zmst))) + #zmsto = vcross_product(yr,yrn) + #ozmst = v_product(zmst,zmsto) + #print v_product(yr,yrn)/(vabs(yr)*vabs(yrn)) + twist = math.acos(v_product(yr,yrn)/(vabs(yr)*vabs(yrn))) #/3.15159*180 #Conversion from radians to degrees? + return twist + +def Steven_twist(Vec_1, Vec_2, Vec_3, Vec_4): #original Steven's code + x1, y1, z1 = Vec_1[0].astype(float) + x2, y2, z2 = Vec_2[0].astype(float) + x3, y3, z3 = Vec_3[0].astype(float) + x4, y4, z4 = Vec_4[0].astype(float) + + # Calculate vector A, B, and M + Ax, Ay, Az = x2 - x1, y2 - y1, z2 - z1 + Bx, By, Bz = x4 - x3, y4 - y3, z4 - z3 + Mx, My, Mz = (x4 + x3)/2 - (x1 + x2)/2, (y4 + y3)/2 - (y1 + y2)/2, (z4 + z3)/2 - (z1 + z2)/2 + + # Calculate vector C and D + Cx, Cy, Cz = Ay*Mz - Az*My, Az*Mx - Ax*Mz, Ax*My - Ay*Mx + Dx, Dy, Dz = By*Mz - Bz*My, Bz*Mx - Bx*Mz, Bx*My - By*Mx + + # Calculate magnitudes of vectors C and D + Cm = np.sqrt(Cx**2 + Cy**2 + Cz**2) + Dm = np.sqrt(Dx**2 + Dy**2 + Dz**2) + + # Calculate dot product and theta + dot = (Cx*Dx + Cy*Dy + Cz*Dz) / (Cm * Dm) + theta = np.arccos(np.clip(dot, -1, 1)) + + # Define Ex, Ey, and Ez + Ex, Ey, Ez = Cy*Dz-Cz*Dy, Cz*Dx-Cx*Dz, Cx*Dy-Cy*Dx + dot_sign = Ex*Mx+Ey*My+Ez*Mz + + # Calculate the angle according to the provided formula + angle = theta - 2*(np.sign(dot_sign) < 0)*(theta - np.pi) + + return angle + +def diff_twist(Vec_1, Vec_2, Vec_3, Vec_4): + return Steven_twist(Vec_1, Vec_2, Vec_3, Vec_4) - Xun_twist(Vec_1, Vec_2, Vec_3, Vec_4) + +def test_twist(Vec_1, Vec_2, Vec_3, Vec_4): + return Steven_twist(Vec_1, Vec_2, Vec_3, Vec_4) + +if __name__=='__main__': + test_twist() diff --git a/open3SPN2/scripts/forces_setup.py b/open3SPN2/scripts/forces_setup.py new file mode 100644 index 0000000..6723edd --- /dev/null +++ b/open3SPN2/scripts/forces_setup.py @@ -0,0 +1,100 @@ +import openawsem +import open3SPN2 + +from functools import partial + +from openawsem.functionTerms import * +from openawsem.helperFunctions.myFunctions import * + +#Location of the AWSEM information folder, including fragment memories +AWSEM_folder = "/path/to/awsem/directory/for/protein/part" + +#File of fragment memory to be used +fragment = f"single_frags.mem" + +#Native (or other reference) file +reference = f"{AWSEM_folder}/crystal_structure-openmmawsem.pdb" + +def set_up_forces(s,protein, dna, computeQ, AWSEM = AWSEM_folder, fragment = fragment): + # apply forces + forces = {} + + for i in range(s.getNumForces()): + force = s.getForce(i) + force_name="CMMotionRemover" + + #Add 3SPN2 forces + for force_name in open3SPN2.forces: + print(force_name) + force = open3SPN2.forces[force_name](dna) + if force_name in ['BasePair','CrossStacking']: + force.addForce(s) + else: + s.addForce(force) + forces.update({force_name:force}) + + #Add AWSEM forces. Fragment memories are in the protein residue only AWSEM-created folder + frags_dir = AWSEM + openAWSEMforces = dict(Connectivity=openawsem.functionTerms.basicTerms.con_term, + Chain=openawsem.functionTerms.basicTerms.chain_term, + Chi=openawsem.functionTerms.basicTerms.chi_term, + Excl=openawsem.functionTerms.basicTerms.excl_term, + rama=openawsem.functionTerms.basicTerms.rama_term, + rama_pro=openawsem.functionTerms.basicTerms.rama_proline_term, + rama_ssweight = partial(openawsem.functionTerms.basicTerms.rama_ssweight_term, + k_rama_ssweight=2*8.368, + ssweight_file=f"{frags_dir}/ssweight"), + contact=openawsem.functionTerms.contactTerms.contact_term, + frag = partial(openawsem.functionTerms.templateTerms.fragment_memory_term, + frag_file_list_file=f"{frags_dir}/{fragment}", + UseSavedFragTable=False, + k_fm=0.04184), + beta1 = openawsem.functionTerms.hydrogenBondTerms.beta_term_1, + beta2 = openawsem.functionTerms.hydrogenBondTerms.beta_term_2, + beta3 = openawsem.functionTerms.hydrogenBondTerms.beta_term_3, + pap1 = partial(openawsem.functionTerms.hydrogenBondTerms.pap_term_1, + ssweightFileName=f"{frags_dir}/ssweight"), + pap2 = partial(openawsem.functionTerms.hydrogenBondTerms.pap_term_2, + ssweightFileName=f"{frags_dir}/ssweight"), + DH = partial(openawsem.functionTerms.debyeHuckelTerms.debye_huckel_term, + chargeFile=f"{frags_dir}/charge.txt") + ) + + protein.setup_virtual_sites(s) + + #Add DNA-protein interaction forces + for force_name in open3SPN2.protein_dna_forces: + print(force_name) + force = open3SPN2.protein_dna_forces[force_name](dna,protein) + s.addForce(force) + forces.update({force_name: force}) + + #OpenAWSEM forces with exclusions + for force_name in openAWSEMforces: + print(force_name) + if force_name in ['contact']: + force = openAWSEMforces[force_name](protein, withExclusion=False,periodic=False) + print(force_name, "pre-add #Exclusions", force.getNumExclusions()) + open3SPN2.addNonBondedExclusions(dna,force) + print(force_name, "post-add #Exclusions", force.getNumExclusions()) + elif force_name in ['Excl']: + force = openAWSEMforces[force_name](protein) + print(force_name, "pre-add #Exclusions", force.getNumExclusions()) + open3SPN2.addNonBondedExclusions(dna,force) + print(force_name, "post-add #Exclusions", force.getNumExclusions()) + #continue + else: + force = openAWSEMforces[force_name](protein) + s.addForce(force) + forces.update({force_name: force}) + + + if computeQ: + analysis = dict(Rg = openawsem.functionTerms.biasTerms.rg_term, + Q = partial(openawsem.functionTerms.biasTerms.q_value, reference_pdb_file = reference, forceGroup=1)) + for force_name in analysis: + print(force_name) + force = analysis[force_name](protein) + s.addForce(force) + forces.update({force_name: force}) + return forces diff --git a/open3SPN2/scripts/protein_DNA_analysis.py b/open3SPN2/scripts/protein_DNA_analysis.py new file mode 100644 index 0000000..90731db --- /dev/null +++ b/open3SPN2/scripts/protein_DNA_analysis.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 +import os +import argparse +import mdtraj as md +import pandas as pd +import openmm +import open3SPN2 +import openawsem +from functools import partial +import importlib.util + +import openmm.app +import openmm.unit +import sys +import numpy as np + +import argparse + +def run(args): + trajectoryPath = os.path.abspath(args.trajectory) + if args.output is None: + outFile = os.path.join(os.path.dirname(trajectoryPath), "info.dat") + else: + outFile = os.path.join(os.path.dirname(trajectoryPath), args.output) + + simulation_platform = args.platform + platform = openmm.Platform.getPlatformByName(simulation_platform) + + #aries specific block + if simulation_platform == "OpenCL": + platform.setPropertyDefaultValue('OpenCLPlatformIndex', '0') + platform.setPropertyDefaultValue('DeviceIndex', args.device) + + # fix=open3SPN2.fixPDB(args.protein) + fix=open3SPN2.fixPDB(args.proteinDNA) + + #Create a table containing both the proteins and the DNA + complex_table=open3SPN2.pdb2table(fix) + + #Generate a coarse-grained model of the Protein molecules + protein_atoms=openawsem.Protein.CoarseGrain(complex_table) + + #Create the merged system + pdb=openmm.app.PDBFile(args.proteinDNA) + top=pdb.topology + coord=pdb.positions + forcefield=openmm.app.ForceField(openawsem.xml,open3SPN2.xml) + s=forcefield.createSystem(top) + + #Create the DNA and Protein Objects + dna=open3SPN2.DNA.fromCoarsePDB(args.proteinDNA) + with open('protein.seq') as ps: + protein_seq=ps.readlines()[0] + protein=openawsem.Protein.fromCoarsePDB(args.proteinDNA, + sequence=protein_seq) + dna.periodic=False + protein.periodic=False + + #Initialize the force dictionary + forceSetupFile = args.forces + #forces={} + + print(f"using force setup file from {forceSetupFile}") + spec = importlib.util.spec_from_file_location("forces", forceSetupFile) + # print(spec) + forces_file = importlib.util.module_from_spec(spec) + spec.loader.exec_module(forces_file) + forces = forces_file.set_up_forces(s,protein, dna, computeQ = True) + + #Initialize the simulation + temperature=300 * openmm.unit.kelvin + platform_name=args.platform #'Reference','CPU','CUDA', 'OpenCL' + integrator = openmm.LangevinIntegrator(temperature, + 1 / openmm.unit.picosecond, + 2 * openmm.unit.femtoseconds) + platform = openmm.Platform.getPlatformByName(platform_name) + simulation = openmm.app.Simulation(top,s, integrator, platform) + simulation.context.setPositions(coord) + energy_unit=openmm.unit.kilojoule_per_mole + + trajectory = md.load(args.trajectory, top=args.proteinDNA) + + forceGroupTable = { + "Q": 1, + "Rg": 2, + # "Rg_bias": 5, + "Bond": 6, + "Angle": 7, + "Stacking": 8, + "Dihedral": 9, + "BasePair": 10, + "CrossStacking": 11, + "ExclusionDNA": 12, + "ElectrostaticsDNA": 13, + "ExclusionProteinDNA": 14, + "ElectrostaticsProteinDNA": 15, + # "Reserved_for_direct_protein_DNA_readout_interactions": 16, + # "Burial": 17, + # "Helix_orientation": 18, + # "Pulling": 19, + "Backbone": 20, + "Rama": 21, + "Contact": 22, + "Fragment": 23, + # "Membrane": 24, + # "ER": 25, + # "TBM_Q": 26, + "Beta": 27, + "Pap": 28, + "Helical": 29, + "Debye_Huckel": 30, + "AMH-Go": 31, + "Total_Energy": list(range(5, 32)) + } + print("Verify forceGroupTable in protein_DNA_analysis is set up correctly.") + print("Total Energy includes forceGroups from 5 to 31, both sides inclusive.") + showValue = ["Q", "Rg"] + showEnergy = [ + # "Rg_bias", + "Bond", + "Angle", + "Stacking", + "Dihedral", + "BasePair", + "CrossStacking", + "ExclusionDNA", + "ElectrostaticsDNA", + "ExclusionProteinDNA", + "ElectrostaticsProteinDNA", + # "Reserved_for_direct_protein_DNA_readout_interactions", + # "Burial", + # "Helix_orientation", + # "Pulling", + "Backbone", + "Rama", + "Contact", + "Fragment", + # "Membrane", + # "ER", + # "TBM_Q", + "Beta", + "Pap", + "Helical", + "Debye_Huckel", + "AMH-Go", + "Total_Energy" + ] + showAll = showValue + showEnergy + + print("Printing energies") + + with open(outFile, "w") as out: + line = " ".join(["{0:<8s}".format(i) for i in ["Steps"] + showAll]) + print(line) + out.write(line+"\n") + # for step, pdb in enumerate(pdb_trajectory): + # simulation.context.setPositions(pdb.positions) + for step in range(len(trajectory)): + simulation.context.setPositions(trajectory.openmm_positions(step)) + e = [] + for term in showAll: + if type(forceGroupTable[term]) == list: + g = set(forceGroupTable[term]) + elif forceGroupTable[term] == -1: + g = -1 + else: + g = {forceGroupTable[term]} + state = simulation.context.getState(getEnergy=True, groups=g) + # if term == "Q" or term == "Rg" or term == "Qc" or term == "Q_wat" or term == "Q_mem": + if term in showValue: + termEnergy = state.getPotentialEnergy().value_in_unit(openmm.unit.kilojoule_per_mole) + else: + termEnergy = state.getPotentialEnergy().value_in_unit(openmm.unit.kilocalories_per_mole) + e.append(termEnergy) + # print(*e) + line = " ".join([f"{step:<8}"] + ["{0:<8.2f}".format(i) for i in e]) + print(line) + out.write(line+"\n") + # print(forceGroupTable[term], state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("proteinDNA", help="The name of the protein", default="./clean.pdb") + parser.add_argument("-t", "--trajectory", type=str, default="./output.dcd") + parser.add_argument("-o", "--output", type=str, default=None, help="The Name of file that show your energy.") + parser.add_argument("-p", "--platform", type=str, default="OpenCL", help="Could be OpenCL, CUDA and CPU") + parser.add_argument('--device',default='0') + parser.add_argument("-f", "--forces", default="forces_setup.py", type=str, help="forces setup file") + #parser.add_argument("-l", "--fragment", type=str, default="./frags.mem", help="Fragment memory") #temporary placeholder + #parser.add_argument("-a", "--AWSEM", type=str, default="./", help="protein-only AWSEM folder, should have fragment library") #not temporary + args = parser.parse_args() + + with open('analysis_commandline_args.txt', 'a') as f: + f.write(' '.join(sys.argv)) + f.write('\n') + print(' '.join(sys.argv)) + + run(args) + +if __name__=="__main__": + main() \ No newline at end of file diff --git a/open3SPN2/scripts/protein_DNA_energy.py b/open3SPN2/scripts/protein_DNA_energy.py new file mode 100644 index 0000000..df402e9 --- /dev/null +++ b/open3SPN2/scripts/protein_DNA_energy.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python +# coding: utf-8 + +# If you want to specify the package address +# you can add them to the PYTHONPATH environment variable. +# Also you can add them on the run time uncommenting the lines below +import sys +import os +# open3SPN2_HOME = '/Users/weilu/open3spn2/' +#openAWSEM_HOME = '/home/sl206/Programs/openawsem' +# sys.path.insert(0,open3SPN2_HOME) +#sys.path.insert(0,openAWSEM_HOME) + +import importlib.util + +#sys.path.append('/home/sl206/miniconda3/envs/openmm/lib/python3.6') +#sys.path.append('/home/sl206/miniconda3/envs/openmm/lib/python3.6/site-packages') +#sys.path.append('/home/sl206/miniconda3/pkgs') + +import argparse + +#Import openAWSEM, open3SPN2 and other libraries +import pandas as pd +import numpy as np +import openmm +#import openmm + +from functools import partial +import sys + +import open3SPN2 +import openawsem + +import openmm.app +import openmm.unit + + +def printEnergy(simulation, forces): + # #Total energy + energy_unit=openmm.unit.kilocalorie_per_mole + state = simulation.context.getState(getEnergy=True) + energy = state.getPotentialEnergy().value_in_unit(energy_unit) + print('Caution! The energy terms with identical energy values are in the same forceGroup!') + print('TotalEnergy',round(energy,6),energy_unit.get_symbol()) + + # #Detailed energy + energies = {} + for force_name, force in forces.items(): + group=force.getForceGroup() + state = simulation.context.getState(getEnergy=True, groups=2**group) + energies[force_name] =state.getPotentialEnergy().value_in_unit(energy_unit) + + for force_name in forces.keys(): + print(force_name, round(energies[force_name],6),energy_unit.get_symbol()) + +def write(message, output): + with open(output, 'a') as f: + f.write(message) + f.write('\n') + + +def writeEnergy(simulation, forces, output): + # #Total energy + energy_unit=openmm.unit.kilocalorie_per_mole + state = simulation.context.getState(getEnergy=True) + energy = state.getPotentialEnergy().value_in_unit(energy_unit) + print('TotalEnergy',round(energy,6),energy_unit.get_symbol()) + with open(output, 'a') as f: + f.write('Caution! The energy terms with identical energy values are in the same forceGroup! \n') + f.write(f'TotalEnergy {round(energy,6)} {energy_unit.get_symbol()}') + f.write('\n') + + # #Detailed energy + energies = {} + + for force_name, force in forces.items(): + group=force.getForceGroup() + state = simulation.context.getState(getEnergy=True, groups=2**group) + energies[force_name] =state.getPotentialEnergy().value_in_unit(energy_unit) + + for force_name in forces.keys(): + with open(output, 'a') as f: + f.write(f'{force_name} {round(energies[force_name],6)} {energy_unit.get_symbol()}') + f.write('\n') + +def savePDB(toPath, simulation, PDBfile_name): + state = simulation.context.getState(getPositions=True) + positions = state.getPositions() + with open(os.path.join(toPath, PDBfile_name), "w") as pdb_file: + openmm.app.PDBFile.writeFile(simulation.topology, positions, file=pdb_file) + +def run(args): + proteinDNA = args.proteinDNA + + pwd = os.getcwd() + toPath = os.path.abspath(args.to) + forceSetupFile = args.forces + + if args.to != "./": + # os.system(f"mkdir -p {args.to}") + os.makedirs(toPath, exist_ok=True) + os.system(f"cp {forceSetupFile} {toPath}/{forceSetupFile}") + + #Create the merged system + pdb=openmm.app.PDBFile(f'{proteinDNA}.pdb') + top=pdb.topology + coord=pdb.positions + forcefield=openmm.app.ForceField(openawsem.xml,open3SPN2.xml) + s=forcefield.createSystem(top) + + #Create the DNA and Protein Objects + dna=open3SPN2.DNA.fromCoarsePDB(f'{proteinDNA}.pdb') + #dna.computeTopology(template_from_X3DNA=True) + with open('protein.seq') as ps: + protein_sequence_one=ps.readlines()[0] + protein=openawsem.Protein.fromCoarsePDB(f'{proteinDNA}.pdb',sequence=protein_sequence_one) + dna.periodic=False + protein.periodic=False + #Don't activate this below. Appears not to apply if you have Protein. + #s=open3SPN2.System(dna, periodicBox=None) + + print(s.getForces()) + + #forces={} + + print(f"using force setup file from {forceSetupFile}") + spec = importlib.util.spec_from_file_location("forces", forceSetupFile) + # print(spec) + forces_file = importlib.util.module_from_spec(spec) + spec.loader.exec_module(forces_file) + forces = forces_file.set_up_forces(s,protein, dna, computeQ = False) + + # #Initialize Molecular Dynamics simulations + + temperature=args.tempStart * openmm.unit.kelvin + Tstart = args.tempStart + output = f"{toPath}/{args.output}" + platform_name=args.Platform #'Reference','CPU','CUDA', 'OpenCL' + + integrator = openmm.LangevinIntegrator(temperature, 1 / openmm.unit.picosecond, 2 * openmm.unit.femtoseconds) + platform = openmm.Platform.getPlatformByName(platform_name) + #platform.setPropertyDefaultValue('OpenCLPlatformIndex', '0') + #platform.setPropertyDefaultValue('DeviceIndex', args.device) + + simulation = openmm.app.Simulation(top,s, integrator, platform) + simulation.context.setPositions(coord) + printEnergy(simulation, forces) + write("Starting structure", output) + writeEnergy(simulation, forces, output) + + #reporter_frequency = 1000 + #append = False + #print("reporter_frequency", reporter_frequency) + #pdb_reporter=openmm.app.PDBReporter(os.path.join(toPath, "movie.pdb"), reporter_frequency) + #dcd_reporter=openmm.app.DCDReporter(os.path.join(toPath, "output.dcd"), reporter_frequency, append=False) + #energy_reporter=openmm.app.StateDataReporter(sys.stdout, reporter_frequency, step=True,time=True, potentialEnergy=True, temperature=True) + #output_reporter=openmm.app.StateDataReporter(os.path.join(toPath, "output.log"), reporter_frequency, step=True,time=True, potentialEnergy=True, temperature=True) + #checkpoint_reporter=openmm.app.CheckpointReporter(os.path.join(toPath, "checkpoint.chk"), reporter_frequency) + #simulation.reporters.append(pdb_reporter) + #simulation.reporters.append(dcd_reporter) + #simulation.reporters.append(energy_reporter) + #simulation.reporters.append(output_reporter) + #simulation.reporters.append(checkpoint_reporter) + + # initial minimization block + print("minization start") + integrator = openmm.CustomIntegrator(0.001) + simulation = openmm.app.Simulation(top,s, integrator, platform) + simulation.context.setPositions(coord) + print("Initial energies") + printEnergy(simulation, forces) + write("", output) + write("Initial energies", output) + writeEnergy(simulation, forces, output) + savePDB(toPath, simulation, PDBfile_name = "init.pdb") + simulation.minimizeEnergy() + print("Initial min energies") + printEnergy(simulation, forces) + write("", output) + write("Initial min energies", output) + writeEnergy(simulation, forces, output) + savePDB(toPath, simulation, PDBfile_name = "init_min.pdb") + dcd_reporter=openmm.app.DCDReporter(os.path.join(toPath, "output.dcd"), 1) + simulation.reporters.append(dcd_reporter) + simulation.step(1) + # MD minimization block + integrator = openmm.LangevinIntegrator(Tstart*openmm.unit.kelvin, 1/openmm.unit.picosecond, args.timeStep*openmm.unit.femtoseconds) + simulation = openmm.app.Simulation(top,s, integrator, platform) + #simulation.context.setPositions(coord) # set the initial positions of the atoms + print("Now T = 300 K energies") + init_min_pdb = openmm.app.PDBFile(os.path.join(toPath, "init_min.pdb")) + simulation.context.setPositions(init_min_pdb.positions) + simulation.context.setVelocitiesToTemperature(Tstart*openmm.unit.kelvin) + printEnergy(simulation, forces) + write("", output) + write("Now T = 300 K energies", output) + writeEnergy(simulation, forces, output) + simulation.minimizeEnergy() # first, minimize the energy to a local minimum to reduce any large forces that might be present + savePDB(toPath, simulation, PDBfile_name = "MD_min.pdb") + print("Now T = 300 K min energies") + printEnergy(simulation, forces) + write("", output) + write("Now T = 300 K min energies", output) + writeEnergy(simulation, forces, output) + simulation.step(1) + print("minization end") + write("minimization end", output) + +def main(): + # from run_parameter import * + parser = argparse.ArgumentParser( + description="This is a python3 script to\ + automatic copy the template file, \ + run simulations") + + parser.add_argument("proteinDNA", help="The name of the proteinDNA system") + parser.add_argument("--to", default="./", help="location of minimization output file") + parser.add_argument("--tempStart", type=float, default=300, help="Starting temperature") + #parser.add_argument("-l", "--fragment", type=str, default="./frags.mem", help="Fragment memory (single or std)") #temporary placeholder + #parser.add_argument("-a", "--AWSEM", type=str, default="./", help="protein-only AWSEM folder, should have fragment library") #not temporary + parser.add_argument("-f", "--forces", type=str, default="forces_setup.py", help="forces setup file") #not temporary + parser.add_argument("-o", "--output", type=str, default="energy_output.log", help="Output file.") + parser.add_argument("-p", "--Platform", type=str, default="OpenCL", help="platform to use") + parser.add_argument("--timeStep", type=int, default=2) + args = parser.parse_args() + + + with open('energy_args.txt', 'a') as f: + f.write(' '.join(sys.argv)) + f.write('\n') + print(' '.join(sys.argv)) + + run(args) + +if __name__=="__main__": + main() diff --git a/open3SPN2/scripts/protein_DNA_run.py b/open3SPN2/scripts/protein_DNA_run.py new file mode 100644 index 0000000..0742071 --- /dev/null +++ b/open3SPN2/scripts/protein_DNA_run.py @@ -0,0 +1,232 @@ +import sys +import os + +#Import openAWSEM, open3SPN2 and other libraries +import pandas as pd +import numpy as np +import openmm + +from functools import partial + +import open3SPN2 +import openawsem + +import openmm.app +import openmm.unit + +import time + +import importlib.util + +import argparse + +def printEnergy(simulation, forces): + # #Total energy + energy_unit=openmm.unit.kilocalorie_per_mole + state = simulation.context.getState(getEnergy=True) + energy = state.getPotentialEnergy().value_in_unit(energy_unit) + print('Caution! The energy terms with identical energy values are in the same forceGroup!') + print('TotalEnergy',round(energy,6),energy_unit.get_symbol()) + + # #Detailed energy + energies = {} + for force_name, force in forces.items(): + group=force.getForceGroup() + state = simulation.context.getState(getEnergy=True, groups=2**group) + energies[force_name] =state.getPotentialEnergy().value_in_unit(energy_unit) + + for force_name in forces.keys(): + print(force_name, round(energies[force_name],6),energy_unit.get_symbol()) + +def savePDB(toPath, simulation, PDBfile_name): + state = simulation.context.getState(getPositions=True) + positions = state.getPositions() + with open(os.path.join(toPath, PDBfile_name), "w") as pdb_file: + openmm.app.PDBFile.writeFile(simulation.topology, positions, file=pdb_file) + +def run(args): + simulation_platform = args.platform + platform = openmm.Platform.getPlatformByName(simulation_platform) + if simulation_platform == "CPU": + if args.thread != -1: + platform.setPropertyDefaultValue("Threads", str(args.thread)) + print(f"{simulation_platform}: {platform.getPropertyDefaultValue('Threads')} threads") + + #aries specific block + elif simulation_platform == "OpenCL": + platform.setPropertyDefaultValue('OpenCLPlatformIndex', '0') + platform.setPropertyDefaultValue('DeviceIndex', args.device) + + pwd = os.getcwd() + toPath = os.path.abspath(args.to) + forceSetupFile = args.forces + + if args.to != "./": + # os.system(f"mkdir -p {args.to}") + os.makedirs(toPath, exist_ok=True) + os.system(f"cp {forceSetupFile} {toPath}/{forceSetupFile}") + + checkPointPath = None if args.fromCheckPoint is None else os.path.abspath(args.fromCheckPoint) + + proteinDNA = args.proteinDNA + + pdb=openmm.app.PDBFile(f'{proteinDNA}.pdb') + top=pdb.topology + coord=pdb.positions + forcefield=openmm.app.ForceField(openawsem.xml,open3SPN2.xml) + s=forcefield.createSystem(top, removeCMMotion=not args.removeCMMotionRemover) + + #Create the DNA and Protein Objects + dna=open3SPN2.DNA.fromCoarsePDB(f'{proteinDNA}.pdb') + #dna.computeTopology(template_from_X3DNA=True) + with open('protein.seq') as ps: + protein_sequence_one=ps.readlines()[0] + protein=openawsem.Protein.fromCoarsePDB(f'{proteinDNA}.pdb',sequence=protein_sequence_one) + dna.periodic=False + protein.periodic=False + #Don't activate this below. Appears not to apply if you have Protein. + #s=open3SPN2.System(dna, periodicBox=None) + + #forces={} + + print(f"using force setup file from {forceSetupFile}") + spec = importlib.util.spec_from_file_location("forces", forceSetupFile) + # print(spec) + forces_file = importlib.util.module_from_spec(spec) + spec.loader.exec_module(forces_file) + forces = forces_file.set_up_forces(s,protein, dna, computeQ = False) + + # #Initialize Molecular Dynamics simulations + snapShotCount = args.Frames + stepsPerT = int(args.steps/snapShotCount) + Tstart = args.tempStart + Tend = args.tempEnd + if args.reportFrequency == -1: + if stepsPerT == 0: + reporter_frequency = 4000 + else: + reporter_frequency = stepsPerT + else: + reporter_frequency = args.reportFrequency + # reporter_frequency = 4000 + #temperature=300 * openmm.unit.kelvin + + if args.fromCheckPoint: + integrator = openmm.LangevinIntegrator(Tstart*openmm.unit.kelvin, 1/openmm.unit.picosecond, args.timeStep*openmm.unit.femtoseconds) + simulation = openmm.app.Simulation(top,s, integrator, platform) + simulation.loadCheckpoint(checkPointPath) + else: + # initial minimization block + print("minization start") + integrator = openmm.CustomIntegrator(0.001) + simulation = openmm.app.Simulation(top,s, integrator, platform) + simulation.context.setPositions(coord) + print("Initial energies") + printEnergy(simulation, forces) + savePDB(toPath, simulation, PDBfile_name = "init.pdb") + simulation.minimizeEnergy() + print("Initial min energies") + printEnergy(simulation, forces) + savePDB(toPath, simulation, PDBfile_name = "init_min.pdb") + dcd_reporter=openmm.app.DCDReporter(os.path.join(toPath, "output.dcd"), 1) + simulation.reporters.append(dcd_reporter) + simulation.step(1) + # MD minimization block + integrator = openmm.LangevinIntegrator(Tstart*openmm.unit.kelvin, 1/openmm.unit.picosecond, args.timeStep*openmm.unit.femtoseconds) + simulation = openmm.app.Simulation(top,s, integrator, platform) + #simulation.context.setPositions(coord) # set the initial positions of the atoms + print("Now T = 300 K energies") + init_min_pdb = openmm.app.PDBFile(os.path.join(toPath, "init_min.pdb")) + simulation.context.setPositions(init_min_pdb.positions) + simulation.context.setVelocitiesToTemperature(Tstart*openmm.unit.kelvin) + printEnergy(simulation, forces) + simulation.minimizeEnergy() # first, minimize the energy to a local minimum to reduce any large forces that might be present + savePDB(toPath, simulation, PDBfile_name = "MD_min.pdb") + print("Now T = 300 K min energies") + printEnergy(simulation, forces) + simulation.step(1) + print("minization end") + + #simulation = openmm.app.Simulation(top,s, integrator, platform) + + energy_unit=openmm.unit.kilocalorie_per_mole + #state = simulation.context.getState(getEnergy=True) + #energy = state.getPotentialEnergy().value_in_unit(energy_unit) + #print(energy) + # #Set initial positions + #simulation.context.setPositions(s.coord.getPositions()) + + print("reporter_frequency", reporter_frequency) + pdb_reporter=openmm.app.PDBReporter(os.path.join(toPath, "movie.pdb"), reporter_frequency) + dcd_reporter=openmm.app.DCDReporter(os.path.join(toPath, "output.dcd"), reporter_frequency, append=True) + energy_reporter=openmm.app.StateDataReporter(sys.stdout, reporter_frequency, step=True,time=True, potentialEnergy=True, temperature=True) + output_reporter=openmm.app.StateDataReporter(os.path.join(toPath, "output.log"), reporter_frequency, step=True,time=True, potentialEnergy=True, temperature=True, append=True) + checkpoint_reporter=openmm.app.CheckpointReporter(os.path.join(toPath, "checkpoint.chk"), reporter_frequency) + simulation.reporters.append(pdb_reporter) + simulation.reporters.append(dcd_reporter) + simulation.reporters.append(energy_reporter) + simulation.reporters.append(output_reporter) + simulation.reporters.append(checkpoint_reporter) + + print("Simulation Start") + if args.simulation_mode == 0: + simulation.step(int(args.steps)) + elif args.simulation_mode == 1: + deltaT = (Tend - Tstart) / snapShotCount + for i in range(snapShotCount): + integrator.setTemperature((Tstart + deltaT*i)*openmm.unit.kelvin) + simulation.step(stepsPerT) + print("Simulation finished") + + print("Analysis start") + #os.system(f"{sys.executable} protein_DNA_analysis.py {args.proteinDNA}.pdb -t {os.path.join(toPath, "output.dcd")} -a {args.AWSEM} -l {args.fragment} -o {os.path.join(toPath, "info.dat")}") + os.system(f"{sys.executable} protein_DNA_analysis.py {args.proteinDNA}.pdb -t {os.path.join(toPath, 'output.dcd')} -f {args.forces} -o {os.path.join(toPath, 'info.dat')}") + print("Analysis finished") + +def main(): + # from run_parameter import * + parser = argparse.ArgumentParser( + description="This is a python3 script to\ + automatic copy the template file, \ + run simulations") + + parser.add_argument("proteinDNA", help="The name of the proteinDNA system") + parser.add_argument("--to", default="./", help="location of movie file") + #parser.add_argument("-c", "--chain", type=str, default="-1") + parser.add_argument("-t", "--thread", type=int, default=-1, help="default is using all that is available") + parser.add_argument("-p", "--platform", type=str, default="OpenCL") + parser.add_argument("-s", "--steps", type=int, default=10000000, help="step size, default 10,000,000") + parser.add_argument("--tempStart", type=float, default=300, help="Starting temperature") + parser.add_argument("--tempEnd", type=float, default=300, help="Ending temperature") + parser.add_argument("--fromCheckPoint", type=str, default=None, help="The checkpoint file you want to start from") + parser.add_argument("-m", "--simulation_mode", type=int, default=0, + help="default 1,\ + 0: constant temperature,\ + 1: temperature annealing") + parser.add_argument("--subMode", type=int, default=-1) + #parser.add_argument("-f", "--forces", default="forces_setup.py") + #parser.add_argument("--parameters", default=None) + parser.add_argument("-r", "--reportFrequency", type=int, default=-1, help="default value step/400") + #parser.add_argument("--fromOpenMMPDB", action="store_true", default=False) + #parser.add_argument("--fasta", type=str, default="crystal_structure.fasta") + parser.add_argument("--timeStep", type=int, default=2) + #parser.add_argument("--includeLigands", action="store_true", default=False) + parser.add_argument('--Frames', type=int, default=400, help="Number of frames") + parser.add_argument('--device',default='0') + parser.add_argument("-f", "--forces", default="forces_setup.py", type=str, help="forces setup file") + parser.add_argument('--removeCMMotionRemover', action="store_true", default=False, help='Removes CMMotionRemover. Recommended for periodic boundary conditions and membrane simulations') + #parser.add_argument("-l", "--fragment", type=str, default="./frags.mem", help="Fragment memory (single or std)") #temporary placeholder + #parser.add_argument("-a", "--AWSEM", type=str, default="./", help="protein-only AWSEM folder, should have fragment library") #not temporary + args = parser.parse_args() + + + with open('commandline_args.txt', 'a') as f: + f.write(' '.join(sys.argv)) + f.write('\n') + print(' '.join(sys.argv)) + + run(args) + +if __name__=="__main__": + + main() diff --git a/tests/conftest.py b/tests/conftest.py index 98e64ae..810ebb9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,134 @@ +import numpy as np +import pytest +import openmm +import openmm.unit as unit + + def pytest_addoption(parser): parser.addoption("--skip-platform", action="store", default=None, help="Skip tests for a specific platform") def pytest_configure(config): - config.addinivalue_line("markers", "skip_platform: mark test to be skipped for a specific platform") \ No newline at end of file + config.addinivalue_line("markers", "skip_platform: mark test to be skipped for a specific platform") + + +# --------------------------------------------------------------------------- # +# Real-structure fixtures for the opt-in bias / collective-variable tests. +# +# Each fixture exposes a ForceHarness that evaluates one force at a time on a +# fixed real structure, reusing a single openmm.System (only a lightweight +# Context is rebuilt per call) so the suite does not create many systems. +# --------------------------------------------------------------------------- # + +_PLATFORM = openmm.Platform.getPlatformByName('Reference') +_DNA_SEQUENCE = 'ATACAAAGGTGCGAGGTTTCTATGCTCCCACG' +_CLEAN_PDB = 'examples/Protein_DNA/clean.pdb' +_PROTEIN_SEQ = 'examples/Protein_DNA/protein.seq' + + +class ForceHarness: + """Evaluate a single opt-in force on a fixed structure. + + ``dna.atoms`` row order matches the system particle order, so atom indices in + ``dna.atoms`` are valid particle indices for the force and for the centroid + helpers below. + """ + + def __init__(self, dna, raw_system, positions, protein=None): + self.dna = dna + self.protein = protein + self._raw = raw_system + self.positions = positions + self.posnm = np.array(positions.value_in_unit(unit.nanometer)) + self.masses = np.array([raw_system.getParticleMass(i).value_in_unit(unit.dalton) + for i in range(raw_system.getNumParticles())]) + + def _context(self, force_obj): + raw = self._raw + while raw.getNumForces() > 0: + raw.removeForce(raw.getNumForces() - 1) + raw.addForce(force_obj.force) + integrator = openmm.VerletIntegrator(1 * unit.femtosecond) + context = openmm.Context(raw, integrator, _PLATFORM) + context.setPositions(self.positions) + return context + + def energy(self, force_obj, group): + """Potential energy (kJ/mol) of ``force_obj`` read from its force group.""" + state = self._context(force_obj).getState(getEnergy=True, groups={int(group)}) + return state.getPotentialEnergy().value_in_unit(unit.kilojoule_per_mole) + + def max_force_error(self, force_obj, group, h=1e-5): + """Largest |analytic force - central finite difference| over active atoms.""" + context = self._context(force_obj) + analytic = context.getState(getForces=True, groups={int(group)}).getForces(asNumpy=True).value_in_unit( + unit.kilojoule_per_mole / unit.nanometer) + + def energy_at(coords): + context.setPositions(coords * unit.nanometer) + return context.getState(getEnergy=True, groups={int(group)}).getPotentialEnergy().value_in_unit( + unit.kilojoule_per_mole) + + worst = 0.0 + active = np.where(np.abs(analytic).sum(axis=1) > 1e-9)[0] + for i in active: + for dim in range(3): + up = self.posnm.copy(); up[i, dim] += h + dn = self.posnm.copy(); dn[i, dim] -= h + numeric = -(energy_at(up) - energy_at(dn)) / (2 * h) + worst = max(worst, abs(numeric - analytic[i, dim])) + return worst + + def mass_centroid(self, indices): + weights = self.masses[indices] + return (weights[:, None] * self.posnm[indices]).sum(axis=0) / weights.sum() + + def geometric_centroid(self, indices): + return self.posnm[indices].mean(axis=0) + + def atom_indices(self, resSeq=None, chainID=None, name=None, nonzero_mass=False): + atoms = self.dna.atoms + mask = np.ones(len(atoms), dtype=bool) + if resSeq is not None: + mask &= (atoms['resSeq'].values == resSeq) + if chainID is not None: + mask &= (atoms['chainID'].values == chainID) + if name is not None: + mask &= (atoms['name'].values == name) + indices = [int(i) for i in atoms.index[mask]] + if nonzero_mass: + indices = [i for i in indices if self.masses[i] > 0] + return indices + + +@pytest.fixture(scope='module') +def dna_harness(): + """DNA-only real structure built from a sequence (no AWSEM needed).""" + from open3SPN2 import DNA, System + dna = DNA.fromSequence(_DNA_SEQUENCE) + dna.periodic = False + system = System(dna) + return ForceHarness(dna, system._wrapped_system, system.coord.getPositions()) + + +@pytest.fixture(scope='module') +def protein_dna_harness(): + """Real merged protein-DNA structure from examples/Protein_DNA/clean.pdb. + + Skipped when openawsem is not installed (it is needed to build the protein + object and the merged force field). Chains 1,2 are DNA; 3,4 are protein. + """ + openawsem = pytest.importorskip('openawsem') + import openmm.app + import open3SPN2 + + dna = open3SPN2.DNA.fromCoarsePDB(_CLEAN_PDB) + dna.periodic = False + with open(_PROTEIN_SEQ) as seq_file: + sequence = seq_file.readlines()[0].strip() + protein = openawsem.Protein.fromCoarsePDB(_CLEAN_PDB, sequence=sequence) + protein.periodic = False + + pdb = openmm.app.PDBFile(_CLEAN_PDB) + forcefield = openmm.app.ForceField(openawsem.xml, open3SPN2.xml) + system = forcefield.createSystem(pdb.topology) + return ForceHarness(dna, system, pdb.getPositions(), protein=protein) diff --git a/tests/test_bias.py b/tests/test_bias.py new file mode 100644 index 0000000..befe462 --- /dev/null +++ b/tests/test_bias.py @@ -0,0 +1,129 @@ +"""Executable specification for the opt-in bias forces. + +Each test states one contract in its docstring and pins it with an analytical +oracle on a real structure. Target offsets use ``_OFFSET``, three distinct +nonzero components, so a wrong axis, component, or sign fails a single witness. +""" + +import numpy as np +import pytest +import openmm.unit as unit + +from open3SPN2.force.bias import (PositionRestraint, StringProteinDNA, + BasePairProteinHarmonicBias, ElectrostaticBias, + AMHgoProteinDNA) +from open3SPN2.force.protein_dna import ElectrostaticsProteinDNA, select_string_groups +from open3SPN2.force import force_groups as fg + +_kJ_nm2 = unit.kilojoule_per_mole / unit.nanometer ** 2 +_OFFSET = np.array([0.13, -0.21, 0.37]) + + +def _target(point): + return dict(x0=point[0] * unit.nanometer, y0=point[1] * unit.nanometer, z0=point[2] * unit.nanometer) + + +def test_position_restraint_is_half_k_distance_squared(dna_harness): + """Restraint energy is 0.5*k*|centroid - r0|^2 about the centroid of all atoms.""" + centroid = dna_harness.geometric_centroid(dna_harness.atom_indices()) + k = 1000.0 + force = PositionRestraint(dna_harness.dna, k=k * _kJ_nm2, **_target(centroid + _OFFSET)) + assert dna_harness.energy(force, fg.MEMBRANE) == pytest.approx(0.5 * k * _OFFSET.dot(_OFFSET)) + + +def test_position_restraint_acts_on_selected_residues_only(dna_harness): + """The restrained point is the centroid of the selected residues, not of all atoms.""" + subset_centroid = dna_harness.geometric_centroid(dna_harness.atom_indices(resSeq=1)) + assert not np.allclose(subset_centroid, + dna_harness.geometric_centroid(dna_harness.atom_indices())) + force = PositionRestraint(dna_harness.dna, k=1000 * _kJ_nm2, + appliedToResidues=[1], **_target(subset_centroid)) + assert dna_harness.energy(force, fg.MEMBRANE) == pytest.approx(0.0, abs=1e-6) + + +def test_position_restraint_force_is_negative_energy_gradient(dna_harness): + """The restraint force equals -dE/dx (finite-difference check).""" + centroid = dna_harness.geometric_centroid(dna_harness.atom_indices(resSeq=2)) + force = PositionRestraint(dna_harness.dna, k=500 * _kJ_nm2, + appliedToResidues=[2], **_target(centroid + _OFFSET)) + assert dna_harness.max_force_error(force, fg.MEMBRANE) < 1e-4 + + +def test_position_restraint_rejects_empty_selection(dna_harness): + """Selecting no atoms is rejected with a clear error rather than a silent empty force.""" + with pytest.raises(ValueError): + PositionRestraint(dna_harness.dna, appliedToResidues=[9999]) + + +def test_string_potential_is_half_k_distance_from_r0_squared(protein_dna_harness): + """String energy is 0.5*k*(d - r0)^2 in the centroid-centroid distance d.""" + harness = protein_dna_harness + protein_idx, dna_idx = select_string_groups(harness.dna, '34', '12') + distance = np.linalg.norm(harness.mass_centroid(protein_idx) - harness.mass_centroid(dna_idx)) + k, r0 = 41.84, distance - 0.7 + force = StringProteinDNA(harness.dna, None, r0=r0, chain_protein='34', chain_DNA='12', k_string_PD=k) + assert harness.energy(force, fg.PULLING) == pytest.approx(0.5 * k * (distance - r0) ** 2, rel=1e-4) + + +def test_base_pair_bias_is_half_k_projection_squared(protein_dna_harness): + """Base-pair bias energy is 0.5*k*i^2 in the protein's base-pair projection offset i.""" + harness = protein_dna_harness + sugars = harness.atom_indices(chainID='1', name='S', nonzero_mass=True) + left, right = [sugars[0]], [sugars[5]] + protein = [harness.atom_indices(chainID='3', name='CA', nonzero_mass=True)[0]] + bpl, bpr, point = harness.posnm[left[0]], harness.posnm[right[0]], harness.posnm[protein[0]] + sep, k = 4, 4.184 + offset = sep * ((point - bpl).dot(bpr - bpl) / (bpr - bpl).dot(bpr - bpl) - 0.5) + force = BasePairProteinHarmonicBias(harness.dna, None, left, right, protein, base_pair_sep=sep, k=k) + assert harness.energy(force, fg.PROTEIN_DNA_BIAS) == pytest.approx(0.5 * k * offset ** 2, rel=1e-4) + + +def test_electrostatic_bias_squares_the_scaled_energy_deviation(protein_dna_harness): + """Electrostatic bias is 0.5*k_ebias*((E_elec - center)/4.184)^2 over the protein-DNA electrostatic energy.""" + harness = protein_dna_harness + k_elec, ldby = 1.0, 1.2 * unit.nanometer + e_elec = harness.energy(ElectrostaticsProteinDNA(harness.dna, harness.protein, k=k_elec, ldby=ldby), + fg.ELECTROSTATICS_PROTEIN_DNA) + k_ebias, center = 2.0, e_elec - 5.0 + bias = ElectrostaticBias(harness.dna, harness.protein, k_ebias=k_ebias, center=center, + k_elec=k_elec, ldby=ldby) + assert harness.energy(bias, fg.PROTEIN_DNA_BIAS) == pytest.approx( + 0.5 * k_ebias * ((e_elec - center) / 4.184) ** 2, rel=1e-4) + + +def test_amhgo_adds_a_gaussian_well_at_each_native_contact(protein_dna_harness, tmp_path): + """AMHgo adds a well -k*gamma*exp(-(r - r_ijN)^2/(2*sigma^2)) per native contact within the cutoff.""" + harness = protein_dna_harness + atoms = harness.dna.atoms + protein_residues = ['IPR', 'IGL', 'NGP'] + + def bonded_protein_atom(chain, res): # CB if present else CA, exactly as AMHgo selects + cb = atoms[(atoms['chainID'] == chain) & (atoms['resSeq'] == res) + & (atoms['name'] == 'CB') & atoms['resname'].isin(protein_residues)] + chosen = cb if len(cb) else atoms[(atoms['chainID'] == chain) & (atoms['resSeq'] == res) + & (atoms['name'] == 'CA') & atoms['resname'].isin(protein_residues)] + return int(chosen.index[0]) + + def base_atom(chain, res): + return int(atoms[(atoms['chainID'] == chain) & (atoms['resSeq'] == res) + & atoms['name'].isin(['A', 'T', 'G', 'C'])].index[0]) + + # Two real protein(chain 3)-DNA(chain 1) interface contacts, both within cutoff. + contacts = [(45, 4), (48, 4)] + delta, sigma_sq = 0.2, 0.05 + depth = (1 * unit.kilocalorie_per_mole).value_in_unit(unit.kilojoule_per_mole) + + rows = [] + expected = 0.0 + for protein_res, dna_res in contacts: + actual = np.linalg.norm(harness.posnm[bonded_protein_atom('3', protein_res)] + - harness.posnm[base_atom('1', dna_res)]) + r_ijN = actual - delta # stretch the contact so the Gaussian is exercised + rows.append(f"{protein_res} {dna_res} {r_ijN * 10}") + expected += -depth * np.exp(-delta ** 2 / (2 * sigma_sq)) + contact_file = tmp_path / "contact_protein_DNA.dat" + contact_file.write_text("\n".join(rows) + "\n") + + force = AMHgoProteinDNA(harness.dna, harness.protein, chain_protein='3', chain_DNA='1', + contact_file=str(contact_file)) + assert harness.energy(force, fg.AMH_GO) == pytest.approx(expected, rel=1e-4) diff --git a/tests/test_collective_variables.py b/tests/test_collective_variables.py new file mode 100644 index 0000000..0d70107 --- /dev/null +++ b/tests/test_collective_variables.py @@ -0,0 +1,66 @@ +"""Executable specification for the opt-in collective-variable readouts. + +Each readout reports a geometric value as its "energy". Each test states one +contract in its docstring and pins it with an analytical oracle on a real +structure. ``_OFFSET`` has three distinct nonzero components so a wrong axis or +sign fails a single witness. +""" + +import numpy as np +import pytest +import openmm.unit as unit + +from open3SPN2.force.collective_variables import (DistanceFromPoint, StringLength, + BasePairProteinPosition) +from open3SPN2.force.protein_dna import select_string_groups +from open3SPN2.force import force_groups as fg + +_OFFSET = np.array([0.13, -0.21, 0.37]) + + +def test_distance_from_point_reports_centroid_distance(dna_harness): + """Reports the Euclidean distance from the selected-atom centroid to r0.""" + centroid = dna_harness.geometric_centroid(dna_harness.atom_indices()) + cv = DistanceFromPoint(dna_harness.dna, + x0=(centroid[0] + _OFFSET[0]) * unit.nanometer, + y0=(centroid[1] + _OFFSET[1]) * unit.nanometer, + z0=(centroid[2] + _OFFSET[2]) * unit.nanometer) + assert dna_harness.energy(cv, fg.MEASUREMENT) == pytest.approx(np.linalg.norm(_OFFSET)) + + +def test_string_length_reports_centroid_distance(protein_dna_harness): + """Reports the mass-weighted centroid-centroid distance of the protein and DNA groups.""" + harness = protein_dna_harness + protein_idx, dna_idx = select_string_groups(harness.dna, '34', '12') + expected = np.linalg.norm(harness.mass_centroid(protein_idx) - harness.mass_centroid(dna_idx)) + cv = StringLength(harness.dna, None, chain_protein='34', chain_DNA='12') + assert harness.energy(cv, fg.MEASUREMENT) == pytest.approx(expected, rel=1e-4) + + +def test_string_length_pools_the_requested_chains(protein_dna_harness): + """Chain spec accepts one or many chains: 'AB' equals ['A','B'] and differs from a single chain.""" + harness = protein_dna_harness + pooled = harness.energy(StringLength(harness.dna, None, chain_protein='34', chain_DNA='12'), fg.MEASUREMENT) + listed = harness.energy(StringLength(harness.dna, None, chain_protein=['3', '4'], chain_DNA=['1', '2']), fg.MEASUREMENT) + single = harness.energy(StringLength(harness.dna, None, chain_protein='3', chain_DNA='1'), fg.MEASUREMENT) + assert pooled == pytest.approx(listed, rel=1e-9) + assert pooled != pytest.approx(single, rel=1e-9) + + +def test_base_pair_position_reports_projection_in_base_pairs(protein_dna_harness): + """Reports the protein point's projection onto the base-pair axis as a base-pair offset.""" + harness = protein_dna_harness + sugars = harness.atom_indices(chainID='1', name='S', nonzero_mass=True) + left, right = [sugars[0]], [sugars[5]] + protein = [harness.atom_indices(chainID='3', name='CA', nonzero_mass=True)[0]] + bpl, bpr, point = harness.posnm[left[0]], harness.posnm[right[0]], harness.posnm[protein[0]] + sep = 4 + expected = sep * ((point - bpl).dot(bpr - bpl) / (bpr - bpl).dot(bpr - bpl) - 0.5) + cv = BasePairProteinPosition(harness.dna, None, left, right, protein, base_pair_sep=sep) + assert harness.energy(cv, fg.MEASUREMENT) == pytest.approx(expected, rel=1e-4) + + +def test_readout_stays_out_of_the_energy_total(dna_harness): + """A readout's force group is excluded from the groups summed into the total energy.""" + cv = DistanceFromPoint(dna_harness.dna) + assert cv.force.getForceGroup() not in fg.TOTAL_ENERGY diff --git a/tests/test_optional_forces.py b/tests/test_optional_forces.py new file mode 100644 index 0000000..0b739b4 --- /dev/null +++ b/tests/test_optional_forces.py @@ -0,0 +1,60 @@ +"""Tests for the lazy optional-forces registry.""" + +import subprocess +import sys + +import pytest + +import open3SPN2 +from open3SPN2.optional_forces import optional_forces +import open3SPN2.force.bias as bias +import open3SPN2.force.collective_variables as cv + +_EXPECTED = { + "PositionRestraint", "StringProteinDNA", "ElectrostaticBias", + "BasePairProteinHarmonicBias", "AMHgoProteinDNA", "DistanceFromPoint", + "StringLength", "BasePairProteinPosition", +} + + +def test_available_lists_all_without_importing(): + assert set(optional_forces.available()) == _EXPECTED + assert optional_forces.available() == sorted(_EXPECTED) + assert len(optional_forces) == len(_EXPECTED) + assert set(optional_forces) == _EXPECTED + + +def test_getitem_resolves_to_the_real_class(): + assert optional_forces["PositionRestraint"] is bias.PositionRestraint + assert optional_forces["StringProteinDNA"] is bias.StringProteinDNA + assert optional_forces["DistanceFromPoint"] is cv.DistanceFromPoint + assert optional_forces["BasePairProteinPosition"] is cv.BasePairProteinPosition + + +def test_attribute_access(): + assert optional_forces.ElectrostaticBias is bias.ElectrostaticBias + assert optional_forces.StringLength is cv.StringLength + + +def test_exposed_on_package(): + assert open3SPN2.optional_forces is optional_forces + + +def test_unknown_name_raises(): + with pytest.raises(KeyError): + optional_forces["NotAForce"] + with pytest.raises(AttributeError): + optional_forces.NotAForce + + +def test_core_import_does_not_pull_in_optional_modules(): + # Importing open3SPN2 must not import the bias / CV submodules; their import + # cost (and any error in them) is deferred until a force is requested. + code = ( + "import sys, open3SPN2; " + "assert open3SPN2.optional_forces.available(); " + "assert 'open3SPN2.force.bias' not in sys.modules, 'bias imported eagerly'; " + "assert 'open3SPN2.force.collective_variables' not in sys.modules, 'cv imported eagerly'" + ) + result = subprocess.run([sys.executable, "-c", code], capture_output=True, text=True) + assert result.returncode == 0, result.stderr