From a07c751e10ef29179c91f079edc6e189153ad564 Mon Sep 17 00:00:00 2001 From: cryobry <38270216+cryobry@users.noreply.github.com> Date: Sun, 7 Jul 2019 23:24:54 -0400 Subject: [PATCH] Add modules --- .gitignore | 1 - auto_namd/__init__.py | 0 auto_namd/files.py | 0 auto_namd/functions.py | 46 ++++++ auto_namd/job.py | 113 +++++++++++++ auto_namd/simulation.py | 356 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 515 insertions(+), 1 deletion(-) create mode 100644 auto_namd/__init__.py create mode 100644 auto_namd/files.py create mode 100644 auto_namd/functions.py create mode 100644 auto_namd/job.py create mode 100644 auto_namd/simulation.py diff --git a/.gitignore b/.gitignore index 1393696..dd1b613 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -auto_namd/ __pycache__ testing hosts.sh diff --git a/auto_namd/__init__.py b/auto_namd/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/auto_namd/files.py b/auto_namd/files.py new file mode 100644 index 0000000..e69de29 diff --git a/auto_namd/functions.py b/auto_namd/functions.py new file mode 100644 index 0000000..0e209eb --- /dev/null +++ b/auto_namd/functions.py @@ -0,0 +1,46 @@ +import os +import re +import argparse + +# Natural sort +def natural_key(string_): + return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)] + + +# Return absolute path from relative or absolute path +def abs_path(path): + abs_path = os.path.abspath(path) + return abs_path + + +# Sanitize user input paths +def san_path(path): + san_path = os.path.normpath(path) + return san_path + + +# Argument parser for run_simulations.py +def sim_parser(defaults): + # Use default settings from run_simulation.py to generate argument parser + parser = argparse.ArgumentParser() + for _arg, _val in defaults.items(): + parser.add_argument(f'--{_arg}', type=type(_val[0]), default=_val[0], help=_val[1]) + args_dict = vars(parser.parse_args()) + return args_dict + + +# Argument parser for run_analysis.py +def analysis_parser(): + parser = argparse.ArgumentParser() + parser.add_argument('--steps', type=int, + help='Number of timesteps to run the simulation') + parser.add_argument('--jobs_path', type=str, nargs='+', action='append', + help='Directory path containing the jobs to be run') + parser.add_argument('--namd_params', type=str, + help='Optional parameters to send to namd') + parser.add_argument('--ffs_path', type=str, + help='Location of the forcefield files') + parser.add_argument('--namd2_bin', type=str, + help='Location of the namd2 executable') + args = parser.parse_args() + return args diff --git a/auto_namd/job.py b/auto_namd/job.py new file mode 100644 index 0000000..9cddf0b --- /dev/null +++ b/auto_namd/job.py @@ -0,0 +1,113 @@ +import os +import sys +import glob + +from auto_namd.functions import * +from auto_namd.simulation import Simulation + + +class Job: + """ + A class that represents a job/system residing in a unique directory path + + Attributes: + self.path = absolute job directory path + self.name = name of the job, derived from parent directory + self.prefix = self.path + self.name + self.pdb = latest .pdb file + self.psf = latest .psf file + self.coor = latest .coor file + self.stage = current simulation stage + self.step = current simulation steps + """ + + def __init__(self, path): + self.path = path + self.name = self.get_job_name() + self.prefix = os.path.join(self.path, self.name) + self.pdb, self.psf = self.get_pdb_and_psf() + self.stage, self.step, self.coor = self.get_stage() + + + def get_job_name(self): + '''Get job name from path''' + name = os.path.basename(self.path) + return name + + + def get_pdb_and_psf(self): + pdb = glob.glob(f'{self.prefix}*_solv_ion.pdb') + psf = glob.glob(f'{self.prefix}*_solv_ion.psf') + if len(pdb) == 1 and len(psf) == 1: + return pdb[0], psf[0] + pdb = glob.glob(f'{self.prefix}.pdb') + psf = glob.glob(f'{self.prefix}.psf') + if len(pdb) == 1 and len(psf) == 1: + return pdb[0], psf[0] + else: + print('No PDB or PSF files found, exiting...') + sys.exit(1) + + + def get_stage(self): + coors_path = self.prefix + '*.coor' + coors = glob.glob(coors_path) + coors.sort(key=natural_key) + if len(coors) >= 1: + coor = coors[-1] + stage = coor.split('_')[-2].split('.')[0] + step = int(coor.split('_')[-1].split('.')[0]) + else: + coor = '' + stage = '' + step = 0 + return stage, int(step), coor + + + # allow the Simulation class to instantiate via the simulate() method + def simulate(self, ffs_path, steps, namdbin, params): + Simulation(self, ffs_path, steps, namdbin, params) + + + def info(self): + print(f'Job Path: {self.path}\n' + f'Name: {self.name}\n' + f'Working PDB: {self.pdb}\n' + f'Working PSF: {self.psf}\n' + f'Previous Step: {self.stage}\n' + f'Previous Step Number: {str(self.step)}\n' + ) + + +"""Job control""" +def get_next_job(jobs_path): + jobs_path = san_path(jobs_path) + job_dirs_l = get_jobs_from_path(jobs_path) + jobs = create_job_instances(job_dirs_l) + next_job = get_youngest_job(jobs) + return next_job + + +def get_jobs_from_path(jobs_path): + # Only return job paths with a pdb file + jobs_path = os.path.join(jobs_path, '**/*.pdb') + job_dirs = glob.glob(jobs_path, recursive=True) + job_dirs_l = [] + for job_dir in job_dirs: + job_dirs_l.append(os.path.dirname(job_dir)) + if len(job_dirs_l) < 1: + print('No valid jobs found in jobs path!') + sys.exit(1) + return job_dirs_l + + +def create_job_instances(job_paths): + job_instances = [] + for job_path in job_paths: + job_instances.append(Job(job_path)) + return job_instances + + +def get_youngest_job(jobs): + jobs.sort(key=lambda x: (x.stage, x.step)) + return jobs[0] diff --git a/auto_namd/simulation.py b/auto_namd/simulation.py new file mode 100644 index 0000000..9b0850b --- /dev/null +++ b/auto_namd/simulation.py @@ -0,0 +1,356 @@ +import glob +import subprocess as sp +import vmd + +from auto_namd.functions import * + +class Simulation: + """ + A class representing a simulation of a job + """ + + def __init__(self, job, ffs_path, steps, namdbin, params): + self.job = job + self.steps = int(steps) + self.namdbin = namdbin + self.params = params + # NAMD requires FF paths relative to the NAMD configuration file + # and not the CWD. So we make the paths to the ffs absolute + self.job.ffs_path = abs_path(san_path(ffs_path)) + self.job.ffs = self.get_ffs() + self.job.next_stage, self.job.next_step = self.get_next_stage() + self.job.conf, self.job.out = self.set_conf_out() + print(self.info()) + self.namd_conf() + self.namd() + + + def get_next_stage(self): + if self.job.stage == '4-sim': + next_stage = '4-sim' + next_step = self.job.step + self.steps + elif self.job.stage == '3-heat': + next_stage = '4-sim' + next_step = self.steps + elif self.job.stage == '2-min': + next_stage = '3-heat' + next_step = 0 + elif self.job.stage == '1-min': + next_stage = '2-min' + next_step = 0 + elif self.job.stage == '': + next_stage = '1-min' + next_step = 0 + return next_stage, int(next_step) + + + def get_ffs(self): + ffs = glob.glob(os.path.join(self.job.ffs_path, '*')) + return ffs + + + def set_conf_out(self): + """Set the NAMD configration and NAMD output filenames""" + if self.job.next_stage == '4-sim': + conf = f'{self.job.prefix}_{self.job.next_stage}' \ + f'_{str(self.next_step)}.conf' + out = f'{self.job.prefix}_{self.job.next_stage}' \ + f'_{str(self.job.next_step)}.out' + else: + conf = f'{self.job.prefix}_{self.job.next_stage}.conf' + out = f'{self.job.prefix}_{self.job.next_stage}.out' + return conf, out + + + def namd_conf(self): + if self.job.next_stage == '1-min': + self.min1() + if self.job.next_stage == '2-min': + self.min2() + if self.job.next_stage == '3-heat': + self.heat() + if self.job.next_stage == '4-sim': + self.sim() + + + def min1(self): + steps = 15000 + with open(self.job.conf, 'w') as conf: + conf.write('# Input\n') + conf.write(f'structure {self.job.psf}\n') + conf.write(f'coordinates {self.job.pdb}\n') + conf.write('paraTypeCharmm on\n') + for ff in job.ffs: + conf.write(f'parameters {ff}\n') + conf.write('\n') + conf.write('# Temperature\n') + conf.write('temperature 0\n') + conf.write('\n') + conf.write('# Force-Field Parameters\n') + conf.write('exclude scaled1-4\n') + conf.write('1-4scaling 1.0\n') + conf.write('cutoff 12.\n') + conf.write('switching on\n') + conf.write('switchdist 10.\n') + conf.write('pairlistdist 14\n') + conf.write('\n') + conf.write('# Integrator Parameters\n') + conf.write('timestep 1.0 ;# 1fs/step\n') + conf.write('nonbondedFreq 1\n') + conf.write('fullElectFrequency 2\n') + conf.write('stepspercycle 10\n') + conf.write('\n') + conf.write('# Output\n') + conf.write(f'outputName {self.job.name}_{self.job.next_stage}_{steps}\n') + conf.write('outputEnergies 100\n') + conf.write('outputPressure 100\n') + conf.write('\n') + conf.write('# Run\n') + conf.write(f'minimize {steps}') + + + def min2(self): + solv_ion(self.job) + cbvx, cbvy, cbvz, corx, cory, corz = self.calc_pcell(self.job) + steps = 15000 + with open(self.job.conf, 'w') as conf: + conf.write('# Input\n') + conf.write(f'structure {self.job.psf}\n') + conf.write(f'coordinates {self.job.pdb}\n') + conf.write('paraTypeCharmm on\n') + for ff in self.job.ffs: + conf.write(f'parameters {ff}\n') + conf.write('\n') + conf.write('# Temperature\n') + conf.write('temperature 0\n') + conf.write('\n') + conf.write('# Periodic Boundary Conditions\n') + conf.write('wrapWater on\n') + conf.write('wrapAll on\n') + conf.write(f'cellOrigin\t{corx}\t{cory}\t{corz}\n') + conf.write(f'cellBasisVector1\t{cbvx}\t0.0\t0.0\n') + conf.write(f'cellBasisVector2\t0.0\t{cbvy}\t0.0\n') + conf.write(f'cellBasisVector3\t0.0\t0.0\t{cbvz}\n') + conf.write('\n') + conf.write('# Force-Field Parameters\n') + conf.write('exclude scaled1-4\n') + conf.write('1-4scaling 1.0\n') + conf.write('cutoff 12.\n') + conf.write('switching on\n') + conf.write('switchdist 10.\n') + conf.write('pairlistdist 14\n') + conf.write('\n') + conf.write('# Integrator Parameters\n') + conf.write('timestep 1.0 ;# 1fs/step\n') + conf.write('nonbondedFreq 1\n') + conf.write('fullElectFrequency 2\n') + conf.write('stepspercycle 10\n') + conf.write('\n') + conf.write('# Output\n') + conf.write(f'outputName {self.job.name}_{self.job.next_stage}_{steps}\n') + conf.write('outputEnergies 100\n') + conf.write('outputPressure 100\n') + conf.write('\n') + conf.write('# Run\n') + conf.write(f'minimize {steps}') + + + def heat(self): + temp_reinit_steps = 100 + steps = 10000 + final_steps = 30 * temp_reinit_steps + steps + cbvx, cbvy, cbvz, corx, cory, corz = self.calc_pcell() + with open(self.job.conf, 'w') as conf: + conf.write('# Input\n') + conf.write(f'structure {self.job.psf}\n') + conf.write(f'coordinates {self.job.pdb}\n') + conf.write(f'bincoordinates {self.job.coor}\n') + conf.write('paraTypeCharmm on\n') + for ff in self.job.ffs: + conf.write(f'parameters {ff}\n') + conf.write('\n') + conf.write('# Temperature\n') + conf.write('temperature 0\n') + conf.write('\n') + conf.write('# Periodic Boundary Conditions\n') + conf.write('wrapWater on\n') + conf.write('wrapAll on\n') + conf.write(f'cellOrigin\t{corx}\t{cory}\t{corz}\n') + conf.write(f'cellBasisVector1\t{cbvx}\t0.0\t0.0\n') + conf.write(f'cellBasisVector2\t0.0\t{cbvy}\t0.0\n') + conf.write(f'cellBasisVector3\t0.0\t0.0\t{cbvz}\n') + conf.write('# Force-Field Parameters\n') + conf.write('exclude scaled1-4\n') + conf.write('1-4scaling 1.0\n') + conf.write('cutoff 12.\n') + conf.write('switching on\n') + conf.write('switchdist 10.\n') + conf.write('pairlistdist 14\n') + conf.write('\n') + conf.write('# Full Electrostatics\n') + conf.write('PME on\n') + conf.write('PMEGridSpacing 1.0\n') + conf.write('\n') + conf.write('# Integrator Parameters\n') + conf.write('timestep 1.0 ;# 1fs/step\n') + conf.write('nonbondedFreq 1\n') + conf.write('fullElectFrequency 2\n') + conf.write('stepspercycle 10\n') + conf.write('\n') + conf.write('# Output\n') + conf.write(f'outputName {self.job.name}_{self.job.next_stage}_{self.job.next_step}\n') + conf.write('outputEnergies 100\n') + conf.write('outputPressure 100\n') + conf.write('dcdfreq 1000\n') + conf.write('\n') + conf.write('# Constant Temperature Control\n') + conf.write('langevin on ;# do langevin dynamics\n') + conf.write('langevinDamping 0.5 ;# damping coefficient (gamma) of 0.5/ps\n') + conf.write('langevinTemp 310\n') + conf.write('langevinHydrogen yes ;# couple langevin bath to hydrogens\n') + conf.write('\n') + conf.write('# Constant Pressure Control\n') + conf.write('useGroupPressure no ;# needed for 2fs steps\n') + conf.write('useFlexibleCell yes ;# no for water box, yes for membrane\n') + conf.write('useConstantRatio yes ;# no for water box, yes for membrane\n') + conf.write('langevinPiston on\n') + conf.write('langevinPistonTarget 1.01325 ;# in bar -> 1 atm\n') + conf.write('langevinPistonPeriod 100.\n') + conf.write('langevinPistonDecay 50.\n') + conf.write('langevinPistonTemp 310\n') + conf.write('\n') + conf.write('# Run equilibration\n') + conf.write(f'set freq {temp_reinit_steps}\n') + conf.write('for {set i 10} {$i <= 310} {incr i 10} {\n') + conf.write('reinitvels $i\n') + conf.write('langevinTemp $i\n') + conf.write('run $freq\n') + conf.write('}\n') + conf.write('# Run stabilization\n') + conf.write(f'run {steps}') + + + def sim(self): + cbvx, cbvy, cbvz, corx, cory, corz = self.calc_pcell() + with open(self.job.conf, 'w') as conf: + conf.write('# Input\n') + conf.write(f'structure {self.job.psf}\n') + conf.write(f'coordinates {self.job.pdb}\n') + conf.write(f'bincoordinates {self.job.coor}\n') + conf.write('paraTypeCharmm on\n') + for ff in self.job.ffs: + conf.write(f'parameters {ff}\n') + conf.write('\n') + conf.write('# Temperature\n') + conf.write('temperature 0\n') + conf.write('\n') + conf.write('# Periodic Boundary Conditions\n') + conf.write('wrapWater on\n') + conf.write('wrapAll on\n') + conf.write(f'cellOrigin\t{corx}\t{cory}\t{corz}\n') + conf.write(f'cellBasisVector1\t{cbvx}\t0.0\t0.0\n') + conf.write(f'cellBasisVector2\t0.0\t{cbvy}\t0.0\n') + conf.write(f'cellBasisVector3\t0.0\t0.0\t{cbvz}\n') + conf.write('# Force-Field Parameters\n') + conf.write('exclude scaled1-4\n') + conf.write('1-4scaling 1.0\n') + conf.write('cutoff 12.\n') + conf.write('switching on\n') + conf.write('switchdist 10.\n') + conf.write('pairlistdist 14\n') + conf.write('\n') + conf.write('# Full Electrostatics\n') + conf.write('PME on\n') + conf.write('PMEGridSpacing 1.0\n') + conf.write('\n') + conf.write('# Integrator Parameters\n') + conf.write('timestep 2.0 ;# 2fs/step\n') + conf.write('rigidBonds all ;# needed for 2fs steps\n') + conf.write('nonbondedFreq 1\n') + conf.write('fullElectFrequency 2\n') + conf.write('stepspercycle 10\n') + conf.write('\n') + conf.write('# Output\n') + conf.write(f'outputName {self.job.name}_{self.job.next_stage}_{self.job.next_step}\n') + conf.write('outputEnergies 10000\n') + conf.write('outputPressure 10000\n') + conf.write('dcdfreq 10000\n') + conf.write('\n') + conf.write('# Constant Temperature Control\n') + conf.write('langevin on ;# do langevin dynamics\n') + conf.write('langevinDamping 0.5 ;# damping coefficient (gamma) of 0.5/ps\n') + conf.write('langevinTemp 310\n') + conf.write('langevinHydrogen no ;# couple langevin bath to hydrogens\n') + conf.write('\n') + conf.write('# Constant Pressure Control\n') + conf.write('useGroupPressure yes ;# needed for 2fs steps\n') + conf.write('useFlexibleCell yes ;# no for water box, yes for membrane\n') + conf.write('useConstantRatio yes ;# no for water box, yes for membrane\n') + conf.write('langevinPiston on\n') + conf.write('langevinPistonTarget 1.01325 ;# in bar -> 1 atm\n') + conf.write('langevinPistonPeriod 100.\n') + conf.write('langevinPistonDecay 50.\n') + conf.write('langevinPistonTemp 310\n') + conf.write('\n') + conf.write('# Run\n') + conf.write(f'run {self.job.steps}') + + + def solv_ion(self): + pdb_f = f'{self.job.prefix}_{self.job.stage}.pdb' + psf_f = f'{self.job.prefix}.psf' + solv_f = f'{self.job.prefix}_{self.job.stage}_solv' + solv_ion_f = f'{self.job.prefix}_{self.job.stage}_solv_ion' + molid = vmd.molecule.load('psf', psf_f, 'namdbin', self.job.coor) + vmd.molecule.write(molid, 'pdb', pdb_f) + vmd.evaltcl('package require solvate') + vmd.evaltcl(f'solvate {self.job.psf} {pdb_f} -o {solv_f} ' + f'-s WT -x 13 -y 13 -z 13 +x 13 +y 13 +z 13 -b 2.4') + vmd.evaltcl('package require autoionize') + vmd.evaltcl(f'autoionize -psf {solv_f}.psf -pdb {solv_f}.pdb -o {solv_ion_f} ' + f'-sc 0.15') + self.del_all_mols() + + + def calc_pcell(self): + if self.job.next_stage == '2-min' or self.job.next_stage == '3-min': + molid = vmd.molecule.load('psf', self.job.psf, 'pdb', self.job.pdb) + else: + molid = vmd.molecule.load('psf', self.job.psf, 'namdbin', self.job.coor) + all = vmd.atomsel("all", molid=molid) + minmax = all.minmax() + center = all.center() + cbvx = minmax[1][0] - minmax[0][0] + cbvy = minmax[1][1] - minmax[0][1] + cbvz = minmax[1][2] - minmax[0][2] + corx = center[0] + cory = center[1] + corz = center[2] + self.del_all_mols() + return cbvx, cbvy, cbvz, corx, cory, corz + + + def del_all_mols(self): + for mol in vmd.molecule.listall(): + vmd.molecule.delete(mol) + + + def namd(self): + cmd = f'{self.namdbin} {self.params} {self.job.conf} > {self.job.out}' + print(f'Running: {cmd}') + p1 = sp.run(cmd, shell=True) + + + def info(self): + print(f'Job Path: {self.job.path}\n' + f'Name: {self.job.name}\n' + f'Working PDB: {self.job.pdb}\n' + f'Working PSF: {self.job.psf}\n' + f'Working COOR: {self.job.coor}\n' + f'Previous Step: {self.job.stage}\n' + f'Previous Step Number: {str(self.job.step)}\n' + f'Next Stage: {self.job.next_stage}\n' + f'Next Step Number: {str(self.job.next_step)}\n' + f'FF Path: {self.job.ffs_path}\n' + f'FFs: {self.job.ffs}\n' + )