Tutorial on running Molecular Dynamics for GIST grid generation with scripts: Difference between revisions

From DISI
Jump to navigation Jump to search
Line 39: Line 39:
== Prepare for AMBER ==
== Prepare for AMBER ==


write the following script, named 001md.be_balsti_reduce.csh, using your favorate text editor (like vim). 
#!/bin/csh
# TEB and MF comments
# This script runs beblasti, creates dirs and splits pdb into rec and lig file for running amber MD.
# Setting up necessary environments
source /nfs/soft/python/envs/complete/latest/env.csh
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK"
# setenv AMBERHOME /nfs/soft/amber/amber14
set path = ( /nfs/home/tbalius/zzz.programs/msms $path )
# list of PDBs
# 4NVA apo with waters, 4NVE ligand
#set list = `cat /nfs/work/users/tbalius/VDR/Enrichment/pdblist_rat `
set list = '4NVE 4NVA'        ### CHANGE THIS
#set mountdir = "/mnt/nfs/work/tbalius/Water_Project/run_DOCK3.7"
set mountdir = `pwd`
# loop over all PDBs
foreach pdbname ( $list )
echo " ${pdbname} "
set workdir = ${mountdir}/MDrundir/prep/${pdbname}
# check if workdir exists
if ( -s $workdir ) then
    echo "$workdir exits"
    continue
endif
# if it exists do this:    rm -rf ${workdir}
  mkdir -p ${workdir}
  cd ${workdir}
 
  ln -s /nfs/home/tbalius/zzz.programs/msms/atmtypenumbers .
  #python ~/zzz.scripts/be_blasti.py --pdbcode $pdbname nocarbohydrate renumber | tee -a pdbinfo_using_biopython.log
  #>> to use e.g. buffer like MES without modifying the beblasti script, rename lig code in pdb file and use next line
  #>> to use pdbfile replace --pdbcode with --pdbfile (filename incl .pdb)
  #python /nfs/home/tbalius/zzz.scripts/be_blasti.py --pdbfile $pdbfile nocarbohydrate original_numbers | tee -a pdbinfo_using_biopython.log
  python /nfs/home/tbalius/zzz.scripts/be_blasti.py --pdbcode $pdbname nocarbohydrate original_numbers | tee -a pdbinfo_using_biopython.log
 
# extracts all waters into separate pdb file
  grep HOH  $workdir/${pdbname}_A.pdb > $workdir/water.pdb
  if !(-s rec.pdb) then
      echo "rec.pdb is not found"
  endif
# nomenclature clean-up.
  mv rec.pdb temp.pdb
  grep -v TER temp.pdb | grep -v END  > rec.pdb
  rm temp.pdb
  if (-s lig.pdb) then
      sed -e "s/HETATM/ATOM  /g" lig.pdb > xtal-lig.pdb
  else if (-s pep.pdb) then ## if no ligand and peptide
      sed -e "s/HETATM/ATOM  /g" pep.pdb > xtal-lig.pdb
  else
      echo "Warning: No ligand or peptid."
  endif
end # system
This script downloads PDB files from the web and breaks them up into ligand and receptor.
Next we align the receptors, ligands and waters into the same frame (002md.alignwithchimera_forTleap.csh):
#!/bin/csh
## this script was written by trent balius in the Rizzo Group, 2011
## modified in the Shoichet Group, 2013-2015
# TEB, MF comments 2017
# This shell script will do the following:
# (1) align the ligand file onto hydrated apo reference
# (2) then use superposed ligand to id nearby binding site waters in apo structure
# (3) save nearby waters only to pdb
set mountdir = `pwd`
set workdir = $mountdir/MDrundir/prep/002md_align
rm -rf $workdir
mkdir -p $workdir
cd $workdir
# get the right files.      ### CHANGE THIS
set pdb1 = "4NVA"
set pdb2 = "4NVE"
set apo_ref = "../$pdb1/rec.pdb"
set rec_lig = "../$pdb2/rec.pdb"
set lig = "../$pdb2/xtal-lig.pdb"
set wat = "../$pdb1/water.pdb"
set chimerapath = "/nfs/soft/chimera/current/bin/chimera"
#write instruction file for chimera based alignment
cat << EOF > chimera.com
# template #0
open $apo_ref
# rec #1
open $rec_lig
# xtal-lig
open $lig
# waters
open $wat
# move original to gist. it is harder to move the gist grids.
mmaker #0 #1
matrixcopy #1 #2
sel #3:HOH & #2 z<8
del #3 & ~sel
write format pdb  0 apo_ref.pdb
write format pdb  1 rec_lig_aligned.pdb
write format pdb  2 lig_aligned.pdb
write format mol2  2 lig_aligned.mol2
write format pdb  3 nearby_waters_aligned.pdb
EOF
${chimerapath} --nogui chimera.com > & chimera.com.out
Next we prepare the ligand-less receptor for molecular dynamics simulation with the following script ( 003md.tleap_reduce_HEME.csh ):
#!/bin/csh
# This script uses first reduce and then tleap to prepare a receptor for amber.
# The outputs are: parameter topology file (prm7) and a coordinate file (rst7)
#
## it includes the option to include Xtal waters (currently commented out)
#
# Written by Trent balius
# TEB/ MF comments Feb2017
setenv AMBERHOME /nfs/soft/amber/amber14
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK"
# CUDA for GPU
#setenv LD_LIBRARY_PATH ""
#setenv LD_LIBRARY_PATH "/usr/local/cuda-6.0/lib64/:$LD_LIBRARY_PATH"
set mountdir = `pwd`
set filedir = $mountdir/MDrundir/prep/002md_align
set workdir = $mountdir/MDrundir/prep/003md_tleap
# check if workdir exists
if ( -s $workdir ) then
    echo "$workdir exits"
    continue
endif
       
# if it exists do this:    rm -rf ${workdir}
mkdir -p ${workdir}
cd ${workdir}
#rm -f leap.log  tleap.rec.out tleap.rec.in
## uncomment the next line to include Xtal waters into simulation
#cp $filedir/nearby_waters_aligned.pdb wat.pdb
cp $filedir/apo_ref.pdb rec.pdb
# get HEME parameters
curl docking.org/~tbalius/code/waterpaper2017/parms/frcmod_teb_mf_mod.hemall > frcmod_teb_mf_mod.hemall
curl docking.org/~tbalius/code/waterpaper2017/parms/heme_all_plus1.in > heme_all_plus1.in
### 1st: run leap just to renumber residues.
# produces tleap input file -- renumbers rec
### draw bond between HIS and HEME with 5 coordinated iron.
##bond REC.175.NE2 REC.293.FE
cat << EOF >! tleap.rec.1.in
set default PBradii mbondi2
# load the protein force field
source leaprc.ff14SB
loadamberparams frcmod_teb_mf_mod.hemall
loadamberprep heme_all_plus1.in
REC = loadpdb rec.pdb
saveamberparm REC rec.1.leap.prm7 rec.1.leap.rst7
quit
EOF
# runs tleap and converts parameter and coordinate restart file back into pdb file
$AMBERHOME/bin/tleap -s -f tleap.rec.1.in > ! tleap.rec.1.out
$AMBERHOME/bin/ambpdb -p rec.1.leap.prm7 < rec.1.leap.rst7 >! rec.1.leap.pdb
# nomenclature clean-up
$DOCKBASE/proteins/Reduce/reduce -HIS -FLIPs rec.1.leap.pdb >! rec.nowat.reduce.pdb
sed -i 's/HETATM/ATOM  /g' rec.nowat.reduce.pdb
# grep -v means "everything but"; last grep statement removes inconsistently named HEM hydrogens (for leap to be added back in)
grep "^ATOM  " rec.nowat.reduce.pdb | sed -e 's/  new//g' | sed 's/  flip//g' | sed 's/  std//g' | grep -v "OXT" | grep -v " 0......HEM" >! rec.nowat.reduce_clean.pdb
curl docking.org/~tbalius/code/waterpaper2017/scripts/replace_his_with_hie_hid_hip.py > replace_his_with_hie_hid_hip.py
curl docking.org/~tbalius/code/waterpaper2017/scripts/replace_cys_to_cyx.py > replace_cys_to_cyx.py
curl docking.org/~tbalius/code/waterpaper2017/scripts/add.ters.py > add.ters.py
# python scripts do these three things: 1) checks his to give protonation specific names; 2) checks for disulphide bonds; 3) checks for missing residues and adds TER flag
python replace_his_with_hie_hid_hip.py rec.nowat.reduce_clean.pdb rec.nowat.1his.pdb
python replace_cys_to_cyx.py rec.nowat.1his.pdb rec.nowat.2cys.pdb
python add.ters.py rec.nowat.2cys.pdb rec.nowat.3ter.pdb
# add TER to file rec.nowat.H_mod2.pdb before HEM
grep -v "HEM" rec.nowat.3ter.pdb >! rec.nowat.4ter.pdb
echo "TER" >> rec.nowat.4ter.pdb
grep "HEM" rec.nowat.3ter.pdb >> rec.nowat.4ter.pdb
# fix wrong his protonation from automated pipeline
grep -v "HE2 HIE  172" rec.nowat.4ter.pdb | sed -e "s/HIE  172/HID  172/g" > rec.nowat.final.pdb
# if no histidine fix is required uncomment this
#mv rec.nowat.3ter.pdb rec.nowat.final.pdb
# or rec.nowat.4ter.pdb rec.nowat.final.pdb if TER was added before the heme
### 2nd -- re-run tleap to create amber-ready input files (solvated parameter and coordinate protein and instruction file)
# produces tleap input file in 3 steps, first script based instruction file (loading FF and rec file), then pipes disulphide info into middle and final instructions (write just rec, solvated in TIP3P water box of 10A about rec boundaries, write rec in water-box parameters and coordinates) for complete tleap input file
cat << EOF >! tleap.rec2.in
set default PBradii mbondi2
# load the protein force field
source leaprc.ff14SB
loadamberparams frcmod_teb_mf_mod.hemall
loadamberprep heme_all_plus1.in
REC = loadpdb rec.nowat.final.pdb
EOF
if (-e rec.nowat.2cys.pdb.for.leap ) then
  cat rec.nowat.2cys.pdb.for.leap >> tleap.rec2.in
endif
cat << EOF >> tleap.rec2.in
# draw bond between HIS and HEME
# 5 coordinated iron.
bond REC.172.NE2 REC.290.FE
## include the following two lines to include Xtal water in MD run
##  XTALWAT = loadpdb wat.pdb
##  RECWAT  = combine {REC XTALWAT}
## also need to change REC to RECWAT in the next 3 lines
saveamberparm REC rec.leap.prm7 rec.leap.rst7
solvateBox REC TIP3PBOX 10.0
saveamberparm REC rec.watbox.leap.prm7 rec.watbox.leap.rst7
quit
EOF
$AMBERHOME/bin/tleap -s -f tleap.rec2.in > ! tleap.rec2.out
# for ease of visualization in pymol
$AMBERHOME/bin/ambpdb -p rec.leap.prm7 < rec.leap.rst7 > rec.leap.pdb
echo "Look at rec.leap.pdb in pymol. May have to delete last column in pdb file for pymol."
# may have to remove element column so pymol does not get confused
# inspect tleap.rec.out and the leap.log file
# visually inspect (VMD) rec.10wat.leap.prm7, rec.10wat.leap.rst7 and rec.watbox.leap.prm7, rec.watbox.leap.rst7


== Run GIST post processing ==
== Run GIST post processing ==

Revision as of 17:24, 17 April 2017

Tutorial written by Trent Balius (Jan. 9, 2017).

Here are more GIST related tutorials: DOCK_3.7_with_GIST_tutorials

Disclaimer

This is foremost for training Shoichet lab members. But we hope that the community finds this useful.

To use this tutorial you will need the following:

  • AMBER12 or higher. Here I used AMBER14.
  • AMBERTOOLS.
  • GPUs to run AMBER on. In our case we uses an SGE queuing system to manage these jobs.
  • It is helpful if you have DOCK3.7 the latest version.

Introduction

Grid inhomogeneous Solvation Theory (GIST) is a method for calculating thermodynamic properties of water on GIST lattice.

Here, we will use these grids for DOCKing (although, there are many other uses).

Set up environment

you can add the following to your environment file (.cshrc) for just issue them on the command line.

source python with bio_python package.

source /nfs/soft/python/envs/complete/latest/env.csh

set DOCKBASE

setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK"

set AMBERHOME

 setenv AMBERHOME /nfs/soft/amber/amber14

Put msms in your path

set path = ( /nfs/home/tbalius/zzz.programs/msms $path )

Prepare for AMBER

write the following script, named 001md.be_balsti_reduce.csh, using your favorate text editor (like vim).

#!/bin/csh 

# TEB and MF comments
# This script runs beblasti, creates dirs and splits pdb into rec and lig file for running amber MD.

# Setting up necessary environments
source /nfs/soft/python/envs/complete/latest/env.csh
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK"
# setenv AMBERHOME /nfs/soft/amber/amber14
set path = ( /nfs/home/tbalius/zzz.programs/msms $path )

# list of PDBs
# 4NVA apo with waters, 4NVE ligand
#set list = `cat /nfs/work/users/tbalius/VDR/Enrichment/pdblist_rat `
set list = '4NVE 4NVA'         ### CHANGE THIS

#set mountdir = "/mnt/nfs/work/tbalius/Water_Project/run_DOCK3.7"
set mountdir = `pwd`

# loop over all PDBs 
foreach pdbname ( $list )

echo " ${pdbname} "

set workdir = ${mountdir}/MDrundir/prep/${pdbname}

# check if workdir exists
if ( -s $workdir ) then
   echo "$workdir exits"
   continue
endif

# if it exists do this:     rm -rf ${workdir}
  mkdir -p ${workdir}
  cd ${workdir}
  
  ln -s /nfs/home/tbalius/zzz.programs/msms/atmtypenumbers .
  #python ~/zzz.scripts/be_blasti.py --pdbcode $pdbname nocarbohydrate renumber | tee -a pdbinfo_using_biopython.log
  #>> to use e.g. buffer like MES without modifying the beblasti script, rename lig code in pdb file and use next line
  #>> to use pdbfile replace --pdbcode with --pdbfile (filename incl .pdb)
  #python /nfs/home/tbalius/zzz.scripts/be_blasti.py --pdbfile $pdbfile nocarbohydrate original_numbers | tee -a pdbinfo_using_biopython.log
  python /nfs/home/tbalius/zzz.scripts/be_blasti.py --pdbcode $pdbname nocarbohydrate original_numbers | tee -a pdbinfo_using_biopython.log
  
# extracts all waters into separate pdb file
  grep HOH  $workdir/${pdbname}_A.pdb > $workdir/water.pdb

  if !(-s rec.pdb) then
      echo "rec.pdb is not found"
  endif

# nomenclature clean-up.
  mv rec.pdb temp.pdb
  grep -v TER temp.pdb | grep -v END  > rec.pdb
  rm temp.pdb

  if (-s lig.pdb) then
     sed -e "s/HETATM/ATOM  /g" lig.pdb > xtal-lig.pdb
  else if (-s pep.pdb) then ## if no ligand and peptide
     sed -e "s/HETATM/ATOM  /g" pep.pdb > xtal-lig.pdb
  else
     echo "Warning: No ligand or peptid."
  endif

end # system

This script downloads PDB files from the web and breaks them up into ligand and receptor.

Next we align the receptors, ligands and waters into the same frame (002md.alignwithchimera_forTleap.csh):

#!/bin/csh 
## this script was written by trent balius in the Rizzo Group, 2011
## modified in the Shoichet Group, 2013-2015
# TEB, MF comments 2017

# This shell script will do the following:
# (1) align the ligand file onto hydrated apo reference
# (2) then use superposed ligand to id nearby binding site waters in apo structure
# (3) save nearby waters only to pdb

set mountdir = `pwd`

set workdir = $mountdir/MDrundir/prep/002md_align

rm -rf $workdir
mkdir -p $workdir
cd $workdir

# get the right files.      ### CHANGE THIS
set pdb1 = "4NVA"
set pdb2 = "4NVE"

set apo_ref = "../$pdb1/rec.pdb"
set rec_lig = "../$pdb2/rec.pdb"
set lig = "../$pdb2/xtal-lig.pdb" 
set wat = "../$pdb1/water.pdb" 

set chimerapath = "/nfs/soft/chimera/current/bin/chimera"

#write instruction file for chimera based alignment
cat << EOF > chimera.com
# template #0
open $apo_ref 
# rec #1
open $rec_lig
# xtal-lig
open $lig
# waters
open $wat

# move original to gist. it is harder to move the gist grids. 
mmaker #0 #1 
matrixcopy #1 #2
sel #3:HOH & #2 z<8
del #3 & ~sel
write format pdb  0 apo_ref.pdb
write format pdb  1 rec_lig_aligned.pdb
write format pdb  2 lig_aligned.pdb
write format mol2  2 lig_aligned.mol2
write format pdb  3 nearby_waters_aligned.pdb
EOF

${chimerapath} --nogui chimera.com > & chimera.com.out


Next we prepare the ligand-less receptor for molecular dynamics simulation with the following script ( 003md.tleap_reduce_HEME.csh ):

#!/bin/csh
# This script uses first reduce and then tleap to prepare a receptor for amber.
# The outputs are: parameter topology file (prm7) and a coordinate file (rst7) 
#
## it includes the option to include Xtal waters (currently commented out)
#
# Written by Trent balius
# TEB/ MF comments Feb2017

setenv AMBERHOME /nfs/soft/amber/amber14
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK"
# CUDA for GPU
#setenv LD_LIBRARY_PATH ""
#setenv LD_LIBRARY_PATH "/usr/local/cuda-6.0/lib64/:$LD_LIBRARY_PATH"

set mountdir = `pwd`
set filedir = $mountdir/MDrundir/prep/002md_align
set workdir = $mountdir/MDrundir/prep/003md_tleap

# check if workdir exists
if ( -s $workdir ) then
   echo "$workdir exits"
   continue
endif
       
# if it exists do this:     rm -rf ${workdir}
mkdir -p ${workdir}
cd ${workdir}

#rm -f leap.log  tleap.rec.out tleap.rec.in

## uncomment the next line to include Xtal waters into simulation
#cp $filedir/nearby_waters_aligned.pdb wat.pdb
cp $filedir/apo_ref.pdb rec.pdb

# get HEME parameters
curl docking.org/~tbalius/code/waterpaper2017/parms/frcmod_teb_mf_mod.hemall > frcmod_teb_mf_mod.hemall
curl docking.org/~tbalius/code/waterpaper2017/parms/heme_all_plus1.in > heme_all_plus1.in

### 1st: run leap just to renumber residues.

# produces tleap input file -- renumbers rec
### draw bond between HIS and HEME with 5 coordinated iron.
##bond REC.175.NE2 REC.293.FE
cat << EOF >! tleap.rec.1.in 
set default PBradii mbondi2
# load the protein force field
source leaprc.ff14SB
loadamberparams frcmod_teb_mf_mod.hemall
loadamberprep heme_all_plus1.in
REC = loadpdb rec.pdb
saveamberparm REC rec.1.leap.prm7 rec.1.leap.rst7
quit
EOF

# runs tleap and converts parameter and coordinate restart file back into pdb file
$AMBERHOME/bin/tleap -s -f tleap.rec.1.in > ! tleap.rec.1.out
$AMBERHOME/bin/ambpdb -p rec.1.leap.prm7 < rec.1.leap.rst7 >! rec.1.leap.pdb 

# nomenclature clean-up
$DOCKBASE/proteins/Reduce/reduce -HIS -FLIPs rec.1.leap.pdb >! rec.nowat.reduce.pdb
sed -i 's/HETATM/ATOM  /g' rec.nowat.reduce.pdb 
# grep -v means "everything but"; last grep statement removes inconsistently named HEM hydrogens (for leap to be added back in)
grep "^ATOM  " rec.nowat.reduce.pdb | sed -e 's/   new//g' | sed 's/   flip//g' | sed 's/   std//g' | grep -v "OXT" | grep -v " 0......HEM" >! rec.nowat.reduce_clean.pdb


curl docking.org/~tbalius/code/waterpaper2017/scripts/replace_his_with_hie_hid_hip.py > replace_his_with_hie_hid_hip.py
curl docking.org/~tbalius/code/waterpaper2017/scripts/replace_cys_to_cyx.py > replace_cys_to_cyx.py
curl docking.org/~tbalius/code/waterpaper2017/scripts/add.ters.py > add.ters.py

# python scripts do these three things: 1) checks his to give protonation specific names; 2) checks for disulphide bonds; 3) checks for missing residues and adds TER flag
python replace_his_with_hie_hid_hip.py rec.nowat.reduce_clean.pdb rec.nowat.1his.pdb
python replace_cys_to_cyx.py rec.nowat.1his.pdb rec.nowat.2cys.pdb
python add.ters.py rec.nowat.2cys.pdb rec.nowat.3ter.pdb

# add TER to file rec.nowat.H_mod2.pdb before HEM
grep -v "HEM" rec.nowat.3ter.pdb >! rec.nowat.4ter.pdb
echo "TER" >> rec.nowat.4ter.pdb
grep "HEM" rec.nowat.3ter.pdb >> rec.nowat.4ter.pdb

# fix wrong his protonation from automated pipeline 
grep -v "HE2 HIE   172" rec.nowat.4ter.pdb | sed -e "s/HIE   172/HID   172/g" > rec.nowat.final.pdb

# if no histidine fix is required uncomment this
#mv rec.nowat.3ter.pdb rec.nowat.final.pdb
# or rec.nowat.4ter.pdb rec.nowat.final.pdb if TER was added before the heme


### 2nd -- re-run tleap to create amber-ready input files (solvated parameter and coordinate protein and instruction file)

# produces tleap input file in 3 steps, first script based instruction file (loading FF and rec file), then pipes disulphide info into middle and final instructions (write just rec, solvated in TIP3P water box of 10A about rec boundaries, write rec in water-box parameters and coordinates) for complete tleap input file
cat << EOF >! tleap.rec2.in 
set default PBradii mbondi2
# load the protein force field
source leaprc.ff14SB
loadamberparams frcmod_teb_mf_mod.hemall
loadamberprep heme_all_plus1.in
REC = loadpdb rec.nowat.final.pdb
EOF

if (-e rec.nowat.2cys.pdb.for.leap ) then 
  cat rec.nowat.2cys.pdb.for.leap >> tleap.rec2.in
endif

cat << EOF >> tleap.rec2.in
# draw bond between HIS and HEME
# 5 coordinated iron.
bond REC.172.NE2 REC.290.FE
## include the following two lines to include Xtal water in MD run
##  XTALWAT = loadpdb wat.pdb
##  RECWAT  = combine {REC XTALWAT}
## also need to change REC to RECWAT in the next 3 lines
saveamberparm REC rec.leap.prm7 rec.leap.rst7
solvateBox REC TIP3PBOX 10.0
saveamberparm REC rec.watbox.leap.prm7 rec.watbox.leap.rst7
quit
EOF

$AMBERHOME/bin/tleap -s -f tleap.rec2.in > ! tleap.rec2.out

# for ease of visualization in pymol 
$AMBERHOME/bin/ambpdb -p rec.leap.prm7 < rec.leap.rst7 > rec.leap.pdb

echo "Look at rec.leap.pdb in pymol. May have to delete last column in pdb file for pymol."
# may have to remove element column so pymol does not get confused

# inspect tleap.rec.out and the leap.log file
# visually inspect (VMD) rec.10wat.leap.prm7, rec.10wat.leap.rst7 and rec.watbox.leap.prm7, rec.watbox.leap.rst7

Run GIST post processing