Tutorial on running DOCK3.7 with GIST

From DISI
Jump to: navigation, search

Tutorial by Trent Balius (2017/01/30).

here are more GIST related tutorials: DOCK_3.7_with_GIST_tutorials

This tutorial assumes that you have already completed the MD tutorial Tutorial on running Molecular Dynamics for GIST grid generation with scripts.

  1. use the receptor and ligand the align to the MD frame.
  2. prepare the receptor for docking using blastermaster. Flexible receptor docking is approximated by docking to 16 states.
  3. modify the INDOCK file.
  4. download (or generate) ligand and decoy DUD-E like databases.
  5. Dock ligand and decoy databases and perform enrichment analysis
  6. perform enrichment analyis


Get ligand databases

For the CcP ligands download the dock-ready databases from here:

wget http://docking.org/~tbalius/code/waterpaper2017/CcP-ga_ph4_justdb2.tar.gz
tar -xzvf CcP-ga_ph4_justdb2.tar.gz

You can download other databases from the autodude web page: DOCK_3.7_2015/04/15_abl1_Tutorial#set_up_directories_and_get_databases

Prepare system for docking

Write the following script (011.dock.blastermaster.csh) to run blastermaster.

#!/bin/csh 

# This script runs Ryan's blastermaster python masterscript for generating everything that dock needs, i.e. grids, spheres
# Run on sgehead as jobs are submitted to the queue

# TEB/ MF -- March 2017

setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" 
source /nfs/soft/python/envs/complete/latest/env.csh


set mountdir = `pwd`

set workdir = ${mountdir}/docking/1prep
set pramdir = ${mountdir}/for011.dockprep_parm_files


  if ( -s $workdir ) then
     echo "$workdir does exits"
     exit
  endif

mkdir -p $workdir
cd $workdir

ln -s $mountdir/gist/010a.full_gist_combine gistfiles 

cp $mountdir/gist/007align_to_md/rec_aligned.pdb .
cp $mountdir/gist/007align_to_md/lig_aligned.pdb .

cat rec_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | awk '{if($12 != "H"){print $0}}' | sed -e "s/HETATM/ATOM  /g" | sed -e 's/HEM/HM2/g' >!  rec.pdb

cat lig_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | sed -e "s/HETATM/ATOM  /g"  >  xtal-lig.pdb



# the following lines create a qsub script which submits blastermaster to the queue
cat <<EOF > qsub.csh
#!/bin/csh 
#\$ -cwd
#\$ -j yes
#\$ -o stderr
#\$ -q all.q

setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" 
source /nfs/soft/python/envs/complete/latest/env.csh

cd $workdir
$DOCKBASE/proteins/blastermaster/blastermaster.py --addhOptions=" -HIS -FLIPs " --addhDict="$pramdir/reduce_wwPDB_het_dict_mod.txt" --chargeFile="$pramdir/amb_mod.crg.oxt" --vdwprottable="$pramdir/prot_mod.table.ambcrg.ambH" -v
EOF

qsub qsub.csh 


# this will produce two directories:
# 1) working - contains all input and output files that are generated; not needed afterwards but as a reference
# 2) dockfiles - contains everything that is needed to run dock (copied from working)
#    grids 
#      trim.electrostatics.phi 
#      vdw.vdw 
#      vdw.bmp 
#      ligand.desolv.heavy
#      ligand.desolv.hydrogen
#    spheres
#      matching_spheres.sph

Docking

Create and apply a patch to the INDOCK file (012.dock.gistpatch.csh).

# script that modifies INDOCK file to include GIST parameters 
# will update INDOCK file, so first we back it up to _nogist
# then apply patch and call it _gist

# TEB/ MF -- March 2017

cd docking/1prep

set nominparmflag = 'F'

if (nominparmflag == 'T') then
cat << EOF > minparm.patch
--- INDOCK     2017-03-03 16:24:11.228826066 -0800
+++ INDOCK_min 2017-03-06 11:25:58.425345398 -0800
@@ -51,6 +51,16 @@
 ang2_range                    10.0
 ang1_step                     2.5
 ang2_step                     2.5
+##################################################### 
+#                    MINIMIZATION
+minimize                      no
+sim_itmax                     500
+sim_trnstep                   0.2
+sim_rotstep                   5.0
+sim_need_to_restart           1.0
+sim_cnvrge                    0.1
+min_cut                       1.0e15
+iseed                         777
 #####################################################
 # INPUT FILES / THINGS THAT CHANGE
 receptor_sphere_file          ../dockfiles/matching_spheres.sph
EOF
cp INDOCK INDOCK_ori
patch < minparm.patch 
endif


cat << EOF > gist.patch
--- INDOCK     2017-03-06 11:27:24.376904919 -0800
+++ INDOCK_gist        2017-03-06 11:33:37.519015401 -0800
@@ -36,6 +36,7 @@
 ligand_desolvation            volume
 vdw_maximum                   1.0e10
 electrostatic_scale           1.0
+gist_scale                   -1.0
 vdw_scale                     1.0
 internal_scale                0.0
 per_atom_scores               no
@@ -75,6 +76,7 @@
 solvmap_file                  ../dockfiles/ligand.desolv.heavy
 hydrogen_solvmap_file         ../dockfiles/ligand.desolv.hydrogen
 delphi_file                   ../dockfiles/trim.electrostatics.phi
+gist_file                     ../gistfiles/gist-EswPlus2Eww_ref.dx
 chemgrid_file                 ../dockfiles/vdw.vdw
 bumpmap_file                  ../dockfiles/vdw.bmp
 ############## end of INDOCK
EOF

cp INDOCK INDOCK_nogist
patch < gist.patch 
mv INDOCK INDOCK_gist
#cp INDOCK_nogist INDOCK

echo "For minimization: \n cp docking/1prep/INDOCK_nogist docking/1prep/INDOCK_min \n vi docking/1prep/INDOCK_min \n change line from no to yes"

Next dock the enrichment databases (013.dock.submit.lig-dec.docking.csh).

#!/bin/csh

#This script provides a alternative way to dock a DUD-e like ligand-decoy-database for the enrichment evaluation of actives over decoys
#It assumes that ligands and decoys have been pre-prepation (see script blablabla_ToDo) which needs to be run in SF.

# TEB/ MF -- March 2017

setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK 

#set indockType = '_gist'
#set indockType = '_nogist'
#set indockType = '_min'
set indockType = '_gist_elstat0p9'

set pwd = `pwd`
#set filedir = "/mnt/nfs/home/jklyu/work/DOCK_tutorial"  #CHANGE THIS
set filedir = ${pwd}/docking/1prep  #CHANGE THIS

# this is where the work is done:
set mountdir = ${pwd}/docking/2runEnrich   # Might CHANGE THIS
set dude_dir = "/mnt/nfs/home/tbalius/work/Water_Project_all_in_the_same_frame_ph4/databases"  # should contain decoy.smi and ligand.smi for ROC script 00005...csh
  ## TO DO - rename this outside in the dir structure and call in blbalbalbabla script
if (-s $dude_dir) then
 echo " $dude_dir exist"
else
 # this is something to modified in future. 
 # probably better to exit if it is not there.
 echo "databases do not exist. "
 echo "consider making a symbolic link to the database files"
endif

set list = "4NVA"  # CHANGE THIS (pdbname)
foreach pdbname ( $list )
# creates "ligands" and "decoys" and has the aim to dock all of the subsets for those two
foreach db_type ( "ligands" "decoys" )

set workdir1 = "${mountdir}/${pdbname}${indockType}/${db_type}"
echo $workdir1
#
mkdir -p  ${workdir1}
cd  ${workdir1}
#creat dirlist for *.db2.gz files prepared for docking
ls ${dude_dir}/${db_type}/*.db2.gz > ${db_type}_files.txt
#copy the files needed for dock
cp ${filedir}/INDOCK${indockType} ${workdir1}/INDOCK

ln -s ${filedir}/dockfiles/ ${workdir1}
ln -s ${filedir}/gistfiles/ ${workdir1}

#use dirlist to creat chunks for job submission
python $DOCKBASE/docking/setup/setup_db2_zinc15_file_number.py ./ chunk ./${db_type}_files.txt 500  count

csh $DOCKBASE/docking/submit/submit.csh

end # db_type
end # pdbname

combine the results and get the top scoring poses (014.dock.combineScoresAndPoses.csh.

#!/bin/csh

# This script combines the results from the ligand-decoy run 0003 (all chunks) into a combine file containing dock scores from OUTDOCK files
# Three files are produced (one for lig, decoy and both) 
# and: a file which has top poses as specified (e.g. top 1000 molecules with 2 poses each); two files (for lig and for decoys)

# TEB/ MF -- March 2017

# to remove dir
# rm -fr pdbs/3O1D/ligands-decoys/ligands/allChunksCombined/ pdbs/3O1D/ligands-decoys/decoys/allChunksCombined/ pdbs/3O1D/ligands-decoys/dockedLigDecoyCombined/

setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK
source /nfs/soft/python/envs/complete/latest/env.csh


set filedir = `pwd`
set mountdir = $filedir/docking/2runEnrich/
set d37 =  $DOCKBASE/analysis/

cd $mountdir

#set list = "4NVA_gist 4NVA_nogist"
#set list = "4NVA_gist"
#set list = "4NVA_min"
set list = "4NVA_gist_elstat0p9"
#set list = "4NVA_nogist"
#set list = `cat filename`
#set list = `cat $1`

foreach pdbname ( $list )
  
foreach db_type ( "ligands" "decoys" )

set workdir = ${mountdir}/${pdbname}/${db_type}/allChunksCombined

echo $pdbname


mkdir -p ${workdir}
cd ${workdir}



# creates a file called dirlist that contains the full path of all directories with docked runs (chunks)
ls -ld ${mountdir}/${pdbname}/${db_type}/* | awk '/chunk/{print $9}' > dirlist 

# for debuging
#echo "print $db_type dirlist:"
#cat dirlist

# script extracts scores from all docking runs specified in dirlist
$d37/extract_all.py 
# script gets poses for top scoring molecules and produces poses.mol2 (default name)
$d37/getposes.py -d ${mountdir}/${pdbname}/${db_type}

end # db_type

## combine decoyes and actives
set workdir =  ${mountdir}/${pdbname}/dockedLigDecoyCombined

rm -rf ${workdir}
mkdir -p ${workdir}
cd ${workdir}
 
cat ${mountdir}/${pdbname}/ligands/allChunksCombined/dirlist ${mountdir}/${pdbname}/decoys/allChunksCombined/dirlist > dirlist

# for debuging
#echo "print ALL dirlist"
#cat dirlist

$d37/extract_all.py 
#$d37/getposes.py -d ${mountdir}/${pdbname}    # doesn't work yet; not really needed
#getposes.py -z -l 1000 -x 2 -f extract_all.sort.uniq.txt -o ligands.1000.mol2 -d /mnt/nfs/work/users/fischer/VDR/27Jan2014_learningDOCKrgc/Enrichment/1DB1/DOCKING/ligands

end # pdbname

Enrichment

Here is the script for calculating enrichment (015.dock.AUCplot_of-lig-decoys.csh):

#!/bin/csh

# This script creates a log adjusted AUC (ROC) plot with ligand vs decoy results
# need X11 forwarding enabled when running remotely (ssh sgehead -X). 

# TEB/ MF -- March 2017

setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK
source /nfs/soft/python/envs/complete/latest/env.csh


set filedir = `pwd`
set mountdir = $filedir/docking/2runEnrich/
set d37 =  $DOCKBASE/analysis/

cd $mountdir



set dude_dir = "/mnt/nfs/home/tbalius/work/Water_Project_all_in_the_same_frame_ph4/databases"  # should contain decoy.smi and ligand.smi for ROC script 00005...csh

# 
#ln -s $dude_dir/ligands/ligands.smi .
#ln -s $dude_dir/decoys/decoys.smi .

# CHANGE THIS
#set list = "4NVA_gist 4NVA_nogist"
#set list = "4NVA_gist"
#set list = "4NVA_nogist"
#set list = "4NVA_min"
set list = "4NVA_gist_elstat0p9"
#set list = `cat filename`
#set list = `cat $1`

foreach pdbname ( $list )

set workdir = ${mountdir}/${pdbname}/ROC_ligdecoy/

# This script will not work without the following line:
echo "HERE is the HAWK" 

# checks that previous script 0003 has produced mol2 files
if (! ( -s $mountdir/${pdbname}/decoys/allChunksCombined/poses.mol2) && ! (-s $mountdir/${pdbname}/ligands/allChunksCombined/poses.mol2 )) then
   ls -l $mountdir/${pdbname}/decoys/allChunksCombined/poses.mol2 
   ls -l $mountdir/${pdbname}/ligands/allChunksCombined/poses.mol2
   echo "skipping ${pdbname}. cannot generate ROC"
   continue
endif


rm -rf $workdir
mkdir -p $workdir
cd $workdir

#wget http://dude.docking.org/targets/aa2ar/actives_final.ism

# reads ZINC ids (ligand or decoy molecule names)
# everything
awk '{print $2}' $dude_dir/decoys/decoys.smi > decoys.name
awk '{printf "%9s\n", $2}' $dude_dir/ligands/ligands.smi > ligands.name
# for other names e.g. CHEMBL try this instead
#awk '{printf "%9s\n", $3}' $dude_dir/ligands/ligands.smi > ligands.name

cat ${mountdir}/${pdbname}/ligands/allChunksCombined/dirlist ${mountdir}/${pdbname}/decoys/allChunksCombined/dirlist > dirlist

#which enrich.py
set enrich_py = $d37/enrich.py
set plots_py = $d37/plots.py

# 
# calculates AUCs, stores in txt file which is then plotted for all ligands and decoys
# - i is the flag for the input directory, this dir should contain the extract_all.sort.uniq.txt. 
#  the scripts enrich_py and plots_py will go through the extract file and look for the ligand and decoy names.
#  when it finds them it will populate the ROC cruve. these values are devied by the total number of ligand or decoys.
#  note that ofen not all ligands and not all decoy finish so the point (1,1) is always included and interpolations is performed . . . 
#
#python ${enrich_py} -i $mountdir/${pdbname}/dockedLigDecoyCombined/ -o . --ligand-file=ligands.name --decoy-file=decoys.name 
#python ${plots_py} -i $mountdir/${pdbname}/dockedLigDecoyCombined/ -o . --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname 
python ${enrich_py} -i . -o . --ligand-file=ligands.name --decoy-file=decoys.name 
python ${plots_py} -i . -o . --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname           # to plot logAUC
python ${plots_py} -i . -o auc_plot -n --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname         #-n flag to plot "normal" AUC

end   #pbdname

echo " logAUCs > Look at plots: \n eog $mountdir/$list/ROC_ligdecoy/*png \n "
echo "         > Or better for printing: \n gthumb $mountdir/$list/ROC_ligdecoy/*png  \n"
echo "For EF's > see script 015.c.calc_enrichment_factor_gen.py for calculation of EF's \n"

# For AUC's docking/2runEnrich/4NVA_gist/ROC_ligdecoy/roc_own.txt is used for plotting.
# Note that roc_own.txt is for AUC but roc_own.png in /ROC_ligdecoy/ dir contains logAUC plot.

prepare system for docking (flex)

First we need to align the pdb with multiple states in the MD frame (050.flex.alignwithchimera_gist_conf.csh).

#!/bin/csh 
## this script was written by trent balius in the Rizzo Group, 2011
## modified in the Shoichet Group, 2013-2015

# TEB, MF comments -- March 2017

# This shell script will do the following:
# (1) aligns the flexible receptor onto the 10th (production) MD frame -- as before for ligand.
# Hence we can make a simlink using that ligand.

set mountdir = `pwd`
set workdir  = $mountdir/flex/01align_to_md
rm -rf $workdir
mkdir -p $workdir
cd $workdir

#ln -s ${mountdir}/MDrundir/prep/002md_align . 

 
curl docking.org/~tbalius/code/waterpaper2017/parms/APO_rt_loop.pdb > APO_rt_loop.pdb
set ref = "$mountdir/gist/006ref/ref.pdb" # snapshot from simulation
set rec = "$workdir/APO_rt_loop.pdb" # rec with flexibility

set chimerapath = "/nfs/soft/chimera/current/bin/chimera"

#write instruction file for chimera based alignment
cat << EOF > chimera.com
# template #0
open $ref 
# rec #1
open $rec

# move original to gist. it is harder to move the gist grids. 
mmaker #0 #1 
write format pdb  0 ref.pdb
write format pdb  1 rec_aligned.pdb
EOF
 
${chimerapath} --nogui chimera.com > & chimera.com.out

# we have already aligned the ligand previously. 
ln -s $mountdir/gist/007align_to_md/lig_aligned.pdb .

Next we prepare this receptor for docking using blastermaster with the flexible receptor-flag (051.flex.blastermaster.csh).

#!/bin/csh 

# This script runs Ryan's blastermaster python masterscript for generating everything that dock needs, i.e. grids, spheres
# Run on sgehead as jobs are submitted to the queue

# TEB/ MF - March 2017

setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" 
source /nfs/soft/python/envs/complete/latest/env.csh

#set Multiplier = 1.0
set Multiplier = 2.0
set reslist = 186+187+188+189+190+191+192+193+194,199,228

# if you need help selecting which residues to include as flexible the following line find all entries with a letter in the alternative conformation column
# grep "ATOM" rec.pdb | grep -v "^................ " | cut -c17-26 | uniq 

set mountdir = `pwd`

set workdir = ${mountdir}/flex/2prep
set pramdir = ${mountdir}/for011.dockprep_parm_files


  if ( -s $workdir ) then
     echo "$workdir does exits"
     exit
  endif

mkdir -p $workdir
cd $workdir

ln -s $mountdir/gist/010a.full_gist_combine gistfiles 

cp $mountdir/flex/01align_to_md/rec_aligned.pdb .
cp $mountdir/flex/01align_to_md/lig_aligned.pdb .

cat rec_aligned.pdb | grep -v "HOH" | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | awk '{if($12 != "H"){print $0}}' | sed -e "s/HETATM/ATOM  /g" | sed -e 's/HEM/HM2/g' >!  rec.pdb

cat lig_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | sed -e "s/HETATM/ATOM  /g"  >  xtal-lig.pdb



# the following lines create a qsub script which submits blastermaster to the queue
cat <<EOF > qsub.csh
#!/bin/csh 
#\$ -cwd
#\$ -j yes
#\$ -o stderr
#\$ -q all.q

setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" 
source /nfs/soft/python/envs/complete/latest/env.csh

cd $workdir
$DOCKBASE/proteins/blastermaster/blastermaster.py --addhOptions=" -HIS -FLIPs " --addhDict="$pramdir/reduce_wwPDB_het_dict_mod.txt" --chargeFile="$pramdir/amb_mod.crg.oxt" --vdwprottable="$pramdir/prot_mod.table.ambcrg.ambH" -v -f --flexiblePenaltyM=$Multiplier --flexibleResidues=${reslist} 
EOF

qsub qsub.csh 


# this will produce two directories:
# 1) working - contains all input and output files that are generated; not needed afterwards but as a reference
# 2) dockfiles - contains everything that is needed to run dock (copied from working)
#    grids 
#      trim.electrostatics.phi 
#      vdw.vdw 
#      vdw.bmp 
#      ligand.desolv.heavy
#      ligand.desolv.hydrogen
#    spheres
#      matching_spheres.sph

docking (flex)

enrichment (flex)