Tutorial on running DOCK3.7 with GIST: Difference between revisions
| Line 91: | Line 91: | ||
# matching_spheres.sph | # matching_spheres.sph | ||
== | == Docking == | ||
Create and apply a patch to the INDOCK file (012.dock.gistpatch.csh). | Create and apply a patch to the INDOCK file (012.dock.gistpatch.csh). | ||
Revision as of 16:52, 19 April 2017
Tutorial by Trent Balius (2017/01/30).
here are more GIST related tutorials: DOCK_3.7_with_GIST_tutorials
This tutorial assumes that you have already completed the MD tutorial Tutorial on running Molecular Dynamics for GIST grid generation with scripts.
- use the receptor and ligand the align to the MD frame.
- prepare the receptor for docking using blastermaster. Flexible receptor docking is approximated by docking to 16 states.
- modify the INDOCK file.
- download (or generate) ligand and decoy DUD-E like databases.
- Dock ligand and decoy databases and perform enrichment analysis
- perform enrichment analyis
Get ligand databases
For the CcP ligands download the dock-ready databases from here:
wget http://docking.org/~tbalius/code/waterpaper2017/CcP-ga_ph4_justdb2.tar.gz tar -xzvf CcP-ga_ph4_justdb2.tar.gz
You can download other databases from the autodude web page: DOCK_3.7_2015/04/15_abl1_Tutorial#set_up_directories_and_get_databases
Prepare system for docking
Write the following script (011.dock.blastermaster.csh) to run blastermaster.
#!/bin/csh
# This script runs Ryan's blastermaster python masterscript for generating everything that dock needs, i.e. grids, spheres
# Run on sgehead as jobs are submitted to the queue
# TEB/ MF -- March 2017
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK"
source /nfs/soft/python/envs/complete/latest/env.csh
set mountdir = `pwd`
set workdir = ${mountdir}/docking/1prep
set pramdir = ${mountdir}/for011.dockprep_parm_files
if ( -s $workdir ) then
echo "$workdir does exits"
exit
endif
mkdir -p $workdir
cd $workdir
ln -s $mountdir/gist/010a.full_gist_combine gistfiles
cp $mountdir/gist/007align_to_md/rec_aligned.pdb .
cp $mountdir/gist/007align_to_md/lig_aligned.pdb .
cat rec_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | awk '{if($12 != "H"){print $0}}' | sed -e "s/HETATM/ATOM /g" | sed -e 's/HEM/HM2/g' >! rec.pdb
cat lig_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | sed -e "s/HETATM/ATOM /g" > xtal-lig.pdb
# the following lines create a qsub script which submits blastermaster to the queue
cat <<EOF > qsub.csh
#!/bin/csh
#\$ -cwd
#\$ -j yes
#\$ -o stderr
#\$ -q all.q
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK"
source /nfs/soft/python/envs/complete/latest/env.csh
cd $workdir
$DOCKBASE/proteins/blastermaster/blastermaster.py --addhOptions=" -HIS -FLIPs " --addhDict="$pramdir/reduce_wwPDB_het_dict_mod.txt" --chargeFile="$pramdir/amb_mod.crg.oxt" --vdwprottable="$pramdir/prot_mod.table.ambcrg.ambH" -v
EOF
qsub qsub.csh
# this will produce two directories:
# 1) working - contains all input and output files that are generated; not needed afterwards but as a reference
# 2) dockfiles - contains everything that is needed to run dock (copied from working)
# grids
# trim.electrostatics.phi
# vdw.vdw
# vdw.bmp
# ligand.desolv.heavy
# ligand.desolv.hydrogen
# spheres
# matching_spheres.sph
Docking
Create and apply a patch to the INDOCK file (012.dock.gistpatch.csh).
# script that modifies INDOCK file to include GIST parameters # will update INDOCK file, so first we back it up to _nogist # then apply patch and call it _gist # TEB/ MF -- March 2017 cd docking/1prep set nominparmflag = 'F' if (nominparmflag == 'T') then cat << EOF > minparm.patch --- INDOCK 2017-03-03 16:24:11.228826066 -0800 +++ INDOCK_min 2017-03-06 11:25:58.425345398 -0800 @@ -51,6 +51,16 @@ ang2_range 10.0 ang1_step 2.5 ang2_step 2.5 +##################################################### +# MINIMIZATION +minimize no +sim_itmax 500 +sim_trnstep 0.2 +sim_rotstep 5.0 +sim_need_to_restart 1.0 +sim_cnvrge 0.1 +min_cut 1.0e15 +iseed 777 ##################################################### # INPUT FILES / THINGS THAT CHANGE receptor_sphere_file ../dockfiles/matching_spheres.sph EOF cp INDOCK INDOCK_ori patch < minparm.patch endif cat << EOF > gist.patch --- INDOCK 2017-03-06 11:27:24.376904919 -0800 +++ INDOCK_gist 2017-03-06 11:33:37.519015401 -0800 @@ -36,6 +36,7 @@ ligand_desolvation volume vdw_maximum 1.0e10 electrostatic_scale 1.0 +gist_scale -1.0 vdw_scale 1.0 internal_scale 0.0 per_atom_scores no @@ -75,6 +76,7 @@ solvmap_file ../dockfiles/ligand.desolv.heavy hydrogen_solvmap_file ../dockfiles/ligand.desolv.hydrogen delphi_file ../dockfiles/trim.electrostatics.phi +gist_file ../gistfiles/gist-EswPlus2Eww_ref.dx chemgrid_file ../dockfiles/vdw.vdw bumpmap_file ../dockfiles/vdw.bmp ############## end of INDOCK EOF cp INDOCK INDOCK_nogist patch < gist.patch mv INDOCK INDOCK_gist #cp INDOCK_nogist INDOCK echo "For minimization: \n cp docking/1prep/INDOCK_nogist docking/1prep/INDOCK_min \n vi docking/1prep/INDOCK_min \n change line from no to yes"
Next dock the enrichment databases (013.dock.submit.lig-dec.docking.csh).
#!/bin/csh
#This script provides a alternative way to dock a DUD-e like ligand-decoy-database for the enrichment evaluation of actives over decoys
#It assumes that ligands and decoys have been pre-prepation (see script blablabla_ToDo) which needs to be run in SF.
# TEB/ MF -- March 2017
setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK
#set indockType = '_gist'
#set indockType = '_nogist'
#set indockType = '_min'
set indockType = '_gist_elstat0p9'
set pwd = `pwd`
#set filedir = "/mnt/nfs/home/jklyu/work/DOCK_tutorial" #CHANGE THIS
set filedir = ${pwd}/docking/1prep #CHANGE THIS
# this is where the work is done:
set mountdir = ${pwd}/docking/2runEnrich # Might CHANGE THIS
set dude_dir = "/mnt/nfs/home/tbalius/work/Water_Project_all_in_the_same_frame_ph4/databases" # should contain decoy.smi and ligand.smi for ROC script 00005...csh
## TO DO - rename this outside in the dir structure and call in blbalbalbabla script
if (-s $dude_dir) then
echo " $dude_dir exist"
else
# this is something to modified in future.
# probably better to exit if it is not there.
echo "databases do not exist. "
echo "consider making a symbolic link to the database files"
endif
set list = "4NVA" # CHANGE THIS (pdbname)
foreach pdbname ( $list )
# creates "ligands" and "decoys" and has the aim to dock all of the subsets for those two
foreach db_type ( "ligands" "decoys" )
set workdir1 = "${mountdir}/${pdbname}${indockType}/${db_type}"
echo $workdir1
#
mkdir -p ${workdir1}
cd ${workdir1}
#creat dirlist for *.db2.gz files prepared for docking
ls ${dude_dir}/${db_type}/*.db2.gz > ${db_type}_files.txt
#copy the files needed for dock
cp ${filedir}/INDOCK${indockType} ${workdir1}/INDOCK
ln -s ${filedir}/dockfiles/ ${workdir1}
ln -s ${filedir}/gistfiles/ ${workdir1}
#use dirlist to creat chunks for job submission
python $DOCKBASE/docking/setup/setup_db2_zinc15_file_number.py ./ chunk ./${db_type}_files.txt 500 count
csh $DOCKBASE/docking/submit/submit.csh
end # db_type
end # pdbname
combine the results and get the top scoring poses (014.dock.combineScoresAndPoses.csh.
#!/bin/csh
# This script combines the results from the ligand-decoy run 0003 (all chunks) into a combine file containing dock scores from OUTDOCK files
# Three files are produced (one for lig, decoy and both)
# and: a file which has top poses as specified (e.g. top 1000 molecules with 2 poses each); two files (for lig and for decoys)
# TEB/ MF -- March 2017
# to remove dir
# rm -fr pdbs/3O1D/ligands-decoys/ligands/allChunksCombined/ pdbs/3O1D/ligands-decoys/decoys/allChunksCombined/ pdbs/3O1D/ligands-decoys/dockedLigDecoyCombined/
setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK
source /nfs/soft/python/envs/complete/latest/env.csh
set filedir = `pwd`
set mountdir = $filedir/docking/2runEnrich/
set d37 = $DOCKBASE/analysis/
cd $mountdir
#set list = "4NVA_gist 4NVA_nogist"
#set list = "4NVA_gist"
#set list = "4NVA_min"
set list = "4NVA_gist_elstat0p9"
#set list = "4NVA_nogist"
#set list = `cat filename`
#set list = `cat $1`
foreach pdbname ( $list )
foreach db_type ( "ligands" "decoys" )
set workdir = ${mountdir}/${pdbname}/${db_type}/allChunksCombined
echo $pdbname
mkdir -p ${workdir}
cd ${workdir}
# creates a file called dirlist that contains the full path of all directories with docked runs (chunks)
ls -ld ${mountdir}/${pdbname}/${db_type}/* | awk '/chunk/{print $9}' > dirlist
# for debuging
#echo "print $db_type dirlist:"
#cat dirlist
# script extracts scores from all docking runs specified in dirlist
$d37/extract_all.py
# script gets poses for top scoring molecules and produces poses.mol2 (default name)
$d37/getposes.py -d ${mountdir}/${pdbname}/${db_type}
end # db_type
## combine decoyes and actives
set workdir = ${mountdir}/${pdbname}/dockedLigDecoyCombined
rm -rf ${workdir}
mkdir -p ${workdir}
cd ${workdir}
cat ${mountdir}/${pdbname}/ligands/allChunksCombined/dirlist ${mountdir}/${pdbname}/decoys/allChunksCombined/dirlist > dirlist
# for debuging
#echo "print ALL dirlist"
#cat dirlist
$d37/extract_all.py
#$d37/getposes.py -d ${mountdir}/${pdbname} # doesn't work yet; not really needed
#getposes.py -z -l 1000 -x 2 -f extract_all.sort.uniq.txt -o ligands.1000.mol2 -d /mnt/nfs/work/users/fischer/VDR/27Jan2014_learningDOCKrgc/Enrichment/1DB1/DOCKING/ligands
end # pdbname
Enrichment
Here is the script for calculating enrichment (015.dock.AUCplot_of-lig-decoys.csh):
#!/bin/csh
# This script creates a log adjusted AUC (ROC) plot with ligand vs decoy results
# need X11 forwarding enabled when running remotely (ssh sgehead -X).
# TEB/ MF -- March 2017
setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK
source /nfs/soft/python/envs/complete/latest/env.csh
set filedir = `pwd`
set mountdir = $filedir/docking/2runEnrich/
set d37 = $DOCKBASE/analysis/
cd $mountdir
set dude_dir = "/mnt/nfs/home/tbalius/work/Water_Project_all_in_the_same_frame_ph4/databases" # should contain decoy.smi and ligand.smi for ROC script 00005...csh
#
#ln -s $dude_dir/ligands/ligands.smi .
#ln -s $dude_dir/decoys/decoys.smi .
# CHANGE THIS
#set list = "4NVA_gist 4NVA_nogist"
#set list = "4NVA_gist"
#set list = "4NVA_nogist"
#set list = "4NVA_min"
set list = "4NVA_gist_elstat0p9"
#set list = `cat filename`
#set list = `cat $1`
foreach pdbname ( $list )
set workdir = ${mountdir}/${pdbname}/ROC_ligdecoy/
# This script will not work without the following line:
echo "HERE is the HAWK"
# checks that previous script 0003 has produced mol2 files
if (! ( -s $mountdir/${pdbname}/decoys/allChunksCombined/poses.mol2) && ! (-s $mountdir/${pdbname}/ligands/allChunksCombined/poses.mol2 )) then
ls -l $mountdir/${pdbname}/decoys/allChunksCombined/poses.mol2
ls -l $mountdir/${pdbname}/ligands/allChunksCombined/poses.mol2
echo "skipping ${pdbname}. cannot generate ROC"
continue
endif
rm -rf $workdir
mkdir -p $workdir
cd $workdir
#wget http://dude.docking.org/targets/aa2ar/actives_final.ism
# reads ZINC ids (ligand or decoy molecule names)
# everything
awk '{print $2}' $dude_dir/decoys/decoys.smi > decoys.name
awk '{printf "%9s\n", $2}' $dude_dir/ligands/ligands.smi > ligands.name
# for other names e.g. CHEMBL try this instead
#awk '{printf "%9s\n", $3}' $dude_dir/ligands/ligands.smi > ligands.name
cat ${mountdir}/${pdbname}/ligands/allChunksCombined/dirlist ${mountdir}/${pdbname}/decoys/allChunksCombined/dirlist > dirlist
#which enrich.py
set enrich_py = $d37/enrich.py
set plots_py = $d37/plots.py
#
# calculates AUCs, stores in txt file which is then plotted for all ligands and decoys
# - i is the flag for the input directory, this dir should contain the extract_all.sort.uniq.txt.
# the scripts enrich_py and plots_py will go through the extract file and look for the ligand and decoy names.
# when it finds them it will populate the ROC cruve. these values are devied by the total number of ligand or decoys.
# note that ofen not all ligands and not all decoy finish so the point (1,1) is always included and interpolations is performed . . .
#
#python ${enrich_py} -i $mountdir/${pdbname}/dockedLigDecoyCombined/ -o . --ligand-file=ligands.name --decoy-file=decoys.name
#python ${plots_py} -i $mountdir/${pdbname}/dockedLigDecoyCombined/ -o . --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname
python ${enrich_py} -i . -o . --ligand-file=ligands.name --decoy-file=decoys.name
python ${plots_py} -i . -o . --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname # to plot logAUC
python ${plots_py} -i . -o auc_plot -n --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname #-n flag to plot "normal" AUC
end #pbdname
echo " logAUCs > Look at plots: \n eog $mountdir/$list/ROC_ligdecoy/*png \n "
echo " > Or better for printing: \n gthumb $mountdir/$list/ROC_ligdecoy/*png \n"
echo "For EF's > see script 015.c.calc_enrichment_factor_gen.py for calculation of EF's \n"
# For AUC's docking/2runEnrich/4NVA_gist/ROC_ligdecoy/roc_own.txt is used for plotting.
# Note that roc_own.txt is for AUC but roc_own.png in /ROC_ligdecoy/ dir contains logAUC plot.
prepare system for docking (flex)
First we need to align the pdb with multiple states in the MD frame (050.flex.alignwithchimera_gist_conf.csh).
#!/bin/csh
## this script was written by trent balius in the Rizzo Group, 2011
## modified in the Shoichet Group, 2013-2015
# TEB, MF comments -- March 2017
# This shell script will do the following:
# (1) aligns the flexible receptor onto the 10th (production) MD frame -- as before for ligand.
# Hence we can make a simlink using that ligand.
set mountdir = `pwd`
set workdir = $mountdir/flex/01align_to_md
rm -rf $workdir
mkdir -p $workdir
cd $workdir
#ln -s ${mountdir}/MDrundir/prep/002md_align .
curl docking.org/~tbalius/code/waterpaper2017/parms/APO_rt_loop.pdb > APO_rt_loop.pdb
set ref = "$mountdir/gist/006ref/ref.pdb" # snapshot from simulation
set rec = "$workdir/APO_rt_loop.pdb" # rec with flexibility
set chimerapath = "/nfs/soft/chimera/current/bin/chimera"
#write instruction file for chimera based alignment
cat << EOF > chimera.com
# template #0
open $ref
# rec #1
open $rec
# move original to gist. it is harder to move the gist grids.
mmaker #0 #1
write format pdb 0 ref.pdb
write format pdb 1 rec_aligned.pdb
EOF
${chimerapath} --nogui chimera.com > & chimera.com.out
# we have already aligned the ligand previously.
ln -s $mountdir/gist/007align_to_md/lig_aligned.pdb .
Next we prepare this receptor for docking using blastermaster with the flexible receptor-flag (051.flex.blastermaster.csh).
#!/bin/csh
# This script runs Ryan's blastermaster python masterscript for generating everything that dock needs, i.e. grids, spheres
# Run on sgehead as jobs are submitted to the queue
# TEB/ MF - March 2017
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK"
source /nfs/soft/python/envs/complete/latest/env.csh
#set Multiplier = 1.0
set Multiplier = 2.0
set reslist = 186+187+188+189+190+191+192+193+194,199,228
# if you need help selecting which residues to include as flexible the following line find all entries with a letter in the alternative conformation column
# grep "ATOM" rec.pdb | grep -v "^................ " | cut -c17-26 | uniq
set mountdir = `pwd`
set workdir = ${mountdir}/flex/2prep
set pramdir = ${mountdir}/for011.dockprep_parm_files
if ( -s $workdir ) then
echo "$workdir does exits"
exit
endif
mkdir -p $workdir
cd $workdir
ln -s $mountdir/gist/010a.full_gist_combine gistfiles
cp $mountdir/flex/01align_to_md/rec_aligned.pdb .
cp $mountdir/flex/01align_to_md/lig_aligned.pdb .
cat rec_aligned.pdb | grep -v "HOH" | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | awk '{if($12 != "H"){print $0}}' | sed -e "s/HETATM/ATOM /g" | sed -e 's/HEM/HM2/g' >! rec.pdb
cat lig_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | sed -e "s/HETATM/ATOM /g" > xtal-lig.pdb
# the following lines create a qsub script which submits blastermaster to the queue
cat <<EOF > qsub.csh
#!/bin/csh
#\$ -cwd
#\$ -j yes
#\$ -o stderr
#\$ -q all.q
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK"
source /nfs/soft/python/envs/complete/latest/env.csh
cd $workdir
$DOCKBASE/proteins/blastermaster/blastermaster.py --addhOptions=" -HIS -FLIPs " --addhDict="$pramdir/reduce_wwPDB_het_dict_mod.txt" --chargeFile="$pramdir/amb_mod.crg.oxt" --vdwprottable="$pramdir/prot_mod.table.ambcrg.ambH" -v -f --flexiblePenaltyM=$Multiplier --flexibleResidues=${reslist}
EOF
qsub qsub.csh
# this will produce two directories:
# 1) working - contains all input and output files that are generated; not needed afterwards but as a reference
# 2) dockfiles - contains everything that is needed to run dock (copied from working)
# grids
# trim.electrostatics.phi
# vdw.vdw
# vdw.bmp
# ligand.desolv.heavy
# ligand.desolv.hydrogen
# spheres
# matching_spheres.sph