Tutorial on running DOCK3.7 with GIST: Difference between revisions
No edit summary |
|||
(8 intermediate revisions by the same user not shown) | |||
Line 4: | Line 4: | ||
here are more GIST related tutorials: [[ DOCK_3.7_with_GIST_tutorials]] | here are more GIST related tutorials: [[ DOCK_3.7_with_GIST_tutorials]] | ||
This tutorial assumes that you have already completed the MD tutorial [[Tutorial on running Molecular Dynamics for GIST grid generation]]. | This tutorial assumes that you have already completed the MD tutorial [[Tutorial on running Molecular Dynamics for GIST grid generation with scripts]]. | ||
# use | # use the receptor and ligand the align to the MD frame. | ||
# prepare the receptor for docking using blastermaster. Flexible receptor docking is approximated by docking to 16 states. | # prepare the receptor for docking using blastermaster. Flexible receptor docking is approximated by docking to 16 states. | ||
# modify the INDOCK file. | # modify the INDOCK file. | ||
Line 12: | Line 12: | ||
# Dock ligand and decoy databases and perform enrichment analysis | # Dock ligand and decoy databases and perform enrichment analysis | ||
# perform enrichment analyis | # perform enrichment analyis | ||
== Get ligand databases == | |||
For the CcP ligands download the dock-ready databases from here: | |||
wget http://docking.org/~tbalius/code/waterpaper2017/CcP-ga_ph4_justdb2.tar.gz | |||
wget http://docking.org/~tbalius/code/waterpaper2017/for011.dockprep_parm_files.tar.gz | |||
tar -xzvf CcP-ga_ph4_justdb2.tar.gz | |||
tar -xzvf for011.dockprep_parm_files.tar.gz | |||
You can download other databases from the autodude web page: | |||
[[DOCK_3.7_2015/04/15_abl1_Tutorial#set_up_directories_and_get_databases]] | |||
== Prepare system for docking == | |||
Write the following script (011.dock.blastermaster.csh) to run blastermaster. | |||
#!/bin/csh | |||
# This script runs Ryan's blastermaster python masterscript for generating everything that dock needs, i.e. grids, spheres | |||
# Run on sgehead as jobs are submitted to the queue | |||
# TEB/ MF -- March 2017 | |||
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" | |||
source /nfs/soft/python/envs/complete/latest/env.csh | |||
set mountdir = `pwd` | |||
set workdir = ${mountdir}/docking/1prep | |||
set pramdir = ${mountdir}/for011.dockprep_parm_files | |||
if ( -s $workdir ) then | |||
echo "$workdir does exits" | |||
exit | |||
endif | |||
mkdir -p $workdir | |||
cd $workdir | |||
ln -s $mountdir/gist/010a.full_gist_combine gistfiles | |||
cp $mountdir/gist/007align_to_md/rec_aligned.pdb . | |||
cp $mountdir/gist/007align_to_md/lig_aligned.pdb . | |||
cat rec_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | awk '{if($12 != "H"){print $0}}' | sed -e "s/HETATM/ATOM /g" | sed -e 's/HEM/HM2/g' >! rec.pdb | |||
cat lig_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | sed -e "s/HETATM/ATOM /g" > xtal-lig.pdb | |||
# the following lines create a qsub script which submits blastermaster to the queue | |||
cat <<EOF > qsub.csh | |||
#!/bin/csh | |||
#\$ -cwd | |||
#\$ -j yes | |||
#\$ -o stderr | |||
#\$ -q all.q | |||
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" | |||
source /nfs/soft/python/envs/complete/latest/env.csh | |||
cd $workdir | |||
$DOCKBASE/proteins/blastermaster/blastermaster.py --addhOptions=" -HIS -FLIPs " --addhDict="$pramdir/reduce_wwPDB_het_dict_mod.txt" --chargeFile="$pramdir/amb_mod.crg.oxt" --vdwprottable="$pramdir/prot_mod.table.ambcrg.ambH" -v | |||
# modify to the following if there is no cofactor | |||
# $DOCKBASE/proteins/blastermaster/blastermaster.py --addhOptions=" -HIS -FLIPs " -v | |||
EOF | |||
qsub qsub.csh | |||
# this will produce two directories: | |||
# 1) working - contains all input and output files that are generated; not needed afterwards but as a reference | |||
# 2) dockfiles - contains everything that is needed to run dock (copied from working) | |||
# grids | |||
# trim.electrostatics.phi | |||
# vdw.vdw | |||
# vdw.bmp | |||
# ligand.desolv.heavy | |||
# ligand.desolv.hydrogen | |||
# spheres | |||
# matching_spheres.sph | |||
== Docking == | |||
Create and apply a patch to the INDOCK file (012.dock.gistpatch.csh). | |||
# script that modifies INDOCK file to include GIST parameters | |||
# will update INDOCK file, so first we back it up to _nogist | |||
# then apply patch and call it _gist | |||
# TEB/ MF -- March 2017 | |||
cd docking/1prep | |||
set nominparmflag = 'F' | |||
if (nominparmflag == 'T') then | |||
cat << EOF > minparm.patch | |||
--- INDOCK 2017-03-03 16:24:11.228826066 -0800 | |||
+++ INDOCK_min 2017-03-06 11:25:58.425345398 -0800 | |||
@@ -51,6 +51,16 @@ | |||
ang2_range 10.0 | |||
ang1_step 2.5 | |||
ang2_step 2.5 | |||
+##################################################### | |||
+# MINIMIZATION | |||
+minimize no | |||
+sim_itmax 500 | |||
+sim_trnstep 0.2 | |||
+sim_rotstep 5.0 | |||
+sim_need_to_restart 1.0 | |||
+sim_cnvrge 0.1 | |||
+min_cut 1.0e15 | |||
+iseed 777 | |||
##################################################### | |||
# INPUT FILES / THINGS THAT CHANGE | |||
receptor_sphere_file ../dockfiles/matching_spheres.sph | |||
EOF | |||
cp INDOCK INDOCK_ori | |||
patch < minparm.patch | |||
endif | |||
cat << EOF > gist.patch | |||
--- INDOCK 2017-03-06 11:27:24.376904919 -0800 | |||
+++ INDOCK_gist 2017-03-06 11:33:37.519015401 -0800 | |||
@@ -36,6 +36,7 @@ | |||
ligand_desolvation volume | |||
vdw_maximum 1.0e10 | |||
electrostatic_scale 1.0 | |||
+gist_scale -1.0 | |||
vdw_scale 1.0 | |||
internal_scale 0.0 | |||
per_atom_scores no | |||
@@ -75,6 +76,7 @@ | |||
solvmap_file ../dockfiles/ligand.desolv.heavy | |||
hydrogen_solvmap_file ../dockfiles/ligand.desolv.hydrogen | |||
delphi_file ../dockfiles/trim.electrostatics.phi | |||
+gist_file ../gistfiles/gist-EswPlus2Eww_ref.dx | |||
chemgrid_file ../dockfiles/vdw.vdw | |||
bumpmap_file ../dockfiles/vdw.bmp | |||
############## end of INDOCK | |||
EOF | |||
cp INDOCK INDOCK_nogist | |||
patch < gist.patch | |||
mv INDOCK INDOCK_gist | |||
#cp INDOCK_nogist INDOCK | |||
echo "For minimization: \n cp docking/1prep/INDOCK_nogist docking/1prep/INDOCK_min \n vi docking/1prep/INDOCK_min \n change line from no to yes" | |||
Next dock the enrichment databases (013.dock.submit.lig-dec.docking.csh). | |||
#!/bin/csh | |||
#This script provides a alternative way to dock a DUD-e like ligand-decoy-database for the enrichment evaluation of actives over decoys | |||
#It assumes that ligands and decoys have been pre-prepation (see script blablabla_ToDo) which needs to be run in SF. | |||
# TEB/ MF -- March 2017 | |||
setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK | |||
#set indockType = '_gist' | |||
#set indockType = '_nogist' | |||
#set indockType = '_min' | |||
set indockType = '_gist_elstat0p9' | |||
set pwd = `pwd` | |||
#set filedir = "/mnt/nfs/home/jklyu/work/DOCK_tutorial" #CHANGE THIS | |||
set filedir = ${pwd}/docking/1prep #CHANGE THIS | |||
# this is where the work is done: | |||
set mountdir = ${pwd}/docking/2runEnrich # Might CHANGE THIS | |||
set dude_dir = "/mnt/nfs/home/tbalius/work/Water_Project_all_in_the_same_frame_ph4/databases" # should contain decoy.smi and ligand.smi for ROC script 00005...csh | |||
## TO DO - rename this outside in the dir structure and call in blbalbalbabla script | |||
if (-s $dude_dir) then | |||
echo " $dude_dir exist" | |||
else | |||
# this is something to modified in future. | |||
# probably better to exit if it is not there. | |||
echo "databases do not exist. " | |||
echo "consider making a symbolic link to the database files" | |||
endif | |||
set list = "4NVA" # CHANGE THIS (pdbname) | |||
foreach pdbname ( $list ) | |||
# creates "ligands" and "decoys" and has the aim to dock all of the subsets for those two | |||
foreach db_type ( "ligands" "decoys" ) | |||
set workdir1 = "${mountdir}/${pdbname}${indockType}/${db_type}" | |||
echo $workdir1 | |||
# | |||
mkdir -p ${workdir1} | |||
cd ${workdir1} | |||
#creat dirlist for *.db2.gz files prepared for docking | |||
ls ${dude_dir}/${db_type}/*.db2.gz > ${db_type}_files.txt | |||
#copy the files needed for dock | |||
cp ${filedir}/INDOCK${indockType} ${workdir1}/INDOCK | |||
ln -s ${filedir}/dockfiles/ ${workdir1} | |||
ln -s ${filedir}/gistfiles/ ${workdir1} | |||
#use dirlist to creat chunks for job submission | |||
python $DOCKBASE/docking/setup/setup_db2_zinc15_file_number.py ./ chunk ./${db_type}_files.txt 500 count | |||
csh $DOCKBASE/docking/submit/submit.csh | |||
end # db_type | |||
end # pdbname | |||
combine the results and get the top scoring poses (014.dock.combineScoresAndPoses.csh. | |||
#!/bin/csh | |||
# This script combines the results from the ligand-decoy run 0003 (all chunks) into a combine file containing dock scores from OUTDOCK files | |||
# Three files are produced (one for lig, decoy and both) | |||
# and: a file which has top poses as specified (e.g. top 1000 molecules with 2 poses each); two files (for lig and for decoys) | |||
# TEB/ MF -- March 2017 | |||
# to remove dir | |||
# rm -fr pdbs/3O1D/ligands-decoys/ligands/allChunksCombined/ pdbs/3O1D/ligands-decoys/decoys/allChunksCombined/ pdbs/3O1D/ligands-decoys/dockedLigDecoyCombined/ | |||
setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK | |||
source /nfs/soft/python/envs/complete/latest/env.csh | |||
set filedir = `pwd` | |||
set mountdir = $filedir/docking/2runEnrich/ | |||
set d37 = $DOCKBASE/analysis/ | |||
cd $mountdir | |||
#set list = "4NVA_gist 4NVA_nogist" | |||
#set list = "4NVA_gist" | |||
#set list = "4NVA_min" | |||
set list = "4NVA_gist_elstat0p9" | |||
#set list = "4NVA_nogist" | |||
#set list = `cat filename` | |||
#set list = `cat $1` | |||
foreach pdbname ( $list ) | |||
foreach db_type ( "ligands" "decoys" ) | |||
set workdir = ${mountdir}/${pdbname}/${db_type}/allChunksCombined | |||
echo $pdbname | |||
mkdir -p ${workdir} | |||
cd ${workdir} | |||
# creates a file called dirlist that contains the full path of all directories with docked runs (chunks) | |||
ls -ld ${mountdir}/${pdbname}/${db_type}/* | awk '/chunk/{print $9}' > dirlist | |||
# for debuging | |||
#echo "print $db_type dirlist:" | |||
#cat dirlist | |||
# script extracts scores from all docking runs specified in dirlist | |||
$d37/extract_all.py | |||
# script gets poses for top scoring molecules and produces poses.mol2 (default name) | |||
$d37/getposes.py -d ${mountdir}/${pdbname}/${db_type} | |||
end # db_type | |||
## combine decoyes and actives | |||
set workdir = ${mountdir}/${pdbname}/dockedLigDecoyCombined | |||
rm -rf ${workdir} | |||
mkdir -p ${workdir} | |||
cd ${workdir} | |||
cat ${mountdir}/${pdbname}/ligands/allChunksCombined/dirlist ${mountdir}/${pdbname}/decoys/allChunksCombined/dirlist > dirlist | |||
# for debuging | |||
#echo "print ALL dirlist" | |||
#cat dirlist | |||
$d37/extract_all.py | |||
#$d37/getposes.py -d ${mountdir}/${pdbname} # doesn't work yet; not really needed | |||
#getposes.py -z -l 1000 -x 2 -f extract_all.sort.uniq.txt -o ligands.1000.mol2 -d /mnt/nfs/work/users/fischer/VDR/27Jan2014_learningDOCKrgc/Enrichment/1DB1/DOCKING/ligands | |||
end # pdbname | |||
== Enrichment == | |||
Here is the script for calculating enrichment (015.dock.AUCplot_of-lig-decoys.csh): | |||
#!/bin/csh | |||
# This script creates a log adjusted AUC (ROC) plot with ligand vs decoy results | |||
# need X11 forwarding enabled when running remotely (ssh sgehead -X). | |||
# TEB/ MF -- March 2017 | |||
setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK | |||
source /nfs/soft/python/envs/complete/latest/env.csh | |||
set filedir = `pwd` | |||
set mountdir = $filedir/docking/2runEnrich/ | |||
set d37 = $DOCKBASE/analysis/ | |||
cd $mountdir | |||
set dude_dir = "/mnt/nfs/home/tbalius/work/Water_Project_all_in_the_same_frame_ph4/databases" # should contain decoy.smi and ligand.smi for ROC script 00005...csh | |||
# | |||
#ln -s $dude_dir/ligands/ligands.smi . | |||
#ln -s $dude_dir/decoys/decoys.smi . | |||
# CHANGE THIS | |||
#set list = "4NVA_gist 4NVA_nogist" | |||
#set list = "4NVA_gist" | |||
#set list = "4NVA_nogist" | |||
#set list = "4NVA_min" | |||
set list = "4NVA_gist_elstat0p9" | |||
#set list = `cat filename` | |||
#set list = `cat $1` | |||
foreach pdbname ( $list ) | |||
set workdir = ${mountdir}/${pdbname}/ROC_ligdecoy/ | |||
# This script will not work without the following line: | |||
echo "HERE is the HAWK" | |||
# checks that previous script 0003 has produced mol2 files | |||
if (! ( -s $mountdir/${pdbname}/decoys/allChunksCombined/poses.mol2) && ! (-s $mountdir/${pdbname}/ligands/allChunksCombined/poses.mol2 )) then | |||
ls -l $mountdir/${pdbname}/decoys/allChunksCombined/poses.mol2 | |||
ls -l $mountdir/${pdbname}/ligands/allChunksCombined/poses.mol2 | |||
echo "skipping ${pdbname}. cannot generate ROC" | |||
continue | |||
endif | |||
rm -rf $workdir | |||
mkdir -p $workdir | |||
cd $workdir | |||
#wget http://dude.docking.org/targets/aa2ar/actives_final.ism | |||
# reads ZINC ids (ligand or decoy molecule names) | |||
# everything | |||
awk '{print $2}' $dude_dir/decoys/decoys.smi > decoys.name | |||
awk '{printf "%9s\n", $2}' $dude_dir/ligands/ligands.smi > ligands.name | |||
# for other names e.g. CHEMBL try this instead | |||
#awk '{printf "%9s\n", $3}' $dude_dir/ligands/ligands.smi > ligands.name | |||
cat ${mountdir}/${pdbname}/ligands/allChunksCombined/dirlist ${mountdir}/${pdbname}/decoys/allChunksCombined/dirlist > dirlist | |||
#which enrich.py | |||
set enrich_py = $d37/enrich.py | |||
set plots_py = $d37/plots.py | |||
# | |||
# calculates AUCs, stores in txt file which is then plotted for all ligands and decoys | |||
# - i is the flag for the input directory, this dir should contain the extract_all.sort.uniq.txt. | |||
# the scripts enrich_py and plots_py will go through the extract file and look for the ligand and decoy names. | |||
# when it finds them it will populate the ROC cruve. these values are devied by the total number of ligand or decoys. | |||
# note that ofen not all ligands and not all decoy finish so the point (1,1) is always included and interpolations is performed . . . | |||
# | |||
#python ${enrich_py} -i $mountdir/${pdbname}/dockedLigDecoyCombined/ -o . --ligand-file=ligands.name --decoy-file=decoys.name | |||
#python ${plots_py} -i $mountdir/${pdbname}/dockedLigDecoyCombined/ -o . --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname | |||
python ${enrich_py} -i . -o . --ligand-file=ligands.name --decoy-file=decoys.name | |||
python ${plots_py} -i . -o . --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname # to plot logAUC | |||
python ${plots_py} -i . -o auc_plot -n --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname #-n flag to plot "normal" AUC | |||
end #pbdname | |||
echo " logAUCs > Look at plots: \n eog $mountdir/$list/ROC_ligdecoy/*png \n " | |||
echo " > Or better for printing: \n gthumb $mountdir/$list/ROC_ligdecoy/*png \n" | |||
echo "For EF's > see script 015.c.calc_enrichment_factor_gen.py for calculation of EF's \n" | |||
# For AUC's docking/2runEnrich/4NVA_gist/ROC_ligdecoy/roc_own.txt is used for plotting. | |||
# Note that roc_own.txt is for AUC but roc_own.png in /ROC_ligdecoy/ dir contains logAUC plot. | |||
== prepare system for docking (flex) == | |||
First we need to align the pdb with multiple states in the MD frame (050.flex.alignwithchimera_gist_conf.csh). | |||
#!/bin/csh | |||
## this script was written by trent balius in the Rizzo Group, 2011 | |||
## modified in the Shoichet Group, 2013-2015 | |||
# TEB, MF comments -- March 2017 | |||
# This shell script will do the following: | |||
# (1) aligns the flexible receptor onto the 10th (production) MD frame -- as before for ligand. | |||
# Hence we can make a simlink using that ligand. | |||
set mountdir = `pwd` | |||
set workdir = $mountdir/flex/01align_to_md | |||
rm -rf $workdir | |||
mkdir -p $workdir | |||
cd $workdir | |||
#ln -s ${mountdir}/MDrundir/prep/002md_align . | |||
curl docking.org/~tbalius/code/waterpaper2017/parms/APO_rt_loop.pdb > APO_rt_loop.pdb | |||
set ref = "$mountdir/gist/006ref/ref.pdb" # snapshot from simulation | |||
set rec = "$workdir/APO_rt_loop.pdb" # rec with flexibility | |||
set chimerapath = "/nfs/soft/chimera/current/bin/chimera" | |||
#write instruction file for chimera based alignment | |||
cat << EOF > chimera.com | |||
# template #0 | |||
open $ref | |||
# rec #1 | |||
open $rec | |||
# move original to gist. it is harder to move the gist grids. | |||
mmaker #0 #1 | |||
write format pdb 0 ref.pdb | |||
write format pdb 1 rec_aligned.pdb | |||
EOF | |||
${chimerapath} --nogui chimera.com > & chimera.com.out | |||
# we have already aligned the ligand previously. | |||
ln -s $mountdir/gist/007align_to_md/lig_aligned.pdb . | |||
Next we prepare this receptor for docking using blastermaster with the flexible receptor-flag (051.flex.blastermaster.csh). | |||
#!/bin/csh | |||
# This script runs Ryan's blastermaster python masterscript for generating everything that dock needs, i.e. grids, spheres | |||
# Run on sgehead as jobs are submitted to the queue | |||
# TEB/ MF - March 2017 | |||
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" | |||
source /nfs/soft/python/envs/complete/latest/env.csh | |||
#set Multiplier = 1.0 | |||
set Multiplier = 2.0 | |||
set reslist = 186+187+188+189+190+191+192+193+194,199,228 | |||
# if you need help selecting which residues to include as flexible the following line find all entries with a letter in the alternative conformation column | |||
# grep "ATOM" rec.pdb | grep -v "^................ " | cut -c17-26 | uniq | |||
set mountdir = `pwd` | |||
set workdir = ${mountdir}/flex/2prep | |||
set pramdir = ${mountdir}/for011.dockprep_parm_files | |||
if ( -s $workdir ) then | |||
echo "$workdir does exits" | |||
exit | |||
endif | |||
mkdir -p $workdir | |||
cd $workdir | |||
ln -s $mountdir/gist/010a.full_gist_combine gistfiles | |||
cp $mountdir/flex/01align_to_md/rec_aligned.pdb . | |||
cp $mountdir/flex/01align_to_md/lig_aligned.pdb . | |||
cat rec_aligned.pdb | grep -v "HOH" | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | awk '{if($12 != "H"){print $0}}' | sed -e "s/HETATM/ATOM /g" | sed -e 's/HEM/HM2/g' >! rec.pdb | |||
cat lig_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | sed -e "s/HETATM/ATOM /g" > xtal-lig.pdb | |||
# the following lines create a qsub script which submits blastermaster to the queue | |||
cat <<EOF > qsub.csh | |||
#!/bin/csh | |||
#\$ -cwd | |||
#\$ -j yes | |||
#\$ -o stderr | |||
#\$ -q all.q | |||
setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" | |||
source /nfs/soft/python/envs/complete/latest/env.csh | |||
cd $workdir | |||
$DOCKBASE/proteins/blastermaster/blastermaster.py --addhOptions=" -HIS -FLIPs " --addhDict="$pramdir/reduce_wwPDB_het_dict_mod.txt" --chargeFile="$pramdir/amb_mod.crg.oxt" --vdwprottable="$pramdir/prot_mod.table.ambcrg.ambH" -v -f --flexiblePenaltyM=$Multiplier --flexibleResidues=${reslist} | |||
EOF | |||
qsub qsub.csh | |||
# this will produce two directories: | |||
# 1) working - contains all input and output files that are generated; not needed afterwards but as a reference | |||
# 2) dockfiles - contains everything that is needed to run dock (copied from working) | |||
# grids | |||
# trim.electrostatics.phi | |||
# vdw.vdw | |||
# vdw.bmp | |||
# ligand.desolv.heavy | |||
# ligand.desolv.hydrogen | |||
# spheres | |||
# matching_spheres.sph | |||
== docking (flex) == | |||
== enrichment (flex) == |
Latest revision as of 01:31, 16 November 2017
Tutorial by Trent Balius (2017/01/30).
here are more GIST related tutorials: DOCK_3.7_with_GIST_tutorials
This tutorial assumes that you have already completed the MD tutorial Tutorial on running Molecular Dynamics for GIST grid generation with scripts.
- use the receptor and ligand the align to the MD frame.
- prepare the receptor for docking using blastermaster. Flexible receptor docking is approximated by docking to 16 states.
- modify the INDOCK file.
- download (or generate) ligand and decoy DUD-E like databases.
- Dock ligand and decoy databases and perform enrichment analysis
- perform enrichment analyis
Get ligand databases
For the CcP ligands download the dock-ready databases from here:
wget http://docking.org/~tbalius/code/waterpaper2017/CcP-ga_ph4_justdb2.tar.gz wget http://docking.org/~tbalius/code/waterpaper2017/for011.dockprep_parm_files.tar.gz tar -xzvf CcP-ga_ph4_justdb2.tar.gz tar -xzvf for011.dockprep_parm_files.tar.gz
You can download other databases from the autodude web page: DOCK_3.7_2015/04/15_abl1_Tutorial#set_up_directories_and_get_databases
Prepare system for docking
Write the following script (011.dock.blastermaster.csh) to run blastermaster.
#!/bin/csh # This script runs Ryan's blastermaster python masterscript for generating everything that dock needs, i.e. grids, spheres # Run on sgehead as jobs are submitted to the queue # TEB/ MF -- March 2017 setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" source /nfs/soft/python/envs/complete/latest/env.csh set mountdir = `pwd` set workdir = ${mountdir}/docking/1prep set pramdir = ${mountdir}/for011.dockprep_parm_files if ( -s $workdir ) then echo "$workdir does exits" exit endif mkdir -p $workdir cd $workdir ln -s $mountdir/gist/010a.full_gist_combine gistfiles cp $mountdir/gist/007align_to_md/rec_aligned.pdb . cp $mountdir/gist/007align_to_md/lig_aligned.pdb . cat rec_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | awk '{if($12 != "H"){print $0}}' | sed -e "s/HETATM/ATOM /g" | sed -e 's/HEM/HM2/g' >! rec.pdb cat lig_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | sed -e "s/HETATM/ATOM /g" > xtal-lig.pdb # the following lines create a qsub script which submits blastermaster to the queue cat <<EOF > qsub.csh #!/bin/csh #\$ -cwd #\$ -j yes #\$ -o stderr #\$ -q all.q setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" source /nfs/soft/python/envs/complete/latest/env.csh cd $workdir $DOCKBASE/proteins/blastermaster/blastermaster.py --addhOptions=" -HIS -FLIPs " --addhDict="$pramdir/reduce_wwPDB_het_dict_mod.txt" --chargeFile="$pramdir/amb_mod.crg.oxt" --vdwprottable="$pramdir/prot_mod.table.ambcrg.ambH" -v # modify to the following if there is no cofactor # $DOCKBASE/proteins/blastermaster/blastermaster.py --addhOptions=" -HIS -FLIPs " -v EOF qsub qsub.csh # this will produce two directories: # 1) working - contains all input and output files that are generated; not needed afterwards but as a reference # 2) dockfiles - contains everything that is needed to run dock (copied from working) # grids # trim.electrostatics.phi # vdw.vdw # vdw.bmp # ligand.desolv.heavy # ligand.desolv.hydrogen # spheres # matching_spheres.sph
Docking
Create and apply a patch to the INDOCK file (012.dock.gistpatch.csh).
# script that modifies INDOCK file to include GIST parameters # will update INDOCK file, so first we back it up to _nogist # then apply patch and call it _gist # TEB/ MF -- March 2017 cd docking/1prep set nominparmflag = 'F' if (nominparmflag == 'T') then cat << EOF > minparm.patch --- INDOCK 2017-03-03 16:24:11.228826066 -0800 +++ INDOCK_min 2017-03-06 11:25:58.425345398 -0800 @@ -51,6 +51,16 @@ ang2_range 10.0 ang1_step 2.5 ang2_step 2.5 +##################################################### +# MINIMIZATION +minimize no +sim_itmax 500 +sim_trnstep 0.2 +sim_rotstep 5.0 +sim_need_to_restart 1.0 +sim_cnvrge 0.1 +min_cut 1.0e15 +iseed 777 ##################################################### # INPUT FILES / THINGS THAT CHANGE receptor_sphere_file ../dockfiles/matching_spheres.sph EOF cp INDOCK INDOCK_ori patch < minparm.patch endif cat << EOF > gist.patch --- INDOCK 2017-03-06 11:27:24.376904919 -0800 +++ INDOCK_gist 2017-03-06 11:33:37.519015401 -0800 @@ -36,6 +36,7 @@ ligand_desolvation volume vdw_maximum 1.0e10 electrostatic_scale 1.0 +gist_scale -1.0 vdw_scale 1.0 internal_scale 0.0 per_atom_scores no @@ -75,6 +76,7 @@ solvmap_file ../dockfiles/ligand.desolv.heavy hydrogen_solvmap_file ../dockfiles/ligand.desolv.hydrogen delphi_file ../dockfiles/trim.electrostatics.phi +gist_file ../gistfiles/gist-EswPlus2Eww_ref.dx chemgrid_file ../dockfiles/vdw.vdw bumpmap_file ../dockfiles/vdw.bmp ############## end of INDOCK EOF cp INDOCK INDOCK_nogist patch < gist.patch mv INDOCK INDOCK_gist #cp INDOCK_nogist INDOCK echo "For minimization: \n cp docking/1prep/INDOCK_nogist docking/1prep/INDOCK_min \n vi docking/1prep/INDOCK_min \n change line from no to yes"
Next dock the enrichment databases (013.dock.submit.lig-dec.docking.csh).
#!/bin/csh #This script provides a alternative way to dock a DUD-e like ligand-decoy-database for the enrichment evaluation of actives over decoys #It assumes that ligands and decoys have been pre-prepation (see script blablabla_ToDo) which needs to be run in SF. # TEB/ MF -- March 2017 setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK #set indockType = '_gist' #set indockType = '_nogist' #set indockType = '_min' set indockType = '_gist_elstat0p9' set pwd = `pwd` #set filedir = "/mnt/nfs/home/jklyu/work/DOCK_tutorial" #CHANGE THIS set filedir = ${pwd}/docking/1prep #CHANGE THIS # this is where the work is done: set mountdir = ${pwd}/docking/2runEnrich # Might CHANGE THIS set dude_dir = "/mnt/nfs/home/tbalius/work/Water_Project_all_in_the_same_frame_ph4/databases" # should contain decoy.smi and ligand.smi for ROC script 00005...csh ## TO DO - rename this outside in the dir structure and call in blbalbalbabla script if (-s $dude_dir) then echo " $dude_dir exist" else # this is something to modified in future. # probably better to exit if it is not there. echo "databases do not exist. " echo "consider making a symbolic link to the database files" endif set list = "4NVA" # CHANGE THIS (pdbname) foreach pdbname ( $list ) # creates "ligands" and "decoys" and has the aim to dock all of the subsets for those two foreach db_type ( "ligands" "decoys" ) set workdir1 = "${mountdir}/${pdbname}${indockType}/${db_type}" echo $workdir1 # mkdir -p ${workdir1} cd ${workdir1} #creat dirlist for *.db2.gz files prepared for docking ls ${dude_dir}/${db_type}/*.db2.gz > ${db_type}_files.txt #copy the files needed for dock cp ${filedir}/INDOCK${indockType} ${workdir1}/INDOCK ln -s ${filedir}/dockfiles/ ${workdir1} ln -s ${filedir}/gistfiles/ ${workdir1} #use dirlist to creat chunks for job submission python $DOCKBASE/docking/setup/setup_db2_zinc15_file_number.py ./ chunk ./${db_type}_files.txt 500 count csh $DOCKBASE/docking/submit/submit.csh end # db_type end # pdbname
combine the results and get the top scoring poses (014.dock.combineScoresAndPoses.csh.
#!/bin/csh # This script combines the results from the ligand-decoy run 0003 (all chunks) into a combine file containing dock scores from OUTDOCK files # Three files are produced (one for lig, decoy and both) # and: a file which has top poses as specified (e.g. top 1000 molecules with 2 poses each); two files (for lig and for decoys) # TEB/ MF -- March 2017 # to remove dir # rm -fr pdbs/3O1D/ligands-decoys/ligands/allChunksCombined/ pdbs/3O1D/ligands-decoys/decoys/allChunksCombined/ pdbs/3O1D/ligands-decoys/dockedLigDecoyCombined/ setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK source /nfs/soft/python/envs/complete/latest/env.csh set filedir = `pwd` set mountdir = $filedir/docking/2runEnrich/ set d37 = $DOCKBASE/analysis/ cd $mountdir #set list = "4NVA_gist 4NVA_nogist" #set list = "4NVA_gist" #set list = "4NVA_min" set list = "4NVA_gist_elstat0p9" #set list = "4NVA_nogist" #set list = `cat filename` #set list = `cat $1` foreach pdbname ( $list ) foreach db_type ( "ligands" "decoys" ) set workdir = ${mountdir}/${pdbname}/${db_type}/allChunksCombined echo $pdbname mkdir -p ${workdir} cd ${workdir} # creates a file called dirlist that contains the full path of all directories with docked runs (chunks) ls -ld ${mountdir}/${pdbname}/${db_type}/* | awk '/chunk/{print $9}' > dirlist # for debuging #echo "print $db_type dirlist:" #cat dirlist # script extracts scores from all docking runs specified in dirlist $d37/extract_all.py # script gets poses for top scoring molecules and produces poses.mol2 (default name) $d37/getposes.py -d ${mountdir}/${pdbname}/${db_type} end # db_type ## combine decoyes and actives set workdir = ${mountdir}/${pdbname}/dockedLigDecoyCombined rm -rf ${workdir} mkdir -p ${workdir} cd ${workdir} cat ${mountdir}/${pdbname}/ligands/allChunksCombined/dirlist ${mountdir}/${pdbname}/decoys/allChunksCombined/dirlist > dirlist # for debuging #echo "print ALL dirlist" #cat dirlist $d37/extract_all.py #$d37/getposes.py -d ${mountdir}/${pdbname} # doesn't work yet; not really needed #getposes.py -z -l 1000 -x 2 -f extract_all.sort.uniq.txt -o ligands.1000.mol2 -d /mnt/nfs/work/users/fischer/VDR/27Jan2014_learningDOCKrgc/Enrichment/1DB1/DOCKING/ligands end # pdbname
Enrichment
Here is the script for calculating enrichment (015.dock.AUCplot_of-lig-decoys.csh):
#!/bin/csh # This script creates a log adjusted AUC (ROC) plot with ligand vs decoy results # need X11 forwarding enabled when running remotely (ssh sgehead -X). # TEB/ MF -- March 2017 setenv DOCKBASE /nfs/home/tbalius/zzz.github/DOCK source /nfs/soft/python/envs/complete/latest/env.csh set filedir = `pwd` set mountdir = $filedir/docking/2runEnrich/ set d37 = $DOCKBASE/analysis/ cd $mountdir set dude_dir = "/mnt/nfs/home/tbalius/work/Water_Project_all_in_the_same_frame_ph4/databases" # should contain decoy.smi and ligand.smi for ROC script 00005...csh # #ln -s $dude_dir/ligands/ligands.smi . #ln -s $dude_dir/decoys/decoys.smi . # CHANGE THIS #set list = "4NVA_gist 4NVA_nogist" #set list = "4NVA_gist" #set list = "4NVA_nogist" #set list = "4NVA_min" set list = "4NVA_gist_elstat0p9" #set list = `cat filename` #set list = `cat $1` foreach pdbname ( $list ) set workdir = ${mountdir}/${pdbname}/ROC_ligdecoy/ # This script will not work without the following line: echo "HERE is the HAWK" # checks that previous script 0003 has produced mol2 files if (! ( -s $mountdir/${pdbname}/decoys/allChunksCombined/poses.mol2) && ! (-s $mountdir/${pdbname}/ligands/allChunksCombined/poses.mol2 )) then ls -l $mountdir/${pdbname}/decoys/allChunksCombined/poses.mol2 ls -l $mountdir/${pdbname}/ligands/allChunksCombined/poses.mol2 echo "skipping ${pdbname}. cannot generate ROC" continue endif rm -rf $workdir mkdir -p $workdir cd $workdir #wget http://dude.docking.org/targets/aa2ar/actives_final.ism # reads ZINC ids (ligand or decoy molecule names) # everything awk '{print $2}' $dude_dir/decoys/decoys.smi > decoys.name awk '{printf "%9s\n", $2}' $dude_dir/ligands/ligands.smi > ligands.name # for other names e.g. CHEMBL try this instead #awk '{printf "%9s\n", $3}' $dude_dir/ligands/ligands.smi > ligands.name cat ${mountdir}/${pdbname}/ligands/allChunksCombined/dirlist ${mountdir}/${pdbname}/decoys/allChunksCombined/dirlist > dirlist #which enrich.py set enrich_py = $d37/enrich.py set plots_py = $d37/plots.py # # calculates AUCs, stores in txt file which is then plotted for all ligands and decoys # - i is the flag for the input directory, this dir should contain the extract_all.sort.uniq.txt. # the scripts enrich_py and plots_py will go through the extract file and look for the ligand and decoy names. # when it finds them it will populate the ROC cruve. these values are devied by the total number of ligand or decoys. # note that ofen not all ligands and not all decoy finish so the point (1,1) is always included and interpolations is performed . . . # #python ${enrich_py} -i $mountdir/${pdbname}/dockedLigDecoyCombined/ -o . --ligand-file=ligands.name --decoy-file=decoys.name #python ${plots_py} -i $mountdir/${pdbname}/dockedLigDecoyCombined/ -o . --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname python ${enrich_py} -i . -o . --ligand-file=ligands.name --decoy-file=decoys.name python ${plots_py} -i . -o . --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname # to plot logAUC python ${plots_py} -i . -o auc_plot -n --ligand-file=ligands.name --decoy-file=decoys.name -l $pdbname #-n flag to plot "normal" AUC end #pbdname echo " logAUCs > Look at plots: \n eog $mountdir/$list/ROC_ligdecoy/*png \n " echo " > Or better for printing: \n gthumb $mountdir/$list/ROC_ligdecoy/*png \n" echo "For EF's > see script 015.c.calc_enrichment_factor_gen.py for calculation of EF's \n" # For AUC's docking/2runEnrich/4NVA_gist/ROC_ligdecoy/roc_own.txt is used for plotting. # Note that roc_own.txt is for AUC but roc_own.png in /ROC_ligdecoy/ dir contains logAUC plot.
prepare system for docking (flex)
First we need to align the pdb with multiple states in the MD frame (050.flex.alignwithchimera_gist_conf.csh).
#!/bin/csh ## this script was written by trent balius in the Rizzo Group, 2011 ## modified in the Shoichet Group, 2013-2015 # TEB, MF comments -- March 2017 # This shell script will do the following: # (1) aligns the flexible receptor onto the 10th (production) MD frame -- as before for ligand. # Hence we can make a simlink using that ligand. set mountdir = `pwd` set workdir = $mountdir/flex/01align_to_md rm -rf $workdir mkdir -p $workdir cd $workdir #ln -s ${mountdir}/MDrundir/prep/002md_align . curl docking.org/~tbalius/code/waterpaper2017/parms/APO_rt_loop.pdb > APO_rt_loop.pdb set ref = "$mountdir/gist/006ref/ref.pdb" # snapshot from simulation set rec = "$workdir/APO_rt_loop.pdb" # rec with flexibility set chimerapath = "/nfs/soft/chimera/current/bin/chimera" #write instruction file for chimera based alignment cat << EOF > chimera.com # template #0 open $ref # rec #1 open $rec # move original to gist. it is harder to move the gist grids. mmaker #0 #1 write format pdb 0 ref.pdb write format pdb 1 rec_aligned.pdb EOF ${chimerapath} --nogui chimera.com > & chimera.com.out # we have already aligned the ligand previously. ln -s $mountdir/gist/007align_to_md/lig_aligned.pdb .
Next we prepare this receptor for docking using blastermaster with the flexible receptor-flag (051.flex.blastermaster.csh).
#!/bin/csh # This script runs Ryan's blastermaster python masterscript for generating everything that dock needs, i.e. grids, spheres # Run on sgehead as jobs are submitted to the queue # TEB/ MF - March 2017 setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" source /nfs/soft/python/envs/complete/latest/env.csh #set Multiplier = 1.0 set Multiplier = 2.0 set reslist = 186+187+188+189+190+191+192+193+194,199,228 # if you need help selecting which residues to include as flexible the following line find all entries with a letter in the alternative conformation column # grep "ATOM" rec.pdb | grep -v "^................ " | cut -c17-26 | uniq set mountdir = `pwd` set workdir = ${mountdir}/flex/2prep set pramdir = ${mountdir}/for011.dockprep_parm_files if ( -s $workdir ) then echo "$workdir does exits" exit endif mkdir -p $workdir cd $workdir ln -s $mountdir/gist/010a.full_gist_combine gistfiles cp $mountdir/flex/01align_to_md/rec_aligned.pdb . cp $mountdir/flex/01align_to_md/lig_aligned.pdb . cat rec_aligned.pdb | grep -v "HOH" | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | awk '{if($12 != "H"){print $0}}' | sed -e "s/HETATM/ATOM /g" | sed -e 's/HEM/HM2/g' >! rec.pdb cat lig_aligned.pdb | awk '{if ($1 == "ATOM" || $1 == "HETATM"){print $0}}' | sed -e "s/HETATM/ATOM /g" > xtal-lig.pdb # the following lines create a qsub script which submits blastermaster to the queue cat <<EOF > qsub.csh #!/bin/csh #\$ -cwd #\$ -j yes #\$ -o stderr #\$ -q all.q setenv DOCKBASE "/nfs/home/tbalius/zzz.github/DOCK" source /nfs/soft/python/envs/complete/latest/env.csh cd $workdir $DOCKBASE/proteins/blastermaster/blastermaster.py --addhOptions=" -HIS -FLIPs " --addhDict="$pramdir/reduce_wwPDB_het_dict_mod.txt" --chargeFile="$pramdir/amb_mod.crg.oxt" --vdwprottable="$pramdir/prot_mod.table.ambcrg.ambH" -v -f --flexiblePenaltyM=$Multiplier --flexibleResidues=${reslist} EOF qsub qsub.csh # this will produce two directories: # 1) working - contains all input and output files that are generated; not needed afterwards but as a reference # 2) dockfiles - contains everything that is needed to run dock (copied from working) # grids # trim.electrostatics.phi # vdw.vdw # vdw.bmp # ligand.desolv.heavy # ligand.desolv.hydrogen # spheres # matching_spheres.sph