Model ensemble setup

From DISI
Jump to navigation Jump to search

This is a really simple script to facilitate automated setup and docking for an ensemble of models. It is intended to be used to screen a set of models for which performs best at enriching known ligands from a relatively small set of decoys/inactives (e.g. the Roth lab collection). It submits a series of jobs for each model in an input list and then submits a predefined docking job for each model after it has been prepared using blastermaster. This means that it requires each of the nodes in the first set of jobs (blastermaster, etc.) to in turn act as a submit host for the second set (docking). It should not be used for large screens.

Copied from /nfs/home/kstafford/scripts/model_ensemble_setup.py:

#! /usr/bin/env python

import os, sys

def model_ensemble_setup(modellist, xtal, dirstring):
	"""Prepare per-model docking directories and write an SGE array-job script.

	modellist -- path to a file listing model PDB filenames, one per line
	             (e.g. "model1.pdb"); a directory named after each model
	             (suffix stripped) is created in the current directory.
	xtal      -- full path to the crystallographic ligand PDB, linked into
	             each model directory as xtal-lig.pdb.
	dirstring -- directory search path to db2 files, substituted into the
	             setup_db2_lots call of the generated csh script.

	Writes 'dirlist' (one model directory per line) and
	'run_all_models.csh' (an SGE task-array script that runs blastermaster,
	setup_db2_lots and submit_dock in each directory). Submission itself is
	left commented out at the bottom.
	"""
	with open(modellist) as f:
		# Skip blank lines: the original crashed on os.mkdir('') for them.
		models = [line.strip() for line in f if line.strip()]

	pwd = os.getcwd()
	dirlist = []

	for model in models:
		name = model.split('.pdb')[0]
		print(name)
		if not os.path.exists(name):
			os.mkdir(name)

		os.chdir(name)
		# Link the model and ligand under the standard names the docking
		# scripts expect. NOTE(review): paths are interpolated into a shell
		# command unquoted; os.symlink would be safer for untrusted names.
		os.system('ln -s ../%s m.pdb' % model)
		os.system('ln -s %s xtal-lig.pdb' % xtal.strip())

		dirlist.append(name + '\n')

		os.chdir(pwd)

	with open('dirlist', 'w') as f:
		f.writelines(dirlist)

	runtext = '''#$ -S /bin/csh
#$ -cwd
#$ -j yes
#$ -o test.out
#$ -e test.err
#$ -q all.q



echo "Starting dock in directory: $PWD"
set dirarray=`cat dirlist`
set pth=$dirarray[$SGE_TASK_ID]
cd $pth
echo $pth

$DOCKBASE/bin/blastermaster -v
$DOCKBASE/bin/setup_db2_lots 20 d "%s"
$DOCKBASE/bin/submit_dock 
''' % dirstring

	with open('run_all_models.csh', 'w') as f:
		f.write(runtext)

	# Uncomment to submit the task array automatically (one task per model):
#	os.system('qsub -t 1-%d run_all_models.csh' % len(models))

	return

if __name__ == "__main__":
	# Args: model list file, full path to xtal-lig.pdb, db2 search path.
	# (The usage string was previously broken across two lines by a wiki
	# line-wrap, which made the script a syntax error as pasted.)
	if len(sys.argv) != 4:
		print("Usage: [file containing a list of models] [full path to xtal-lig] [directory search path to db2 files]")
	else:
		model_ensemble_setup(sys.argv[1], sys.argv[2], sys.argv[3])
	sys.exit()

After the docking is done, you can parallelize the usual analysis steps like so:

Copied from /nfs/home/kstafford/scripts/run_analysis.py:

#! /usr/bin/env python

import os,sys

def run_analysis(dirlist, liglist, decoylist):
	"""Submit an SGE task array running the standard DOCK analysis steps.

	dirlist   -- file listing one completed docking directory per line
	liglist   -- file of known-ligand IDs (passed to enrich.py -l)
	decoylist -- file of decoy/inactive IDs (passed to enrich.py -d)

	Writes 'run_analysis.csh' and qsubs it with one task per directory.
	Returns early (without submitting) if any input file is missing.
	"""
	if not os.path.exists(liglist):
		print("Ligand ID file %s doesn't exist" % liglist)
		return
	if not os.path.exists(decoylist):
		print("Decoy ID file %s doesn't exist" % decoylist)
		return
	if not os.path.exists(dirlist):
		print("Directory list %s doesn't exist" % dirlist)
		# Bug fix: the original fell through here and crashed on open(dirlist).
		return

	submit_text = '''#$ -S /bin/csh
#$ -cwd
#$ -j yes
#$ -o test.out
#$ -e test.err
#$ -q all.q



echo "Starting dock in directory: $PWD"
set dirarray=`cat %s`
set pth=$dirarray[$SGE_TASK_ID]
cd $pth
echo $pth


$DOCKBASE/analysis/extract_all.py
$DOCKBASE/analysis/enrich.py -l %s -d %s
$DOCKBASE/analysis/getposes.py

echo "started task"
echo $SGE_TASK_ID

exit $status
''' % (dirlist, liglist, decoylist)

	with open(dirlist) as f:
		ndirs = len(f.readlines())

	with open('run_analysis.csh', 'w') as f:
		f.write(submit_text)

	# One SGE task per directory listed in dirlist.
	os.system('qsub -t 1-%d run_analysis.csh' % ndirs)

	return
	
if __name__ == "__main__":
	# Args: directory list, known-ligand ID list, decoy ID list.
	if len(sys.argv) != 4:
		print("Usage: [directory list] [ligand ID list] [decoy ID list]")
	else:
		run_analysis(sys.argv[1], sys.argv[2], sys.argv[3])
	sys.exit()

Note that as written, the first script takes a list of model files as input (i.e. "model1.pdb", etc) and the second takes a list of directory names, which are by default the names of the models ("model1", etc). This is because you may want to run the analysis over only a subset of models, or over an ensemble generated by a different method, etc.