######################################################### ## ## cMonkey biclustering R scripts, version 1.1 ## Thu Dec 8 18:19:55 PST 2005 ## Authors: David J. Reiss, ISB (c) 2005-2006 ## Richard Bonneau, ISB ## http://halo.systemsbiology.org/cmonkey ## ######################################################### ## This code is available for non-profit use only, free to be used and modified. ## If results derived using this method are published, please cite: ## Integrated biclustering of heterogeneous genome-wide datasets for the inference ## of global regulatory networks ## by David J Reiss, Nitin S Baliga, Richard Bonneau ## BMC Bioinformatics 2006, 7:280 (2 June 2006) ######################################################### ## Currently supported organisms: halo, hpy, ecoli, yeast. ## All data and code are provided as R-Data binary "object" files. ## The source for the functions included in these files are visible using R # (http://www.r-project.org) and may be easily updated/overridden. ## Installation instructions below have only been tested on UNIX-based OSes (Linux, OSX). ######################################################### ## INSTALLATION / RUNNING: ## 1. Download the relevant organism's RData file and place in a directory called "data/" ## 2. Download the "cMonkey_v1.1_source.RData" and "cMonkey.R" files and place them in the ## current directory ## 3. Download (and compile if necessary) MEME and MAST ## (http://metameme.sdsc.edu/mhmm-download.html) ## 4. Create links to "meme" and "mast" from within the "progs/" directory. ## 5. Start R, and in the R environment: ## 6. > install.packages("brlr",repos="http://cran.r-project.org") ## install brlr library ## 7. > load("cMonkey_v1.1_source.RData") ## load all cMonkey code ## 8. > run() ## enter organism to run on ######################################################### ## NOTES: ## A. Ignore all warning messages printed to the screen - only worry if it dies with an error. ## B. 
##    All output is placed in the output/ORGANISM/ directory, along with PDF files containing
##    bicluster plots and statistics generated during the optimization. Also output are
##    RData object files, for re-starting a stopped/killed session and for input to run
##    the Inferelator (regulatory network inference algorithm).
## C. You may kill/restart a run at any time. To completely re-start a new job, delete the
##    output/ORGANISM directory.
## D. At any time you may plot a bicluster (e.g. #2) by typing (in R, after running
##    step 7 above):
##    > start.up(); load.latest(); plotCluster.motif( clusterStack[[2]] )
## E. Or, to plot them all to a PDF, instead of the "plotCluster.motif()" function, type:
##    > plot.clusters( clusterStack ) ## PDF is placed in output/ORGANISM
#########################################################
## ADVANCED USAGE: if you want to adjust the cMonkey parameters, between steps (7) and (8)
##    above, insert:
##    a. > start.up(); detach( params )
##    b. * change the parameter that you wish to modify in the "params" list
##    c. > attach( params )
#########################################################
## OTHER ORGANISMS: if you wish to use cMonkey on your own data for another organism,
##    you need to inspect the "global.data" data structure, and replace the sub-structures
##    seen there. I will be happy to offer help/suggestions/advice.
#########################################################
## PARALLELIZATION: Additional headaches arise if you want to run cMonkey parallelized.
##    I have not included the necessary ingredients in this distribution, but I will, if
##    anyone expresses a desire to do so. The extra steps include:
##    1. installing pvm for your machine
##    2. setting up pvm-related environment variables
##    3. installing the rpvm and snow R libraries
##    4. loading the snow-utils code (not included here)
##    5.
##    editing the test.opt.all.clusters() function to uncomment the snow-related functions
#########################################################
## CONTACT for assistance: David J. Reiss, ISB -- dreiss@systemsbiology.org
#########################################################

## Top-level driver: (re)initialize the session, run the bicluster optimization, save an
## abridged copy of the cluster stack, and plot every cluster.
## All helper functions called here (start.up(), load.latest(), cat.new(),
## test.opt.all.clusters(), prune.clusterStack(), plot.clusters(), ...) and the tuning
## variables (n.iters, kmax, state.file, ...) are defined outside this script;
## presumably the latter come from the attached "params" list -- TODO confirm.

## Start the iteration counter at zero unless a usable value is already defined.
if ( ! exists( "iter" ) || is.na( iter ) ) iter <- 0

## Detach every copy of the parameter lists that is still on the search path. Looping on
## search() removes all copies (not just the first four) and avoids the error messages
## that try( detach( ... ) ) prints when nothing is attached.
while ( "params" %in% search() ) detach( params )
while ( "all.iter.params" %in% search() ) detach( all.iter.params )

start.up()

cat.new( date(), "\n" )

## NOTE that we re-init the cluster stack here!
clusterStack <- list()

load.latest() ## This allows us to re-start a stopped run

## Here is the meat
iters <- 1:n.iters
## One "bad move" temperature per iteration, indexed by iteration number, so the vector
## is sized by the largest iteration up front instead of being grown inside the loop.
bad.move.temp <- numeric( max( iters ) )
for ( i in iters ) {
  bad.move.temp[ i ] <- get.iter.based.params( "bad.move.temperature", i )
}

tmp.opt <- test.opt.all.clusters( clusterStack, ks=1:kmax, iters=iters,
                                  max.rows=expected.cluster.rows,
                                  weight.fixed=weight.fixed,
                                  bad.move.temp=bad.move.temp,
                                  mean.count=mean.clusters.per.gene,
                                  max.moves=max.col.moves.per.cluster,
                                  add.max=max.add.rows.per.iter,
                                  remove.max=max.remove.rows.per.iter )
clusterStack <- tmp.opt$clusts
optimization.logs <- tmp.opt$logs
rm( tmp.opt )
gc()

cat.new( date(), "\n" )

iter <- max( iters )

## Current cluster's image now gets saved in test.opt.clusterStack

## Save abridged version of clusterstack only for inferelator (remove motif info, etc.)
clusterStack <- prune.clusterStack( clusterStack, cluster.row.floor )
gc()

## Keep the full stack aside; the copy stripped below is only what gets written to disk.
good.cstack <- clusterStack

## seq_len() yields an empty loop when clusterStack$k is 0, whereas 1:0 would visit
## indices 1 and 0.
for ( k in seq_len( clusterStack$k ) ) {
  ## NOTE(review): the original line breaks of this statement were lost in this copy of
  ## the file. The chain is assumed to resume at mast.info, leaving p.values and
  ## diagrams in the saved copy -- verify against the upstream source.
  ##clusterStack[[ k ]]$motif.out$p.values <- clusterStack[[ k ]]$motif.out$diagrams <-
  clusterStack[[ k ]]$motif.out$mast.info <- clusterStack[[ k ]]$motif.out$meme.out <-
    clusterStack[[ k ]]$motif.out$is.pal <- clusterStack[[ k ]]$motif.out$bg.order <-
      clusterStack[[ k ]]$motif.out$seq.length <- clusterStack[[ k ]]$motif.out$too.few <-
        clusterStack[[ k ]]$motif.out$num.unique <- NULL
  ##clusterStack[[ k ]]$changed <- NULL
}

curr.state.file <- paste( state.file, ".final.clusters.RData", sep="" )
save( clusterStack, params, all.iter.params, date.biclust.run, biclust.version, iter,
      gene.coords, file=curr.state.file, compress=TRUE )

## Put the full (unstripped) stack back for plotting.
clusterStack <- good.cstack
rm( good.cstack )
gc()

cat.new( date(), "\n" )

plot.clusters( clusterStack )
graphics.off()
gc()

cat.new( date(), "\n" )