# HG changeset patch # User iarc # Date 1461829405 14400 # Node ID 9d363eb081b5ff76d7295997506d1d49e6966108 # Parent 748b7a8b634c5d93ae4630c59b0fd5565a62f232 Uploaded diff -r 748b7a8b634c -r 9d363eb081b5 README.txt --- a/README.txt Thu Apr 21 09:36:32 2016 -0400 +++ b/README.txt Thu Apr 28 03:43:25 2016 -0400 @@ -2,17 +2,21 @@ MutSpec-Suite ============================== -Created by Maude Ardin and Vincent Cahais (Mechanisms of Carcinogenesis Section, International Agency for Research on Cancer F69372 Lyon France, http://www.iarc.fr/) +Created by Maude Ardin and Vincent Cahais (Mechanisms of Carcinogenesis Section, International Agency for Research on Cancer F69372 Lyon France, +http://www.iarc.fr/) Version 1.0 Released under GNU public license version 2 (GPL v2) -Package description: Ardin et al. - 2016 - MutSpec: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes - BMC Bioinformatics +Package description: Ardin et al. - 2016 - MutSpec: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse +cancer genomes - BMC Bioinformatics +http://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1011-z Test data: https://usegalaxy.org/u/maude-ardin/p/mutspectestdata + ### Requirements # python-dev @@ -23,7 +27,8 @@ # Annovar If you do not have ANNOVAR installed, you can download it here: http://www.openbioinformatics.org/annovar/annovar_download_form.php -1) Once downloaded, install annovar per the installation instructions and edit the PATH variable in galaxy deamon (/etc/init.d/galaxy) to reflect the location of directory containing perl scripts. +1) Once downloaded, install annovar per the installation instructions and edit the PATH variable in galaxy daemon (/etc/init.d/galaxy) +to reflect the location of the directory containing perl scripts. 2) Create directories for saving Annovar databases 2-a Create a folder (annovardb) for saving all Annovar databases, e.g. 
hg19db @@ -48,7 +53,8 @@ The list of all available databases can be found here: http://annovar.openbioinformatics.org/en/latest/user-guide/download/ -5) Edit the annovar_index.loc file (in the folder galaxy-dist/tool-data/toolshed/repos/iarc/mutspec/revision/) to reflect the location of annovardb folder (containing all the databases files downloaded from Annovar). +5) Edit the annovar_index.loc file (in the folder galaxy-dist/tool-data/toolshed/repos/iarc/mutspec/revision/) to reflect the location +of the annovardb folder (containing all the database files downloaded from Annovar). Restart galaxy instance for changes in .loc file to take effect or reload it into the admin interface. 6) Edit the file build_listAVDB.txt in the mutspec install directory to reflect the name and the type of the databases installed @@ -57,20 +63,24 @@ ### Installation # MutSpec-Stat and MutSpec-NMF -By default 1 CPU is used by these tools, but you may edit mutspecStat_wrapper.sh and mutspecNmf_wrapper.sh to change this number to the maximum number of CPU available on your server. +By default 8 CPUs are used by these tools, but you may edit mutspecStat_wrapper.sh and mutspecNmf_wrapper.sh to change this number +to the maximum number of CPUs available on your server. MutSpec-Stat and MutSpec-NMF tools allow parallel computations that are time consuming. It is recommended to use the highest number of cores available on the Galaxy server to reduce the computation time of these tools. + # MutSpec-Annot -The maximum CPU value needs to be specified when installing MutSpec package by editing the file mutspecAnnot.pl to reflect the maximum number of CPU available on your server (by default 1 CPU is used). +The maximum CPU value needs to be specified when installing the MutSpec package by editing the file mutspecAnnot.pl to reflect the maximum number +of CPUs available on your server. -This tool may be time consuming for large files. 
For example, annotating a file with more than 25,000 variants takes 1 hour using 1 CPU (2.6 GHz), while annotating this file using 8 CPUs takes only 5 minutes. +This tool may be time consuming for large files. For example, annotating a file of more than 25,000 variants takes 1 hour using 1 CPU (2.6 GHz), +while annotating this file using 8 CPUs takes only 5 minutes. We have optimized MutSpec-Annot so that the tool uses more CPUs, if available, as follows: -files with less than 5,000 lines: 1 CPU is used -files with more than 5,000 and less than 25,000 lines: 2 CPUs are used --files with more than 25,000 and less than 100,000 lines: 8 (or maximum CPUs, if less than 8 CPUs are available) are used (our benchmark results didn't show any time saving using more than 8 cores for files with more than 25,000 -but less than 100,000 lines) +-files with more than 25,000 and less than 100,000 lines: 8 (or maximum CPUs, if less than 8 CPUs are available) are used (our benchmark +results didn't show any time saving using more than 8 cores for files with more than 25,000 but less than 100,000 lines) -files with more than 100,000: maximum CPUs are used diff -r 748b7a8b634c -r 9d363eb081b5 mutspecAnnot.pl --- a/mutspecAnnot.pl Thu Apr 21 09:36:32 2016 -0400 +++ b/mutspecAnnot.pl Thu Apr 28 03:43:25 2016 -0400 @@ -3,7 +3,7 @@ #-----------------------------------# # Author: Maude # # Script: mutspecAnnot.pl # -# Last update: 17/02/16 # +# Last update: 26/04/16 # #-----------------------------------# use strict; @@ -38,7 +38,7 @@ ######################################### ### SPECIFY THE NUMBER OF CPU ### ######################################### -our $max_cpu = 1; # Max number of CPU to use for the annotation +our $max_cpu = 12; # Max number of CPU to use for the annotation # Recover the current path @@ -524,21 +524,19 @@ if($fullAVDB eq "yes") { AnnotateAV("$folder_temp/$outFilenameTemp-AVInput", "$folder_temp/$outFilenameTemp"); } else { 
annotateAV_min("$folder_temp/$outFilenameTemp-AVInput", "$folder_temp/$outFilenameTemp"); } - # Check if the annotations worked - open(F1, "$folderMutAnalysis/log_annovar.txt") or die "$!: $folderMutAnalysis/log_annovar.txt\n"; - while() - { - if($_ =~ /ERROR/i) + open(F1, "$folderMutAnalysis/log_annovar.txt") or die "$!: $folderMutAnalysis/log_annovar.txt\n"; + while() { - print STDERR "\n\n\t\tANNOVAR LOG FILE\n\n"; - print STDERR $_; - print STDERR "\n\n\t\tANNOVAR LOG FILE\n\n\n"; - exit; + if($_ =~ /ERROR/i) + { + print STDERR "\n\n\t\tANNOVAR LOG FILE\n\n"; + print STDERR $_; + print STDERR "\n\n\t\tANNOVAR LOG FILE\n\n\n"; + exit; + } } - } - close F1; - + close F1; # Recover the strand orientation my $length_AVheader = 0; @@ -552,11 +550,9 @@ # Wait all the child process $pm->wait_all_children; - - #### Paste the file together + # Paste the file together CombinedTempFile("$folder_temp/$filenameO", "$folderAnnovar/$filenameO".".".${refGenome}."_multianno.txt"); } - # Remove the temporary directory rmtree($folder_temp); } @@ -669,7 +665,7 @@ my @tab = split("\t", $_); # db name like refGenome_dbName.txt - if( ($tab[0] =~ /\w+_(\w+)\.txt/) && ($tab[0] !~ /sites/) && ($tab[0] !~ /esp/) && ($tab[0] !~ /sift/) && ($tab[0] !~ /pp2/) ) + if( ($tab[0] =~ /\w+_(\w+)\.txt/) && ($tab[0] !~ /sites/) && ($tab[0] !~ /esp/) && ($tab[0] !~ /ljb26/) ) { $$refS_protocol .= $1.","; $$refS_operation .= $tab[1].","; } @@ -687,7 +683,7 @@ $$refS_protocol .=$AVdbName_final.","; $$refS_operation .= $tab[1].","; } # ESP - if( ($tab[0] =~ /esp/) || ($tab[0] =~ /sift/) || ($tab[0] =~ /pp2/) ) + if( ($tab[0] =~ /esp/) || ($tab[0] =~ /ljb26/) ) { $tab[0] =~ /\w+_(\w+)_(\w+)\.txt/; my $AVdbName_final = $1."_".$2; @@ -1150,7 +1146,7 @@ mutspecannot.pl --refGenome hg19 --interval 10 --outfile output_directory --pathAnnovarDB path_to_annovar_database --pathAVDBList path_to_the_list_of_annovar_DB --temp path_to_temporary_directory --fullAnnotation yes|no input - Version: 02-2016 (Feb 2016) + 
Version: 04-2016 (Apr 2016) =head1 OPTIONS diff -r 748b7a8b634c -r 9d363eb081b5 mutspecFilter.xml --- a/mutspecFilter.xml Thu Apr 21 09:36:32 2016 -0400 +++ b/mutspecFilter.xml Thu Apr 28 03:43:25 2016 -0400 @@ -12,14 +12,14 @@ $segDup $esp $thG - #if $FilterdbSNP.dbSNP == True: + #if str($FilterdbSNP.dbSNP) == "true" or $FilterdbSNP.dbSNP == True: --dbSNP ${FilterdbSNP.column} #else --dbSNP 0 #end if - --refGenome ${refGenome} + --refGenome ${refGenome} --outfile $output - $input + $input @@ -94,6 +94,7 @@ + @ARTICLE{ardin_mutspec:_2016, diff -r 748b7a8b634c -r 9d363eb081b5 mutspecStat.xml --- a/mutspecStat.xml Thu Apr 21 09:36:32 2016 -0400 +++ b/mutspecStat.xml Thu Apr 28 03:43:25 2016 -0400 @@ -14,7 +14,7 @@ mutspecStat_wrapper.sh $html ${GALAXY_DATA_INDEX_DIR}/shared/ucsc/chrom/ - #if $estimateSignature.estimSign == True: + #if str($estimateSignature.estimSign) == "true" or $estimateSignature.estimSign == True: ${estimateSignature.estimT} #else 0 @@ -43,7 +43,7 @@ - +