changeset 23:2770f49cb0dc

re-uploading contra
author Franco Caramia <franco.caramia@petermac.org>
date Tue, 20 May 2014 09:59:00 +1000
parents e8a98923965e
children 6bcf47cc272a
files CONTRA_User_Guide.2.0.pdf all_fasta.loc.sample baseline.xml baseline_wrapper.pl contra.xml contra_wrapper.pl tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 8 files changed, 624 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file CONTRA_User_Guide.2.0.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.loc.sample	Tue May 20 09:59:00 2014 +1000
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3     apiMel3   Honeybee (Apis mellifera): apiMel3     /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon   hg19      Human (Homo sapiens): hg19 Canonical   /path/to/genome/hg19/hg19canon.fa
+#hg19full    hg19      Human (Homo sapiens): hg19 Full        /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline.xml	Tue May 20 09:59:00 2014 +1000
@@ -0,0 +1,71 @@
+<tool id="baseline_tool" name="Baseline" version="1.0.0">
+  <description>: Control files for Contra</description>
+  <requirements>
+	<requirement type="package" version="2.17.0">bedtools</requirement>
+	<requirement type="package" name="samtools" version="0.1.18">samtools</requirement>
+	<requirement type="package" name="contra" version="2.0.4">contra</requirement>
+  </requirements>
+  <command interpreter="perl">
+    
+	baseline_wrapper.pl
+	
+	##Each argument is a "TAG::value" string; baseline_wrapper.pl dispatches on the tag
+	##(PLAYEROPTION, BAMLISTENTRY or BASELINEOUTPUT) and dies on any other tag.
+	
+	##Required files
+	"PLAYEROPTION::-t=$target_file"
+	
+	##One BAMLISTENTRY argument per entry of the "file_group" repeat
+	#for $group in $file_group
+		"BAMLISTENTRY::${group.bam}"
+	#end for
+	
+	##Options forwarded to baseline.py (the wrapper splits option and value at "=")
+	"PLAYEROPTION::--name=$sampleName"
+	"PLAYEROPTION::--trim=$trim"
+	
+	##Galaxy output dataset that receives the generated baseline control file
+	"BASELINEOUTPUT::$baseline_output"
+  </command>
+	<inputs>
+		<param name="target_file" type="data" format="bed" help="" optional="false" />		
+		<repeat name="file_group" title="Bam file">
+			<param format="bam" name="bam" type="data" label="BAM File" help=""/>
+		</repeat>
+		<param name="sampleName" value="baseline" type="text"  optional="true" />
+		<param name="trim"  type="float" value="0.2" optional="true" />
+
+		
+	</inputs>
+	<outputs>
+		<data name="baseline_output" title="Baseline Output"  format="tabular" type="data"  label="Baseline_Control.txt" />
+	</outputs>
+	<help>
+|
+
+**Reference**
+	http://contra-cnv.sourceforge.net/
+
+-----
+
+**What it does**
+
+Creating a baseline control from multiple samples can be useful when a matched control is not available. On the CONTRA download page, we have provided several baseline files for some of the platforms that we have tried. Alternatively, the “baseline.py” script that comes with CONTRA can be used to generate a custom baseline file. 
+
+-----
+ 
+**Parameters**
+
+::
+
+  -t, --target          Target region definition file [REQUIRED] [BED format] 
+
+  -f, --files           Files to be converted to baselines [REQUIRED] [BAM] 
+
+  -o, --output          Output folder [REQUIRED] 
+
+  -c, --trim            Portion of outliers to be removed before calculating 
+                        average [Default: 0.2]
+                        
+  -n, --name            Output baseline file name [Default: baseline] 
+
+
+	</help>
+</tool>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline_wrapper.pl	Tue May 20 09:59:00 2014 +1000
@@ -0,0 +1,68 @@
+# Galaxy wrapper around CONTRA's baseline.py. Every argument is a tagged string:
+#   PLAYEROPTION::<opt>[=<value>]  option forwarded to baseline.py
+#   BAMLISTENTRY::<path>           one BAM file to pool into the baseline
+#   BASELINEOUTPUT::<path>         file that receives the generated control file
+
+use strict;
+use warnings;
+use File::Basename; 
+use Cwd;
+use File::Copy ();
+use File::Spec;
+
+die qq(
+Bad number of inputs
+
+) if(!@ARGV);
+
+my @player_options;
+my @bam_files;
+my $baseline_output;
+
+foreach my $input (@ARGV) 
+{
+	# Split on the first "::" only, so a value that itself contains "::" survives.
+	my ($kind, $value) = split /::/, $input, 2;
+	die("Unknown Input: $input\n") if !defined $value;
+	
+	if($kind eq "PLAYEROPTION") 
+	{
+		# "--opt=value" becomes two argv words; a bare flag stays a single word.
+		# Only the first "=" separates option and value, so values may contain "=".
+		my ($option, $argument) = split /=/, $value, 2;
+		next if !defined $option;
+		# A blank value means the Galaxy field was left empty: drop the option
+		# rather than passing a dangling option name to baseline.py.
+		next if defined $argument && $argument eq "";
+		push @player_options, defined $argument ? ($option, $argument) : ($option);
+	}
+	elsif($kind eq "BASELINEOUTPUT") 
+	{
+		$baseline_output = $value;
+	}  
+	elsif($kind eq "BAMLISTENTRY") 
+	{
+		push @bam_files, $value;
+	}
+	else 
+	{
+		die("Unknown Input: $input\n");
+	}
+}
+die("No BAMLISTENTRY inputs given\n") if !@bam_files;
+die("No BASELINEOUTPUT input given\n") if !defined $baseline_output;
+
+#Create the directory baseline.py writes into (relative to the current directory)
+my $working_dir = "BASELINE_OUTPUT";
+if (!-d $working_dir)
+{
+	mkdir($working_dir) or die("Cannot create $working_dir: $!\n");
+}
+
+#run baseline without a shell, so paths and option values need no quoting;
+#its stdout is discarded (as before) by pointing STDOUT at the null device
+my @command = ("baseline.py", "--file", @bam_files, "--output", $working_dir, @player_options);
+open(my $saved_out, '>&', \*STDOUT) or die("Cannot duplicate STDOUT: $!\n");
+open(STDOUT, '>', File::Spec->devnull()) or die("Cannot silence STDOUT: $!\n");
+my $status = system(@command);
+my $launch_error = $!;
+open(STDOUT, '>&', $saved_out) or die("Cannot restore STDOUT: $!\n");
+die("Cannot run baseline.py: $launch_error\n") if $status == -1;
+die("baseline.py failed (wait status $status)\n") if $status != 0;
+
+#Search control file in output dir
+opendir(my $dir_handle, $working_dir) or die("Cannot read $working_dir: $!\n");
+my @control_files = sort grep { /\.txt\z/ && -f "$working_dir/$_" } readdir($dir_handle);
+closedir($dir_handle);
+die("baseline.py produced no .txt control file in $working_dir\n") if !@control_files;
+
+#readdir order is unspecified, so if several .txt files exist take the first by name
+File::Copy::move("$working_dir/$control_files[0]", $baseline_output)
+	or die("Cannot move $control_files[0] to $baseline_output: $!\n");
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contra.xml	Tue May 20 09:59:00 2014 +1000
@@ -0,0 +1,288 @@
+<tool id="contra_tool" name="Contra Copy number analysis" version="1.0.0">
+  <description>: Copy Number Analysis for Targeted Resequencing</description>
+  <requirements>
+    <requirement type="package" version="2.17.0">bedtools</requirement>
+    <requirement type="package" name="samtools" version="0.1.18">samtools</requirement>
+    <requirement type="package" name="contra" version="2.0.4">contra</requirement>
+  </requirements>
+  <command interpreter="perl">
+    
+	contra_wrapper.pl
+	
+	##Each argument is a "TAG::value" string; contra_wrapper.pl dispatches on the tag
+	##(PLAYEROPTION, CONTRAOUTPUT or CONTRADIR) and dies on any other tag.
+	
+	##Ref Genome
+	#if $genomeSource.refGenomeSource == "history":
+		"PLAYEROPTION::-f=${genomeSource.ownFile}"
+	#else:
+		##use the fasta path from the all_fasta data table
+		"PLAYEROPTION::-f=${genomeSource.indices.fields.path}"
+	#end if
+	
+	##Required files
+	"PLAYEROPTION::-t=$target_file"
+	"PLAYEROPTION::-s=$alignment_file"
+	#if $controlSource.refControlSource == "history":
+		"PLAYEROPTION::-c=${controlSource.control_file}"
+	#else:
+		##use the control path from the baseline_files data table
+		"PLAYEROPTION::-c=${controlSource.indices.fields.path}"
+	#end if
+	
+	##Optional parameters (only sent when the user chose "Modify Parameters")
+	#if $option.option == "modify_parameters":
+	
+		"PLAYEROPTION::--numBin=$option.numBin"
+		"PLAYEROPTION::--minReadDepth=$option.minReadDepth"
+		"PLAYEROPTION::--minNBases=$option.minNbases"
+		
+		#if str($option.sam) == "true":
+			"PLAYEROPTION::--sam"
+		#end if
+		
+		#if str($option.bed) == "true":
+			"PLAYEROPTION::--bed"
+		#end if
+		
+		"PLAYEROPTION::--pval=$option.pval"
+		"PLAYEROPTION::--sampleName=$option.sampleName"
+		
+		#if str($option.nomultimapped) == "true":
+			"PLAYEROPTION::--nomultimapped"
+		#end if
+		
+		#if str($option.plot) == "true":
+			"PLAYEROPTION::--plot"
+		#end if
+		
+		"PLAYEROPTION::--minExon=$option.minExon"
+		"PLAYEROPTION::--minControlRdForCall=$option.minControlRdForCall"
+		"PLAYEROPTION::--minTestRdForCall=$option.minTestRdForCall"
+		"PLAYEROPTION::--minAvgForCall=$option.minAvgForCall"
+		"PLAYEROPTION::--maxRegionSize=$option.maxRegionSize"
+		"PLAYEROPTION::--targetRegionSize=$option.targetRegionSize"
+		
+		##Spelled as in the option list of the help section below (-l, --largeDeletion)
+		#if str($option.largedeletion) == "true":
+			"PLAYEROPTION::--largeDeletion"
+		#end if
+		
+		"PLAYEROPTION::--smallSegment=$option.smallSegment"
+		"PLAYEROPTION::--largeSegment=$option.largeSegment"
+		"PLAYEROPTION::--lrCallStart=$option.lrCallStart"
+		"PLAYEROPTION::--lrCallEnd=$option.lrCallEnd"
+		"PLAYEROPTION::--passSize=$option.passSize"
+	#end if
+	
+	##HTML index dataset and the directory that receives the CONTRA result files
+	"CONTRAOUTPUT::$html_file"
+	"CONTRADIR::${html_file.files_path}"
+	
+  </command>
+	<inputs>
+	
+		<conditional name="genomeSource">
+			<param name="refGenomeSource" type="select" label="Will you select a reference from your history or use a built-in fasta file?">
+				<option value="indexed">Use a built-in index</option>
+				<option value="history">Use one from the history</option>
+			</param>
+			<when value="indexed">
+				<param name="indices" type="select" label="Select a reference genome">
+				  <options from_data_table="all_fasta">
+				    <filter type="sort_by" column="2" />
+				    <validator type="no_options" message="No indexes are available" />
+				  </options>
+				</param>
+			</when>
+			<when value="history">
+				<param name="ownFile" type="data" format="fasta" label="Select a reference from history" />
+			</when>
+		</conditional>
+	
+		<param name="target_file" type="data" format="bed" help="" optional="false" />		
+		<param name="alignment_file" type="data" format="bam,sam" help="" optional="false" />
+		
+		
+		<conditional name="controlSource">
+			<param name="refControlSource" type="select" label="Will you select a reference from your history or use a built-in control file?">
+				<option value="indexed">Use a built-in control</option>
+				<option value="history">Use one from the history</option>
+			</param>
+			<when value="indexed">
+				<param name="indices" type="select" label="Select a baseline control">
+				  <options from_data_table="baseline_files">
+				    <filter type="sort_by" column="2" />
+				    <validator type="no_options" message="No files available" />
+				  </options>
+				</param>
+			</when>
+			<when value="history">
+				<param name="control_file" type="data" format="bam,sam,bed,tabular" help="" optional="false" />
+			</when>
+		</conditional>
+		
+		<conditional name="option">
+			<param name="option" type="select" label="Optional Parameters" help="" optional="true">
+				<option value="default_parameters" selected="true">Default Parameters</option>
+				<option value="modify_parameters">Modify Parameters</option>
+			</param>
+			<when value="modify_parameters">
+				<param name="numBin"  type="integer" value="20" optional="true" />
+				<param name="minReadDepth"  type="integer" value="10" optional="true" />
+				<param name="minNbases"  type="integer" value="10" optional="true" />
+				<param name="sam" type="select" label="sam" help="" optional="true">
+					<option value="true"  >true</option>
+					<option value="false" selected="true">false</option>
+				</param>
+				<param name="bed" type="select" label="bed" help="" optional="true">
+					<option value="true"  >true</option>
+					<option value="false" selected="true">false</option>
+				</param>
+				<param name="pval"  type="float" value="0.05" optional="true" />
+				<param name="sampleName" value="Contra_Output" type="text"  optional="true" />
+				<param name="nomultimapped" type="select" label="no multimapped" help="" optional="true">
+					<option value="true"  >true</option>
+					<option value="false" selected="true">false</option>
+				</param>
+				<param name="plot" type="select" label="plot" help="" optional="true">
+					<option value="true"  >true</option>
+					<option value="false" selected="true">false</option>
+				</param>
+				<param name="minExon"  type="integer" value="2000" optional="true" />
+				<param name="minControlRdForCall"  type="integer" value="5" optional="true" />
+				<param name="minTestRdForCall"  type="integer" value="0" optional="true" />
+				<param name="minAvgForCall"  type="integer" value="20" optional="true" />
+				<param name="maxRegionSize"  type="integer" value="0" optional="true" />
+				<param name="targetRegionSize"  type="integer" value="200" optional="true" />
+				<param name="largedeletion" type="select" label="large deletion" help="" optional="true">
+					<option value="true"  >true</option>
+					<option value="false" selected="true">false</option>
+				</param>
+				
+				<param name="smallSegment"  type="integer" value="1" optional="true" />
+				<param name="largeSegment"  type="integer" value="25" optional="true" />
+				<param name="lrCallStart"  type="float" value="-0.3" optional="true" />
+				<param name="lrCallEnd"  type="float" value="0.3" optional="true" />
+				<param name="passSize"  type="float" value="0.5" optional="true" />
+		
+			</when>
+		</conditional>	
+	</inputs>
+	<outputs>
+		<data name="html_file" format="html" label="Contra Output" />
+	</outputs>
+	<help>
+|
+
+
+**Reference**
+	http://contra-cnv.sourceforge.net/
+
+-----
+
+**What it does**
+
+CONTRA is a tool for copy number variation (CNV) detection for targeted resequencing data such as those from whole-exome capture data. CONTRA calls copy number gains and losses for each target region. Its key strategies include the use of base-level log-ratios to remove GC-content bias, correction for an imbalanced library size effect on log-ratios, and the estimation of log-ratio variations via binning and interpolation. It takes standard alignment formats (BAM/SAM) and outputs variant call format (VCF 4.0) for easy integration with other next generation sequencing analysis packages.
+
+
+-----
+ 
+**Required Parameters**
+
+::
+
+  -t, --target         Target region definition file [BED format] 
+
+  -s, --test           Alignment file for the test sample [BAM/SAM] 
+
+  -c, --control        Alignment file for the control sample 
+                       [BAM/SAM/BED – baseline file]
+
+  --bed                **option has to be supplied for control
+                       with baseline file.** 
+
+  -f, --fasta          Reference genome [FASTA]
+
+  -o, --outFolder      the folder name (and its path) to store the output 
+                       of the analysis (this new folder will be created – 
+                       an error occurs if the folder already exists) 
+
+-----
+
+**Optional Parameters**
+
+::
+
+  --numBin              Numbers of bins to group the regions. User can 
+                        specify multiple experiments with different numbers
+                        of bins (comma separated). [Default: 20] 
+
+  --minReadDepth        The threshold for minimum read depth for each base 
+                        (see Step 2 in CONTRA workflow) [Default: 10] 
+
+  --minNBases           The threshold for minimum number of bases for each 
+                        target region (see Step 2 in CONTRA workflow) 
+                        [Default: 10] 
+
+  --sam                 If the specified test and control samples are in 
+                        SAM format. [Default: False] (It will always take 
+                        BAM samples as default) 
+
+  --bed                 If specified, control will be a baseline file in 
+                        BED format. [Default: False] 
+                        Please refer to the Baseline Script section for 
+                        instruction how to create baseline files from set 
+                        of BAMfiles. A set of baseline files from different 
+                        platform have also been provided in the CONTRA 
+                        download page. 
+
+  --pval                The p-value threshold for filtering. Based on Adjusted 
+                        P-Values. Only regions that pass this threshold will 
+                        be included in the VCF file. [Default: 0.05] 
+
+  --sampleName          The name to be appended to the front of the default output 
+                        name. By default, there will be nothing appended. 
+
+  --nomultimapped       The option to remove multi-mapped reads 
+                        (using SAMtools with mapping quality > 0). 
+                        [default: FALSE] 
+
+  -p, --plot            If specified, plots of log-ratio distribution for each 
+                        bin will be included in the output folder [default: FALSE] 
+
+  --minExon             Minimum number of exons in one bin (if a bin contains fewer
+                        exons than this number, it will be merged into the 
+                        adjacent bins) [Default : 2000] 
+
+  --minControlRdForCall Minimum Control ReadDepth for call [Default: 5] 
+
+  --minTestRdForCall    Minimum Test ReadDepth for call [Default: 0] 
+
+  --minAvgForCall       Minimum average coverage for call [Default: 20] 
+
+  --maxRegionSize       Maximum region size in target region (for breaking 
+                        large regions into smaller regions. By default, 
+                        maxRegionSize=0 means no breakdown). [Default : 0] 
+
+  --targetRegionSize    Target region size for breakdown (if maxRegionSize 
+                        is non-zero) [Default: 200] 
+
+  -l, --largeDeletion   If specified, CONTRA will run large deletion analysis (CBS).
+                        User must have DNAcopy R-library installed to run the 
+                        analysis. [False] 
+
+  --smallSegment        CBS segment size for calling large variations [Default : 1] 
+
+  --largeSegment        CBS segment size for calling large variations [Default : 25] 
+
+  --lrCallStart         Log ratios start range that will be used to call CNV 
+                        [Default : -0.3] 
+
+  --lrCallEnd           Log ratios end range that will be used to call CNV 
+                        [Default : 0.3] 
+
+  --passSize            Size of exons that passed the p-value threshold compare 
+                        to the original exons size [Default: 0.5] 
+	</help>
+</tool>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contra_wrapper.pl	Tue May 20 09:59:00 2014 +1000
@@ -0,0 +1,91 @@
+use strict;
+use warnings;
+
+use FindBin;
+use File::Path qw(make_path);
+use File::Spec;
+
+# Galaxy wrapper around CONTRA's contra.py. Every argument is a tagged string:
+#   PLAYEROPTION::<opt>[=<value>]  option forwarded to contra.py
+#   CONTRAOUTPUT::<path>           HTML index file listing the result files
+#   CONTRADIR::<path>              directory that receives the result files
+die "Bad number of inputs" if(!@ARGV);
+
+my @player_options;
+my $contra_output;
+my $contra_dir;
+
+foreach my $input (@ARGV) 
+{
+	# Split on the first "::" only, so a value that itself contains "::" survives.
+	my ($kind, $value) = split /::/, $input, 2;
+	die("Unknown input: $input\n") if !defined $value;
+	
+	if($kind eq "PLAYEROPTION") 
+	{
+		# "--opt=value" becomes two argv words; a bare flag ("--plot") stays one word.
+		# Only the first "=" separates option and value, so values may contain "=".
+		my ($option, $argument) = split /=/, $value, 2;
+		next if !defined $option;
+		# A blank value means the Galaxy field was left empty: drop the option
+		# rather than passing a dangling option name to contra.py.
+		next if defined $argument && $argument eq "";
+		my @words = defined $argument ? ($option, $argument) : ($option);
+		print "@words\n";
+		push @player_options, @words;
+	}
+	elsif($kind eq "CONTRAOUTPUT") 
+	{
+		$contra_output = $value;
+	}  
+	elsif($kind eq "CONTRADIR") 
+	{
+		$contra_dir = $value;
+	}  
+	else 
+	{
+		die("Unknown input: $input\n");
+	}
+}
+die("No CONTRAOUTPUT input given\n") if !defined $contra_output;
+die("No CONTRADIR input given\n") if !defined $contra_dir;
+
+
+my $working_dir = "CONTRA_OUTPUT";
+make_path($contra_dir);
+
+# Use a contra.py sitting next to this wrapper when there is one; otherwise look it
+# up on PATH (the same way baseline_wrapper.pl finds baseline.py).
+my $bundled_contra = File::Spec->catfile($FindBin::Bin, 'contra.py');
+my $contra_program = -x $bundled_contra ? $bundled_contra : 'contra.py';
+my @command = ($contra_program, '-o', $working_dir, @player_options);
+
+#run contra without a shell, so paths and option values need no quoting; its stdout
+#and stderr are discarded (as before) by pointing both at the null device
+open(my $saved_out, '>&', \*STDOUT) or die("Cannot duplicate STDOUT: $!\n");
+open(my $saved_err, '>&', \*STDERR) or die("Cannot duplicate STDERR: $!\n");
+open(STDOUT, '>', File::Spec->devnull()) or die("Cannot silence STDOUT: $!\n");
+open(STDERR, '>&', \*STDOUT) or die("Cannot silence STDERR: $!\n");
+my $status = system(@command);
+my $launch_error = $!;
+open(STDERR, '>&', $saved_err) or die("Cannot restore STDERR: $!\n");
+open(STDOUT, '>&', $saved_out) or die("Cannot restore STDOUT: $!\n");
+die("Cannot run $contra_program: $launch_error\n") if $status == -1;
+die("$contra_program failed (wait status $status)\n") if $status != 0;
+
+#set html
+# HTML stays a package-level handle because move_files() prints its list items to it.
+open(HTML, '>', $contra_output) or die("Cannot write $contra_output: $!\n");
+print HTML "<html><head><title>Contra: Copy Number Analysis for Targeted Resequencing</title></head><body><h3>Contra Output Files:</h3><p><ul>\n";
+move_files($working_dir);
+print HTML "</ul></p></body></html>\n";
+close(HTML) or die("Cannot close $contra_output: $!\n");
+
+# Recursively walk $local_dir: every regular file is moved into the file-level
+# $contra_dir (sub-directories are flattened) and listed as an <li> link on the
+# file-level HTML handle. Dies on an unreadable directory, on a failed move, or on
+# an entry that is neither a directory nor a regular file.
+# NOTE(review): files with the same name in different sub-directories end up at the
+# same destination, so the later one replaces the earlier one.
+sub move_files
+{
+	my $local_dir = $_[0];
+	require File::Copy;
+
+	# Read the whole listing and close the handle before recursing.
+	opendir(my $dir_handle, $local_dir) or die("Cannot read directory $local_dir: $!\n");
+	my @entries = sort readdir($dir_handle);
+	closedir($dir_handle);
+
+	foreach my $file (@entries) 
+	{
+		next if $file eq "." || $file eq "..";
+
+		my $path = "$local_dir/$file";
+		if (-d $path)
+		{
+			move_files($path);
+		}
+		elsif (-f $path)
+		{
+			# Escape the characters that would break the quoted attribute or the markup.
+			(my $label = $file) =~ s/([&<>"])/'&#' . ord($1) . ';'/ge;
+			print HTML "<li><a href=\"$label\">$label</a></li>\n";
+			# Move without a shell so unusual file names cannot be misparsed.
+			File::Copy::move($path, "$contra_dir/$file")
+				or die("Cannot move $path to $contra_dir: $!\n");
+		}
+		else
+		{
+			die("Unrecognized file generated: $file\n");
+		}
+	}
+	return;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue May 20 09:59:00 2014 +1000
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <!-- Read by contra.xml (from_data_table="all_fasta"), which passes the "path" column to contra as the reference genome -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="all_fasta.loc" />
+    </table>
+    <!-- NOTE(review): contra.xml also reads a "baseline_files" data table that is not declared in this sample; confirm it is provided elsewhere -->
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue May 20 09:59:00 2014 +1000
@@ -0,0 +1,80 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="samtools" version="0.1.18">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://sourceforge.net/projects/samtools/files/samtools/0.1.18/samtools-0.1.18.tar.bz2</action>
+                <action type="shell_command">sed -i.bak -e 's/-lcurses/-lncurses/g' Makefile</action>
+                <action type="shell_command">make</action>
+                <action type="move_file">
+                    <source>samtools</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>misc/maq2sam-long</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+            Compiling SAMtools requires the ncurses and zlib development libraries.
+        </readme>
+    </package>
+
+
+   <package name="bedtools" version="2.17.0">
+       <install version="1.0">
+           <actions>
+		<action type="download_by_url" target_filename="bedtools-2.17.0.tar.gz" >http://bedtools.googlecode.com/files/BEDTools.v2.17.0.tar.gz</action>
+		<action type="shell_command">make all</action>
+		<action type="make_directory">$INSTALL_DIR/bin</action>
+		<action type="move_directory_files">
+                    <source_directory>bin</source_directory>
+                    <destination_directory>$INSTALL_DIR/bin</destination_directory>
+                </action>                                
+		<action type="set_environment">
+		   <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+		</action>
+           </actions>
+       </install>
+       <readme>
+		GCC version 4.1 or greater is recommended. 3.x versions will typically not compile BEDTools. g++ required
+       </readme>
+   </package>
+   <package name="contra" version="2.0.4">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://downloads.sourceforge.net/project/contra-cnv/CONTRA.V2.0/CONTRA.v2.0.4.tar.gz</action>
+                <action type="shell_command">rm -rf bedtools_installation_guide.txt</action>
+                <action type="shell_command">rm -rf BEDTools-User-Manual.v4.pdf</action>
+                <action type="shell_command">rm -rf BEDTools.v2.11.2.tar.gz</action>
+		<action type="shell_command">rm -rf CONTRA_User_Guide.2.0.pdf</action>
+		<action type="make_directory">$INSTALL_DIR/bin</action>
+		<action type="make_directory">$INSTALL_DIR/bin/scripts</action>
+		<action type="move_file">
+                    <source>contra.py</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>baseline.py</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+		<action type="move_directory_files">
+                    <source_directory>scripts</source_directory>
+                    <destination_directory>$INSTALL_DIR/bin/scripts</destination_directory>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+            Contra requires Samtools, Bedtools, Python and R
+        </readme>
+    </package>
+</tool_dependency>
+
+