Mercurial > repos > fcaramia > contra
changeset 23:2770f49cb0dc
re-uploading contra
author | Franco Caramia <franco.caramia@petermac.org> |
---|---|
date | Tue, 20 May 2014 09:59:00 +1000 |
parents | e8a98923965e |
children | 6bcf47cc272a |
files | CONTRA_User_Guide.2.0.pdf all_fasta.loc.sample baseline.xml baseline_wrapper.pl contra.xml contra_wrapper.pl tool_data_table_conf.xml.sample tool_dependencies.xml |
diffstat | 8 files changed, 624 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/all_fasta.loc.sample Tue May 20 09:59:00 2014 +1000 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/baseline.xml Tue May 20 09:59:00 2014 +1000 @@ -0,0 +1,71 @@ +<tool id="baseline_tool" name="Baseline" version="1.0.0"> + <description>: Control files for Contra</description> + <requirements> + <requirement type="package" version="2.17.0">bedtools</requirement> + <requirement type="package" name="samtools" version="0.1.18">samtools</requirement> + <requirement type="package" name="contra" version="2.0.4">contra</requirement> + </requirements> + <command interpreter="perl"> + + baseline_wrapper.pl + + ##Required files + "PLAYEROPTION::-t=$target_file" + + #for $group in $file_group + "BAMLISTENTRY::${group.bam}" + #end for + + "PLAYEROPTION::--name=$sampleName" + "PLAYEROPTION::--trim=$trim" + + ##File to generate the bam list + "BASELINEOUTPUT::$baseline_output" + </command> + <inputs> + <param name="target_file" type="data" format="bed" help="" optional="false" /> + <repeat name="file_group" title="Bam file"> + <param format="bam" name="bam" type="data" label="BAM File" help=""/> + </repeat> + <param name="sampleName" value="baseline" type="text" optional="true" /> + <param name="trim" type="float" value="0.2" optional="true" /> + + + </inputs> + <outputs> + <data name="baseline_output" title="Baseline Output" format="tabular" type="data" label="Baseline_Control.txt" /> + </outputs> + <help> +| + +**Reference** + http://contra-cnv.sourceforge.net/ + +----- + +**What it does** + +Creating a baseline control from multiple samples can be useful when a matched control is not available. On the CONTRA download page, we have provided several baseline files for some of the platforms that we have tried. Alternatively, the “baseline.py” script that comes with CONTRA can be used to generate a custom baseline file. 
+ +----- + +**Parameters** + +:: + + -t, --target Target region definition file [REQUIRED] [BED format] + + -f, --files Files to be converted to baselines [REQUIRED] [BAM] + + -o, --output Output folder [REQUIRED] + + -c, --trim Portion of outliers to be removed before calculating + average [Default: 0.2] + + -n, --name Output baseline file name [Default: baseline] + + + </help> +</tool> + +
# baseline_wrapper.pl (added as b/baseline_wrapper.pl in changeset 23:2770f49cb0dc)
#
# Galaxy wrapper around baseline.py.
#
# Every command-line argument is a "TAG::value" pair:
#   PLAYEROPTION::<option>[=<value>]  option forwarded to baseline.py
#   BAMLISTENTRY::<path>              one input BAM file (may be repeated)
#   BASELINEOUTPUT::<path>            destination of the generated baseline file
#
# baseline.py is run with BASELINE_OUTPUT (relative to the current working
# directory) as its output folder; every *.txt file found there afterwards is
# moved to the BASELINEOUTPUT path.
#
# Dies with a message on malformed arguments, when no BAM file or no output
# path is given, when baseline.py cannot be run or exits non-zero, and when no
# *.txt file was produced.

use strict;
use warnings;
use File::Basename;
use File::Copy qw(move);
use Cwd;

die qq(
Bad number of inputs

) if !@ARGV;

my @player_options;    # option tokens handed to baseline.py, one element each
my @bam_files;         # BAM paths collected from BAMLISTENTRY arguments
my $baseline_output;   # path given by BASELINEOUTPUT

for my $input (@ARGV) {
    # Split on the first "::" only, so a value containing "::" stays intact.
    my ($tag, $value) = split /::/, $input, 2;
    die "Unknown Input: $input\n"
        unless defined $tag && defined $value;

    if ($tag eq 'PLAYEROPTION') {
        # Split on the first "=" only, so "=" inside a value is preserved.
        my ($option, $argument) = split /=/, $value, 2;
        next unless defined $option && length $option;

        if (!defined $argument) {
            # Plain flag without a value.
            push @player_options, $option;
        }
        elsif (length $argument) {
            push @player_options, $option, $argument;
        }
        # A blank value (optional parameter left empty) is dropped: passing
        # the bare option would make it consume the next token as its value.
    }
    elsif ($tag eq 'BASELINEOUTPUT') {
        $baseline_output = $value;
    }
    elsif ($tag eq 'BAMLISTENTRY') {
        push @bam_files, $value;
    }
    else {
        die "Unknown Input: $input\n";
    }
}

die "No BASELINEOUTPUT argument given\n"
    unless defined $baseline_output && length $baseline_output;
die "No BAMLISTENTRY argument given\n" unless @bam_files;

# Directory handed to baseline.py as its output folder.
my $working_dir = "BASELINE_OUTPUT";
unless (-d $working_dir) {
    mkdir $working_dir
        or die "Cannot create directory $working_dir: $!\n";
}

# Run baseline.py. The arguments are passed as a list, so no shell is
# involved and paths or names containing spaces or shell metacharacters
# arrive unchanged.
# NOTE(review): the tool help documents this option as "--files"; "--file" is
# kept exactly as the original invocation used it -- confirm against baseline.py.
run_discarding_stdout(
    'baseline.py',
    '--file',   @bam_files,
    '--output', $working_dir,
    @player_options,
);

# Search the control file(s) in the output directory and move them to the
# requested destination. If several *.txt files exist, each one replaces the
# previous at the destination (same as the original behaviour); sorting makes
# the surviving file deterministic.
opendir my $dir_handle, $working_dir
    or die "Cannot open directory $working_dir: $!\n";
my @entries = sort readdir $dir_handle;
closedir $dir_handle;

my $moved = 0;
for my $file (@entries) {
    my (undef, undef, $extension) = fileparse($file, qr/\.[^.]*/);
    next unless $extension eq ".txt";

    move("$working_dir/$file", $baseline_output)
        or die "Cannot move $working_dir/$file to $baseline_output: $!\n";
    $moved++;
}

die "baseline.py produced no .txt file in $working_dir\n" unless $moved;

# Runs @command (program followed by its arguments) without a shell, with
# STDOUT sent to /dev/null for the duration of the call. STDERR is left
# untouched. Dies if the program cannot be started or does not exit with 0.
sub run_discarding_stdout {
    my @command = @_;

    open my $saved_stdout, '>&', \*STDOUT
        or die "Cannot duplicate STDOUT: $!\n";
    open STDOUT, '>', '/dev/null'
        or die "Cannot redirect STDOUT to /dev/null: $!\n";

    # Indirect-object form forces list semantics even for a single element.
    my $status = system { $command[0] } @command;
    my $launch_error = $status == -1 ? "$!" : undef;

    open STDOUT, '>&', $saved_stdout
        or die "Cannot restore STDOUT: $!\n";
    close $saved_stdout;

    die "Cannot run $command[0]: $launch_error\n" if defined $launch_error;
    die "$command[0] failed (exit code " . ($status >> 8)
        . ", wait status $status)\n"
        if $status != 0;

    return;
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contra.xml Tue May 20 09:59:00 2014 +1000 @@ -0,0 +1,288 @@ +<tool id="contra_tool" name="Contra Copy number analysis" version="1.0.0"> + <description>: Copy Number Analysis for Targeted Resequencing</description> + <requirements> + <requirement type="package" version="2.17.0">bedtools</requirement> + <requirement type="package" name="samtools" version="0.1.18">samtools</requirement> + <requirement type="package" name="contra" version="2.0.4">contra</requirement> + </requirements> + <command interpreter="perl"> + + contra_wrapper.pl + + ##Ref Genome + #if $genomeSource.refGenomeSource == "history": + "PLAYEROPTION::-f=${genomeSource.ownFile}" + #else: + ##use precomputed indexes + "PLAYEROPTION::-f=${genomeSource.indices.fields.path}" + #end if + + ##Required files + "PLAYEROPTION::-t=$target_file" + "PLAYEROPTION::-s=$alignment_file" + #if $controlSource.refControlSource == "history": + "PLAYEROPTION::-c=${controlSource.control_file}" + #else: + ##use precomputed indexes + "PLAYEROPTION::-c=${controlSource.indices.fields.path}" + #end if + + ##Optional parameter + + #if $option.option == "modify_parameters": + + "PLAYEROPTION::--numBin=$option.numBin" + "PLAYEROPTION::--minReadDepth=$option.minReadDepth" + "PLAYEROPTION::--minNBases=$option.minNbases" + + #if str($option.sam) == "true": + "PLAYEROPTION::--sam" + #end if + + #if str($option.bed) == "true": + "PLAYEROPTION::--bed" + #end if + + "PLAYEROPTION::--pval=$option.pval" + "PLAYEROPTION::--sampleName=$option.sampleName" + + #if str($option.nomultimapped) == "true": + "PLAYEROPTION::--nomultimapped" + #end if + + #if str($option.plot) == "true": + "PLAYEROPTION::--plot" + #end if + + "PLAYEROPTION::--minExon=$option.minExon" + "PLAYEROPTION::--minControlRdForCall=$option.minControlRdForCall" + "PLAYEROPTION::--minTestRdForCall=$option.minTestRdForCall" + "PLAYEROPTION::--minAvgForCall=$option.minAvgForCall" + 
"PLAYEROPTION::--maxRegionSize=$option.maxRegionSize" + "PLAYEROPTION::--targetRegionSize=$option.targetRegionSize" + + #if str($option.largedeletion) == "true": + "PLAYEROPTION::--largedeletion" + #end if + + "PLAYEROPTION::--smallSegment=$option.smallSegment" + "PLAYEROPTION::--targetRegionSize=$option.targetRegionSize" + "PLAYEROPTION::--largeSegment=$option.largeSegment" + "PLAYEROPTION::--lrCallStart=$option.lrCallStart" + "PLAYEROPTION::--lrCallEnd=$option.lrCallEnd" + "PLAYEROPTION::--passSize=$option.passSize" + #end if + + ##File to generate the bam list + CONTRAOUTPUT::$html_file + CONTRADIR::$html_file.files_path + + </command> + <inputs> + + <conditional name="genomeSource"> + <param name="refGenomeSource" type="select" label="Will you select a reference from your history or use a built-in fasta file?"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="indices" type="select" label="Select a reference genome"> + <options from_data_table="all_fasta"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" label="Select a reference from history" /> + </when> + </conditional> + + <param name="target_file" type="data" format="bed" help="" optional="false" /> + <param name="alignment_file" type="data" format="bam,sam" help="" optional="false" /> + + + <conditional name="controlSource"> + <param name="refControlSource" type="select" label="Will you select a reference from your history or use a built-in control file?"> + <option value="indexed">Use a built-in control</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="indices" type="select" label="Select a baseline control"> + <options from_data_table="baseline_files"> + 
<filter type="sort_by" column="2" /> + <validator type="no_options" message="No files available" /> + </options> + </param> + </when> + <when value="history"> + <param name="control_file" type="data" format="bam,sam,bed,tabular" help="" optional="false" /> + </when> + </conditional> + + <conditional name="option"> + <param name="option" type="select" label="Optional Parameters" help="" optional="true"> + <option value="default_parameters" selected="true">Default Parameters</option> + <option value="modify_parameters">Modify Parameters</option> + </param> + <when value="modify_parameters"> + <param name="numBin" type="integer" value="20" optional="true" /> + <param name="minReadDepth" type="integer" value="10" optional="true" /> + <param name="minNbases" type="integer" value="10" optional="true" /> + <param name="sam" type="select" label="sam" help="" optional="true"> + <option value="true" >true</option> + <option value="false" selected="true">false</option> + </param> + <param name="bed" type="select" label="bed" help="" optional="true"> + <option value="true" >true</option> + <option value="false" selected="true">false</option> + </param> + <param name="pval" type="float" value="0.05" optional="true" /> + <param name="sampleName" value="Contra_Output" type="text" optional="true" /> + <param name="nomultimapped" type="select" label="no multimapped" help="" optional="true"> + <option value="true" >true</option> + <option value="false" selected="true">false</option> + </param> + <param name="plot" type="select" label="plot" help="" optional="true"> + <option value="true" >true</option> + <option value="false" selected="true">false</option> + </param> + <param name="minExon" type="integer" value="2000" optional="true" /> + <param name="minControlRdForCall" type="integer" value="5" optional="true" /> + <param name="minTestRdForCall" type="integer" value="0" optional="true" /> + <param name="minAvgForCall" type="integer" value="20" optional="true" /> + <param 
name="maxRegionSize" type="integer" value="0" optional="true" /> + <param name="targetRegionSize" type="integer" value="200" optional="true" /> + <param name="largedeletion" type="select" label="large deletion" help="" optional="true"> + <option value="true" >true</option> + <option value="false" selected="true">false</option> + </param> + + <param name="smallSegment" type="integer" value="1" optional="true" /> + <param name="largeSegment" type="integer" value="25" optional="true" /> + <param name="lrCallStart" type="float" value="-0.3" optional="true" /> + <param name="lrCallEnd" type="float" value="0.3" optional="true" /> + <param name="passSize" type="float" value="0.5" optional="true" /> + + </when> + </conditional> + </inputs> + <outputs> + <data name="html_file" format="html" label="Contra Output" /> + </outputs> + <help> +| + + +**Reference** + http://contra-cnv.sourceforge.net/ + +----- + +**What it does** + +CONTRA is a tool for copy number variation (CNV) detection for targeted resequencing data such as those from whole-exome capture data. CONTRA calls copy number gains and losses for each target region with key strategies include the use of base-level log-ratios to remove GC-content bias, correction for an imbalanced library size effect on log-ratios, and the estimation of log-ratio variations via binning and interpolation. It takes standard alignment formats (BAM/SAM) and output in variant call format (VCF 4.0) for easy integration with other next generation sequencing analysis package. 
+ + +----- + +**Required Parameters** + +:: + + -t, --target Target region definition file [BED format] + + -s, --test Alignment file for the test sample [BAM/SAM] + + -c, --control Alignment file for the control sample + [BAM/SAM/BED – baseline file] + + --bed **option has to be supplied for control + with baseline file.** + + -f, --fasta Reference genome [FASTA] + + -o, --outFolder the folder name (and its path) to store the output + of the analysis (this new folder will be created – + error message occur if the folder exists) + +----- + +**Optional Parameters** + +:: + + --numBin Numbers of bins to group the regions. User can + specify multiple experiments with different numbers + of bins (comma separated). [Default: 20] + + --minReadDepth The threshold for minimum read depth for each bases + (see Step 2 in CONTRA workflow) [Default: 10] + + --minNBases The threshold for minimum number of bases for each + target regions (see Step 2 in CONTRA workflow) + [Default: 10] + + --sam If the specified test and control samples are in + SAM format. [Default: False] (It will always take + BAM samples as default) + + --bed If specified, control will be a baseline file in + BED format. [Default: False] + Please refer to the Baseline Script section for + instruction how to create baseline files from set + of BAMfiles. A set of baseline files from different + platform have also been provided in the CONTRA + download page. + + --pval The p-value threshold for filtering. Based on Adjusted + P-Values. Only regions that pass this threshold will + be included in the VCF file. [Default: 0.05] + + --sampleName The name to be appended to the front of the default output + name. By default, there will be nothing appended. + + --nomultimapped The option to remove multi-mapped reads + (using SAMtools with mapping quality > 0). 
+ [default: FALSE] + + -p, --plot If specified, plots of log-ratio distribution for each + bin will be included in the output folder [default: FALSE] + + --minExon Minimum number of exons in one bin (if less than this number + , bin that contains small number of exons will be merged to + the adjacent bins) [Default : 2000] + + --minControlRdForCall Minimum Control ReadDepth for call [Default: 5] + + --minTestRdForCall Minimum Test ReadDepth for call [Default: 0] + + --minAvgForCall Minimum average coverage for call [Default: 20] + + --maxRegionSize Maximum region size in target region (for breaking + large regions into smaller regions. By default, + maxRegionSize=0 means no breakdown). [Default : 0] + + --targetRegionSize Target region size for breakdown (if maxRegionSize + is non-zero) [Default: 200] + + -l, --largeDeletion If specified, CONTRA will run large deletion analysis (CBS). + User must have DNAcopy R-library installed to run the + analysis. [False] + + --smallSegment CBS segment size for calling large variations [Default : 1] + + --largeSegment CBS segment size for calling large variations [Default : 25] + + --lrCallStart Log ratios start range that will be used to call CNV + [Default : -0.3] + + --lrCallEnd Log ratios end range that will be used to call CNV + [Default : 0.3] + + --passSize Size of exons that passed the p-value threshold compare + to the original exons size [Default: 0.5] + </help> +</tool> + +
# contra_wrapper.pl (added as b/contra_wrapper.pl in changeset 23:2770f49cb0dc)
#
# Galaxy wrapper around contra.py.
#
# Every command-line argument is a "TAG::value" pair:
#   PLAYEROPTION::<option>[=<value>]  option forwarded to contra.py
#   CONTRAOUTPUT::<path>              HTML index file to write
#   CONTRADIR::<path>                 directory that receives the result files
#
# contra.py is run with "-o CONTRA_OUTPUT" (relative to the current working
# directory). Afterwards every regular file below CONTRA_OUTPUT is moved,
# flattened, into the CONTRADIR directory and listed as a link in the
# CONTRAOUTPUT HTML file.
#
# Dies with a message on malformed arguments, when an output path is missing,
# when contra.py cannot be run or exits non-zero, and on any file-system error.

use strict;
use warnings;

use FindBin;
use File::Copy qw(move);
use File::Path qw(make_path);
use File::Spec;


die "Bad number of inputs" if !@ARGV;

my @player_options;   # option tokens handed to contra.py, one element each
my $contra_output;    # path given by CONTRAOUTPUT
my $contra_dir;       # path given by CONTRADIR

for my $input (@ARGV) {
    # Split on the first "::" only, so a value containing "::" stays intact.
    my ($tag, $value) = split /::/, $input, 2;
    die "Unknown input: $input\n"
        unless defined $tag && defined $value;

    if ($tag eq 'PLAYEROPTION') {
        # Split on the first "=" only, so "=" inside a value is preserved.
        my ($option, $argument) = split /=/, $value, 2;
        next unless defined $option && length $option;

        my @tokens;
        if (!defined $argument) {
            # Plain flag such as --plot.
            @tokens = ($option);
        }
        elsif (length $argument) {
            @tokens = ($option, $argument);
        }
        else {
            # Blank value (optional parameter left empty): drop the option,
            # otherwise it would consume the next token as its value.
            next;
        }

        # Echo the forwarded option on STDOUT, as the original wrapper did.
        print join(' ', @tokens), "\n";
        push @player_options, @tokens;
    }
    elsif ($tag eq 'CONTRAOUTPUT') {
        $contra_output = $value;
    }
    elsif ($tag eq 'CONTRADIR') {
        $contra_dir = $value;
    }
    else {
        die "Unknown input: $input\n";
    }
}

die "No CONTRAOUTPUT argument given\n"
    unless defined $contra_output && length $contra_output;
die "No CONTRADIR argument given\n"
    unless defined $contra_dir && length $contra_dir;

my $working_dir = "CONTRA_OUTPUT";
make_path($contra_dir);

# Prefer a contra.py sitting next to this wrapper (the original location);
# otherwise rely on PATH, where the tool dependency installs it.
my $bundled_contra = File::Spec->catfile($FindBin::Bin, 'contra.py');
my $contra_program = -x $bundled_contra ? $bundled_contra : 'contra.py';

# Run contra with all of its output discarded. Arguments are passed as a
# list, so no shell is involved.
run_silently($contra_program, '-o', $working_dir, @player_options);

# Write the HTML index while moving the result files.
open my $html_fh, '>', $contra_output
    or die "Cannot write $contra_output: $!\n";
print {$html_fh} "<html><head><title>Contra: Copy Number Analysis for Targeted Resequencing</title></head><body><h3>Contra Output Files:</h3><p><ul>\n";
move_files($working_dir);
print {$html_fh} "</ul></p>\n</body></html>\n";
close $html_fh
    or die "Cannot finish writing $contra_output: $!\n";

# Recursively walks $local_dir. Every regular file is moved into $contra_dir
# (without its sub-directory, so equally named files from different
# sub-directories replace each other) and gets one <li> link in the HTML
# index. Dies on anything that is neither a directory nor a regular file.
sub move_files {
    my ($local_dir) = @_;

    opendir my $dir_handle, $local_dir
        or die "Cannot open directory $local_dir: $!\n";
    my @entries = readdir $dir_handle;
    closedir $dir_handle;

    for my $file (@entries) {
        next if $file eq "." || $file eq "..";

        my $path = "$local_dir/$file";
        if (-d $path) {
            move_files($path);
        }
        elsif (-f $path) {
            my $href  = escape_html(escape_uri($file));
            my $label = escape_html($file);
            print {$html_fh} "<li><a href=\"$href\">$label</a></li>\n";

            move($path, File::Spec->catfile($contra_dir, $file))
                or die "Cannot move $path to $contra_dir: $!\n";
        }
        else {
            die("Unrecognized file generated: $file\n");
        }
    }

    return;
}

# Returns $text with the characters & < > " replaced by HTML entities.
sub escape_html {
    my ($text) = @_;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    $text =~ s/([&<>"])/$entity_for{$1}/g;
    return $text;
}

# Returns $text with every byte outside A-Z a-z 0-9 . _ ~ - percent-encoded,
# so a file name can be used as a relative link target.
sub escape_uri {
    my ($text) = @_;
    $text =~ s/([^A-Za-z0-9._~-])/sprintf('%%%02X', ord $1)/ge;
    return $text;
}

# Runs @command (program followed by its arguments) without a shell, with
# both STDOUT and STDERR sent to /dev/null for the duration of the call.
# Dies if the program cannot be started or does not exit with 0.
sub run_silently {
    my @command = @_;

    open my $saved_stdout, '>&', \*STDOUT
        or die "Cannot duplicate STDOUT: $!\n";
    open my $saved_stderr, '>&', \*STDERR
        or die "Cannot duplicate STDERR: $!\n";

    open STDOUT, '>', '/dev/null'
        or die "Cannot redirect STDOUT to /dev/null: $!\n";
    open STDERR, '>&', \*STDOUT
        or die "Cannot redirect STDERR: $!\n";

    # Indirect-object form forces list semantics even for a single element.
    my $status = system { $command[0] } @command;
    my $launch_error = $status == -1 ? "$!" : undef;

    # Restore STDERR first so that a failure restoring STDOUT is visible.
    open STDERR, '>&', $saved_stderr
        or die "Cannot restore STDERR: $!\n";
    open STDOUT, '>&', $saved_stdout
        or die "Cannot restore STDOUT: $!\n";
    close $saved_stderr;
    close $saved_stdout;

    die "Cannot run $command[0]: $launch_error\n" if defined $launch_error;
    die "$command[0] failed (exit code " . ($status >> 8)
        . ", wait status $status)\n"
        if $status != 0;

    return;
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue May 20 09:59:00 2014 +1000 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="all_fasta.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue May 20 09:59:00 2014 +1000 @@ -0,0 +1,80 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="samtools" version="0.1.18"> + <install version="1.0"> + <actions> + <action type="download_by_url">http://sourceforge.net/projects/samtools/files/samtools/0.1.18/samtools-0.1.18.tar.bz2</action> + <action type="shell_command">sed -i.bak -e 's/-lcurses/-lncurses/g' Makefile</action> + <action type="shell_command">make</action> + <action type="move_file"> + <source>samtools</source> + <destination>$INSTALL_DIR/bin</destination> + </action> + <action type="move_file"> + <source>misc/maq2sam-long</source> + <destination>$INSTALL_DIR/bin</destination> + </action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + <readme> + Compiling SAMtools requires the ncurses and zlib development libraries. + </readme> + </package> + + + <package name="bedtools" version="2.17.0"> + <install version="1.0"> + <actions> + <action type="download_by_url" target_filename="bedtools-2.17.0.tar.gz" >http://bedtools.googlecode.com/files/BEDTools.v2.17.0.tar.gz</action> + <action type="shell_command">make all</action> + <action type="make_directory">$INSTALL_DIR/bin</action> + <action type="move_directory_files"> + <source_directory>bin</source_directory> + <destination_directory>$INSTALL_DIR/bin</destination_directory> + </action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + <readme> + GCC version 4.1 or greater is recommended. 3.x versions will typically not compile BEDTools. 
g++ required + </readme> + </package> + <package name="contra" version="2.0.4"> + <install version="1.0"> + <actions> + <action type="download_by_url">http://downloads.sourceforge.net/project/contra-cnv/CONTRA.V2.0/CONTRA.v2.0.4.tar.gz</action> + <action type="shell_command">rm -rf bedtools_installation_guide.txt</action> + <action type="shell_command">rm -rf BEDTools-User-Manual.v4.pdf</action> + <action type="shell_command">rm -rf BEDTools.v2.11.2.tar.gz</action> + <action type="shell_command">rm -rf CONTRA_User_Guide.2.0.pdf</action> + <action type="make_directory">$INSTALL_DIR/bin</action> + <action type="make_directory">$INSTALL_DIR/bin/scripts</action> + <action type="move_file"> + <source>contra.py</source> + <destination>$INSTALL_DIR/bin</destination> + </action> + <action type="move_file"> + <source>baseline.py</source> + <destination>$INSTALL_DIR/bin</destination> + </action> + <action type="move_directory_files"> + <source_directory>scripts</source_directory> + <destination_directory>$INSTALL_DIR/bin/scripts</destination_directory> + </action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + <readme> + Contra requires Samtools, Bedtools, Python and R + </readme> + </package> +</tool_dependency> + +