Repository 'svdetect'
hg clone https://toolshed.g2.bx.psu.edu/repos/bzeitouni/svdetect

Changeset 13:f090bf6ec765 (2012-06-11)
Previous changeset 12:602e6912ac67 (2012-06-11) Next changeset 14:40207106bc44 (2012-07-12)
Commit message:
Uploaded
added:
svdetect/BAM_preprocessingPairs.pl
svdetect/BAM_preprocessingPairs.xml
svdetect/SVDetect_compare.pl
svdetect/SVDetect_compare.xml
svdetect/SVDetect_import.sh
svdetect/SVDetect_import.xml
svdetect/SVDetect_run_parallel.pl
svdetect/SVDetect_run_parallel.xml
b
diff -r 602e6912ac67 -r f090bf6ec765 svdetect/BAM_preprocessingPairs.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/svdetect/BAM_preprocessingPairs.pl Mon Jun 11 12:59:11 2012 -0400
[
b'@@ -0,0 +1,340 @@\n+#!/usr/bin/perl -w\n+\n+use strict;\n+use warnings;\n+use Getopt::Std;\n+my $version = \'0.4b_galaxy\';\n+\n+my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools";\n+\n+my %opts = ( t=>1, p=>1, n=>1000000, f=>3, s=>0, S=>10000, o=>"." );\n+\n+getopts(\'dt:p:n:f:s:S:o:b:l:x:N:\', \\%opts); #GALAXY \n+\n+my $working_dir=($opts{o} ne ".")? $opts{o}:"working directory";\n+\n+my $pt_bad_mates_file=$opts{b};  #GALAXY \n+my $pt_log_file=$opts{l}; #GALAXY \n+my $pt_good_mates_file=$opts{x} if($opts{d}); #GALAXY \n+\n+\n+die(qq/\n+    \n+Description:\n+    \n+    Preprocessing of mates to get anomalously mapped mate-pair\\/paired-end reads as input\n+    for SVDetect.\n+\n+    From all pairs mapped onto the reference genome, this script outputs abnormal pairs:\n+        - mapped on two different chromosomes\n+        - with an incorrect strand orientation and\\/or pair order\n+        - with an insert size distance +- sigma threshold\n+    into a file <prefix.ab.bam\\/sam> sorted by read names\n+    \n+    -BAM\\/SAM File input format only.\n+\n+    Version : $version\n+    SAMtools required for BAM files\n+    \n+    \n+Usage:   BAM_preprocessingPairs.pl [options] <all_mate_file.sorted.bam\\/sam>\n+\n+Options: -t BOOLEAN   read type: =1 (Illumina), =0 (SOLiD) [$opts{t}]\n+         -p BOOLEAN   pair type: =1 (paired-end), =0 (mate-pair)  [$opts{p}]\n+         -n INTEGER   number of pairs for calculating mu and sigma lengths [$opts{n}]\n+\t -s INTEGER   minimum value of ISIZE for calculating mu and sigma lengths [$opts{s}]\n+\t -S INTEGER   maximum value of ISIZE for calculating mu and sigma lengths [$opts{S}]\n+         -f REAL      minimal number of sigma fold for filtering pairs [$opts{f}]\n+         -d           dump normal pairs into a file [<prefix.norm.bam\\/sam>] (optional)\n+\t -o STRING    output directory [$working_dir]\n+\n+\\n/) if (@ARGV == 0 && -t STDIN);\n+\n+unless (-d $opts{o}){\n+\tmkdir $opts{o} or die;\n+}\n+$opts{o}.="/" 
if($opts{o}!~/\\/$/);\n+\n+my $mates_file=shift(@ARGV);\n+\n+$mates_file=readlink($mates_file);\n+\n+my $bad_mates_file=(split(/\\//,$mates_file))[$#_];\n+\n+if($bad_mates_file=~/.(s|b)am$/){\n+    $bad_mates_file=~s/.(b|s)am$/.ab.sam/;\n+    $bad_mates_file=$opts{o}.$bad_mates_file;\n+}\n+\n+else{\n+    die "Error: mate_file with the extension <.bam> or <.sam> needed !\\n";\n+}\n+\n+my $good_mates_file;\n+if($opts{d}){\n+    $good_mates_file=(split(/\\//,$mates_file))[$#_];\n+    $good_mates_file=~s/.(b|s)am$/.norm.sam/;\n+    $good_mates_file=$opts{o}.$good_mates_file;\n+}\n+\n+my $log_file=$opts{o}.$opts{N}.".svdetect_preprocessing.log"; #GALAXY \n+\n+#------------------------------------------------------------------------------#\n+#Calculate mu and sigma\n+\n+open LOG,">$log_file" or die "$0: can\'t open ".$log_file.":$!\\n";\n+\n+print LOG "\\# Calculating mu and sigma lengths...\\n";\n+print LOG "-- file=$mates_file\\n";\n+print LOG "-- n=$opts{n}\\n";\n+print LOG "-- ISIZE min=$opts{s}, max=$opts{S}\\n";\n+\n+my ($record, $sumX,$sumX2) = (0,0,0);\n+my $warn=$opts{n}/10;\n+my $prev_pair="FIRST";\n+\n+my $bam=($mates_file =~ /.bam$/)? 
1:0;\n+\n+if($bam){\n+    open(MATES, "${SAMTOOLS_BIN_DIR}/samtools view $mates_file |") or die "$0: can\'t open ".$mates_file.":$!\\n";\n+}else{\n+    open MATES, "<".$mates_file or die "$0: can\'t open ".$mates_file.":$!\\n";\n+}\n+\n+while(<MATES>){\n+    \n+    my @t=split;\n+    \n+    next if ($t[0]=~/^@/);\n+    \n+    my $current_pair=$t[0];\n+    next if($current_pair eq $prev_pair);\n+    $prev_pair=$current_pair;                                                   \n+    \n+    my ($chr1,$chr2,$length)=($t[2],$t[6],abs($t[8]));\n+    \n+    next if ($chr1 eq "*" || $chr2 eq "*");\n+    next if ($length<$opts{s} || $length>$opts{S}) ;\n+    \n+    if($chr2 eq "="){\n+\n+        $sumX += $length;\t\t\t\t\t\t\t#add to sum and sum^2 for mean and variance calculation\n+\t$sumX2 += $length*$length;\n+        $record++;\n+    }\n+\n+    if($record>$warn){\n+\tprint LOG "-- $warn pairs analysed\\n";\n+        $warn+=$warn;\n+    }\n+    \n+    last if ($record>$opts{n});\n+    \n+}\n+close (MATES);\n+\n+$record--;\n+my $m'..b'ad=-1;\n+        $count{unmap}++;\n+        $record++;\n+        next;\n+        \n+    }\n+    \n+    my $strand1 = (($t[1]&0x0010))? \'R\':\'F\';\n+    my $strand2 = (($t[1]&0x0020))? \'R\':\'F\';\n+    my $order1  = (($t[1]&0x0040))? \'1\':\'2\';\n+    my $order2  = (($t[1]&0x0080))? 
\'1\':\'2\';\n+    \n+    if($order1 == 2){\n+        ($strand1,$strand2)=($strand2,$strand1);\n+        ($chr1,$chr2)=($chr2,$chr1);\n+        ($pos1,$pos2)=($pos2,$pos1);\n+        ($order1,$order2)=($order2,$order1);\n+    }\n+    \n+    my $sense=$strand1.$strand2;\n+    \n+    if($chr1 ne "=" && $chr2 ne "="){\n+        $bad=1;\n+        $count{chr}++;\n+    }\n+    \n+    if($opts{p}){ #paired-end\n+        if(!(($sense eq "FR" && $pos1<$pos2) || ($sense eq "RF" && $pos2<$pos1))){\n+            $bad=1;\n+            $count{sense}++;\n+        }\n+    }else{ #mate-pair\n+        if($opts{t}){ #Illumina\n+            if(!(($sense eq "FR" && $pos2<$pos1) || ($sense eq "RF" && $pos1<$pos2))){\n+            $bad=1;\n+            $count{sense}++;\n+            }\n+        }else{ #SOLiD\n+            if(!(($sense eq "FF" && $pos2<$pos1) || ($sense eq "RR" && $pos1<$pos2))){\n+            $bad=1;\n+            $count{sense}++;\n+            }\n+        }\n+    }\n+    \n+    if(($chr1 eq "=" || $chr2 eq "=") && ($length <$mu - $opts{f}*$sigma || $length>$mu + $opts{f}*$sigma)){\n+        $bad=1;\n+        $count{dist}++;\n+    }\n+    \n+    if($bad){\n+        print AB;\n+        $count{ab}++;\n+        $prev_bad=$bad;\n+    }else{\n+        print NORM if ($opts{d});\n+        $count{norm}++;\n+        $prev_bad=$bad;\n+    }\n+    \n+    $record++;\n+    \n+    if($record>$warn){\n+        print LOG "-- $warn pairs analysed\\n";\n+        $warn+=100000;\n+    }\n+}\n+\n+close AB;\n+close NORM if($opts{d});\n+\n+print LOG "-- Total : $record pairs analysed\\n";\n+print LOG "-- $count{unmap} pairs whose one or both reads are unmapped\\n";\n+print LOG "-- ".($count{ab}+$count{norm})." 
mapped pairs\\n";\n+print LOG "---- $count{ab} abnormal mapped pairs\\n";\n+print LOG "------ $count{chr} pairs mapped on two different chromosomes\\n";\n+print LOG "------ $count{sense} pairs with incorrect strand orientation and\\/or pair order\\n";\n+print LOG "------ $count{dist} pairs with incorrect insert size distance\\n";\n+print LOG "--- $count{norm} correct mapped pairs\\n";\n+\n+#------------------------------------------------------------------------------#\n+#------------------------------------------------------------------------------#\n+#OUTPUT\n+\n+if($bam){\n+    \n+    my $bam_file=$bad_mates_file;\n+    $bam_file=~s/.sam$/.bam/;\n+    print LOG "\\# Converting sam to bam for abnormal mapped pairs\\n";\n+    system("${SAMTOOLS_BIN_DIR}/samtools view -bS $bad_mates_file > $bam_file 2>".$opts{o}."samtools.log");\n+    unlink($bad_mates_file);\n+    print LOG "-- output created: $bam_file\\n";\n+\n+    system "rm $pt_bad_mates_file ; ln -s $bam_file $pt_bad_mates_file"; #GALAXY\n+    \n+    if($opts{d}){\n+        $bam_file=$good_mates_file;\n+        $bam_file=~s/.sam$/.bam/;\n+        print LOG "\\# Converting sam to bam for correct mapped pairs\\n";\n+        system("${SAMTOOLS_BIN_DIR}/samtools view -bS $good_mates_file > $bam_file 2>".$opts{o}."samtools.log");\n+        unlink($good_mates_file);\n+        print LOG "-- output created: $bam_file\\n";\n+\n+\tsystem "rm $pt_good_mates_file ; ln -s $bam_file $pt_good_mates_file"; #GALAXY\n+\n+    }\n+\n+}\n+\n+else{\n+    print LOG "-- output created: $bad_mates_file\\n";\n+    print LOG "-- output created: $good_mates_file\\n" if($opts{d});\n+}\n+\n+close LOG;\n+\n+system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY\n+\n+\n+#------------------------------------------------------------------------------#\n+#------------------------------------------------------------------------------#\n+sub decimal{\n+    \n+  my $num=shift;\n+  my $digs_to_cut=shift;\n+\n+  
$num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\\d+\\.(\\d){$digs_to_cut,}/);\n+\n+  return $num;\n+}\n+#------------------------------------------------------------------------------#\n'
b
diff -r 602e6912ac67 -r f090bf6ec765 svdetect/BAM_preprocessingPairs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/svdetect/BAM_preprocessingPairs.xml Mon Jun 11 12:59:11 2012 -0400
[
@@ -0,0 +1,77 @@
+<tool id="svdetect_preprocessing" name="BAM preprocessing">
+
+  <description>to get abnormal pairs</description>
+
+  <command interpreter="perl"> BAM_preprocessingPairs.pl -t '$readType' -p '$pairType' -n '$nbrePair' -s '$isizeMin' -S '$isizeMax' -f '$foldPair' -o '$__new_file_path__/svdetect' -b '$abBAM' -l '$log' -N '$sample_name'
+ #if $newBam.pairNormal=="yes" 
+ -d -x '$normBAM'
+ #end if
+ '$inputBam'
+  </command>
+
+  <inputs>
+    <param name="sample_name" type="text" value="sample" label="Sample Name"/>
+    <param name="inputBam" type="data" format="bam" label="BAM input file"/>
+    <param name="readType" type="select" label="Read type">
+ <option value="1">Illumina</option>
+ <option value="0">SOLiD</option>
+   </param>
+   <param name="pairType" type="select" label="Library type">
+ <option value="1">Paired-end</option>
+ <option value="0">Mate-Pair</option>
+   </param>
+   <conditional name="newBam">
+   <param name="pairNormal" type="select" label="Do you want an additional bam file listing concordant mapped pairs?" help="Dump normal pairs into a file sample_name.norm.bam/sam">
+ <option value="no">No</option>
+ <option value="yes">Yes</option>
+   </param>
+    <when value="yes">
+   <!-- do nothing here -->
+    </when>
+    <when value="no">
+  <!-- do nothing here -->
+    </when>
+   </conditional>
+   <param name="nbrePair" value="1000000" type="integer" size="30" label="Number of pairs for calculating mu (µ) and sigma (σ) lengths"/>
+   <param name="isizeMin" value="0" type="integer" size="30" label="Minimum value of ISIZE for calculating mu (µ) and sigma (σ) lengths"/>
+   <param name="isizeMax" value="10000" type="integer" size="30" label="Maximum value of ISIZE for calculating mu (µ)and sigma( σ) lengths"/>
+   <param name="foldPair" value="3" type="float" size="30" label="Minimal number of sigma (σ) fold for filtering pairs"/>
+  </inputs>
+
+  <outputs>
+    <data format="bam" name="abBAM" label="${$sample_name}.ab.bam"/> 
+    <data format="txt" name="log" label="${$sample_name}.svdetect_preprocessing.log"/>
+    <data format="bam" name="normBAM" label="${$sample_name}.norm.bam">
+    <filter>newBam['pairNormal'] == 'yes'</filter>
+    </data> 
+  </outputs>
+
+  <help>
+
+**What it does**
+
+Bam_preprocessingPairs - Version 0.4b
+
+Preprocessing of mates to get anomalously mapped mate-pair/paired-end reads as input for SVDetect.
+
+From all pairs mapped onto the reference genome, this script outputs abnormal pairs:
+
+ * mapped on two different chromosomes
+ * with an incorrect strand orientation and/or pair order
+ * with an insert size outside the range mu +- (fold x sigma), where fold is the sigma-fold threshold
+
+into a file prefix.ab.bam/sam sorted by read names
+    
+-BAM/SAM File input format only.
+  
+SAMtools required for BAM files
+
+-----
+
+.. class:: infomark
+
+Contact Bruno Zeitouni (bruno.zeitouni@curie.fr) for any questions or concerns about the Galaxy implementation of SVDetect.
+
+  </help>
+
+</tool>
b
diff -r 602e6912ac67 -r f090bf6ec765 svdetect/SVDetect_compare.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/svdetect/SVDetect_compare.pl Mon Jun 11 12:59:11 2012 -0400
[
b'@@ -0,0 +1,716 @@\n+#!/usr/bin/perl -w\n+\n+=pod\n+\n+=head1 NAME\n+\n+SVDetect Compare for Galaxy\n+\n+Version: 0.8 for Galaxy\n+\n+=head1 SYNOPSIS\n+\n+SVDetect_compare.pl links2compare -conf <configuration_file> [-help] [-man]\n+\n+=cut\n+\n+# -------------------------------------------------------------------\n+\n+use strict;\n+use warnings;\n+\n+use Pod::Usage;\n+use Getopt::Long;\n+\n+use Config::General;\n+use Tie::IxHash;\n+\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+#PARSE THE COMMAND LINE\n+my %OPT;\n+GetOptions(\\%OPT,\n+\t   \'conf=s\',\n+\t   \'out1=s\', #GALAXY\n+\t   \'out2=s\', #GALAXY\n+\t   \'out3=s\', #GALAXY\n+\t   \'out4=s\', #GALAXY\n+\t   \'out5=s\', #GALAXY\n+\t   \'out6=s\', #GALAXY\n+\t   \'out7=s\', #GALAXY\n+\t   \'out8=s\', #GALAXY\n+\t   \'out9=s\', #GALAXY\n+\t   \'l=s\', #GALAXY\n+\t   \'N=s\', #GALAXY\n+\t   \'help\',\n+           \'man\'\n+\t  );\n+\n+pod2usage() if $OPT{help};\n+pod2usage(-verbose=>2) if $OPT{man};\n+pod2usage(-message=> "$!", -exitval => 2) if (!defined $OPT{conf});\n+\n+\n+pod2usage() if(@ARGV<1);\n+\n+tie (my %func, \'Tie::IxHash\',links2compare=>\\&links2compare);\n+\n+foreach my $command (@ARGV){\n+    pod2usage(-message=> "Unknown command \\"$command\\"", -exitval => 2) if (!defined($func{$command}));\n+}\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+#READ THE CONFIGURATION FILE\n+my $conf=Config::General->new(    -ConfigFile        => $OPT{conf},\n+                                  -Tie => "Tie::IxHash",\n+                                  -AllowMultiOptions => 1,\n+\t\t\t\t  -LowerCaseNames    => 1,\n+\t\t\t\t  -AutoTrue => 1);\n+my %CONF= $conf->getall;\n+validateconfiguration(\\%CONF);\t\t\t\t\t\t\t#validation of the configuration 
parameters\n+\n+\n+my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools"; #GALAXY\n+my $BEDTOOLS_BIN_DIR="/bioinfo/local/BEDTools/bin"; #GALAXY\n+\n+my $pt_log_file=$OPT{l}; #GALAXY\n+my $log_file=$CONF{general}{output_dir}.$OPT{N}.".svdetect_compare.log"; #GALAXY\n+open LOG,">$log_file" or die "$0: can\'t open ".$log_file.":$!\\n";#GALAXY\n+\n+my @pt_sv_file=($OPT{out1},$OPT{out2},$OPT{out3}) if($OPT{out1}); #GALAXY common,sample,reference\n+my @pt_circos_file=($OPT{out4},$OPT{out5},$OPT{out6}) if($OPT{out4}); #GALAXY common,sample,reference\n+my @pt_bed_file=($OPT{out7},$OPT{out8},$OPT{out9}) if($OPT{out7}); #GALAXY common,sample,reference\n+\n+$CONF{compare}{sample_link_file}=readlink($CONF{compare}{sample_link_file});#GALAXY\n+$CONF{compare}{sample_link_file}=~s/.sv.txt//; #GALAXY\n+\n+$CONF{compare}{reference_link_file}=readlink($CONF{compare}{reference_link_file});#GALAXY\n+$CONF{compare}{reference_link_file}=~s/.sv.txt//; #GALAXY\n+\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+#COMMAND EXECUTION\n+foreach my $command (@ARGV){\n+    &{$func{$command}}();\n+}\n+print LOG "-- end\\n";\n+\n+close LOG;#GALAXY\n+system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY\n+\n+exit(0);\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+#FUNCTIONS\n+\n+# -----------------------------------------------------------------------------#\n+#MAIN FUNCTION number 5:Comparison between samples, common or specific links\n+sub links2compare{\n+    \n+    my @compare_files;\n+    \n+    compareSamples($CONF{general}{output_dir},\n+\t\t   $CONF{compare}{list_samples},\n+\t\t   
$CONF{compare}{sample_link_file},\n+\t\t   $CONF{compare}{reference_link_file},\n+\t\t   $CONF{compare}{min_overlap},\n+\t\t   $CONF{compare}{same_sv_type},\n+\t\t   \\@compare_files);\n+\n+    my $pt_ind=0;\n+ \n+    for my $input_file (@compa'..b'->[$i] eq \'F\'){\n+\t    $starts->[$i]=$positions->[$i];\n+\t    $ends->[$i]=$positions->[$i]+$tag_length->{$end_order->[$i]}-1;\n+\t}else{\n+\t    $starts->[$i]=$positions->[$i]-$tag_length->{$end_order->[$i]}+1;\n+\t    $ends->[$i]=$positions->[$i];\n+\t}\n+    }    \n+}\n+#------------------------------------------------------------------------------#\n+#------------------------------------------------------------------------------#\n+sub floor {\n+    my $nb = $_[0];\n+    $nb=~ s/\\..*//;\n+    return $nb;\n+}\n+#------------------------------------------------------------------------------#\n+#------------------------------------------------------------------------------#\n+sub decimal{\n+    \n+  my $num=shift;\n+  my $digs_to_cut=shift;\n+\n+  $num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\\d+\\.(\\d){$digs_to_cut,}/);\n+\n+  return $num;\n+}\n+\n+#------------------------------------------------------------------------------#\n+#------------------------------------------------------------------------------#\n+#Sort links according the concerned chromosomes and their coordinates\n+sub sortLinks{\n+    \n+    my ($links_file,$sortedlinks_file,$unique)=@_;\n+    \n+    print LOG "# Sorting links...\\n";\n+    \n+    my $pipe=($unique)? 
"| sort -u":"";\n+    system "sort -k 1,1 -k 4,4 -k 2,2n -k 5,5n -k 8,8n $links_file $pipe > $sortedlinks_file";\n+\n+}\n+#------------------------------------------------------------------------------#\n+#------------------------------------------------------------------------------#\n+sub getColor{\n+\n+    my($count,$hcolor,$format)=@_;\n+    for my $col ( keys % { $hcolor} ) {\n+       return $col if($count>=$hcolor->{$col}->[0] && $count<=$hcolor->{$col}->[1]);\n+    }\n+    return "white" if($format eq "circos");\n+    return "255,255,255" if($format eq "bed");\n+}\n+#------------------------------------------------------------------------------#\n+#------------------------------------------------------------------------------#\n+#check if the configuration file is correct\n+sub validateconfiguration{\n+    \n+    my %conf=%{$_[0]};\n+    my $list_prgs="@ARGV";\n+    \n+    my @circos_params=qw(organism_id colorcode);\n+    my @bed_params=qw(colorcode);\n+    my @compare_params=qw(list_samples list_read_lengths sample_link_file reference_link_file);\n+    \n+    unless (defined($conf{general}{output_dir})) {\n+\t$conf{general}{output_dir} = ".";\n+    }\n+    unless (-d $conf{general}{output_dir}){\n+\tmkdir $conf{general}{output_dir} or die;\n+    }\n+    $conf{general}{output_dir}.="/" if($conf{general}{output_dir}!~/\\/$/);\n+\n+    \n+    if($list_prgs=~/links2compare/){\n+\tforeach my $p (@compare_params) {\n+\t    die("Error Config : The compare parameter \\"$p\\" is not defined\\n") if (!defined $conf{compare}{$p});\n+\t}\n+\t\n+\tunless (defined($conf{compare}{same_sv_type})) {\n+\t    $conf{compare}{same_sv_type} = 0;\n+\t}\n+\t\n+\tunless (defined($conf{compare}{min_overlap})) {\n+\t    $conf{compare}{min_overlap} = 1E-9;\n+\t}\n+\t\n+\tif($conf{compare}{circos_output}){\n+\t    foreach my $p (@circos_params) {\n+\t\tnext if($list_prgs=~/^ratio/ && $p eq "colorcode");\n+\t\tdie("Error Config : The circos parameter \\"$p\\" is not defined\\n") if 
(!defined $conf{circos}{$p});\n+\t    }\n+\t}\n+\tif($conf{compare}{bed_output}){\n+\t    foreach my $p (@bed_params) {\n+\t\tdie("Error Config : The bed parameter \\"$p\\" is not defined\\n") if (!defined $conf{bed}{$p});\n+\t    }\n+\t    die("Error Config : The compare parameter \\"list_read_lengths\\" is not defined\\n") if (!defined $conf{compare}{list_read_lengths});\n+\n+\t    my @samples=split(",",$conf{compare}{list_samples});\n+\t    my @read_lengths=split(",",$conf{compare}{list_read_lengths});\n+\t    for my $i (0..$#samples){\n+\t\tmy @l=split("-",$read_lengths[$i]);\n+\t\t$conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]};\n+\t    }\n+\t}\n+    }\n+   \n+    \n+}\n+#------------------------------------------------------------------------------#\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n'
b
diff -r 602e6912ac67 -r f090bf6ec765 svdetect/SVDetect_compare.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/svdetect/SVDetect_compare.xml Mon Jun 11 12:59:11 2012 -0400
[
@@ -0,0 +1,218 @@
+<tool id="svdetect_compare" name="Compare">
+
+<description>structural variants between two samples</description>
+
+<command interpreter="perl">SVDetect_compare.pl links2compare -conf '$config_file' -l '$log_file' -N '$sample_name.$reference_name'
+
+#if $links2SV
+-out1 '$common_sv_file'
+-out2 '$sample_sv_file'
+-out3 '$reference_sv_file'
+#end if
+
+#if $file_conversion.file_conversion_select=="convert" and $file_conversion.links2circos
+-out4 '$common_circos_file'
+-out5 '$sample_circos_file'
+-out6 '$reference_circos_file'
+#end if
+
+#if $file_conversion.file_conversion_select=="convert" and $file_conversion.links2bed
+-out7 '$common_bed_file'
+-out8 '$sample_bed_file'
+-out9 '$reference_bed_file'
+#end if
+
+</command>
+
+<inputs>
+ <param name="sample_name" type="text" size="20" value="sample" label="Sample Name"/>
+ <param name="sample_read1_length" type="integer" size="10" value="50" label="Sample read 1 length (bp)"/>
+ <param name="sample_read2_length" type="integer" size="10" value="50" label="Sample read 2 length (bp)"/>
+    <param name="sample_mates_file" type="data" format="sv" label="Sample input file" help=".sv file"/>
+
+ <param name="reference_name" type="text" size="20" value="reference" label="Reference Name"/>
+ <param name="reference_read1_length" type="integer" size="10" value="50" label="Reference read 1 length (bp)"/>
+ <param name="reference_read2_length" type="integer" size="10" value="50" label="Reference read 2 length (bp)"/>
+    <param name="reference_mates_file" type="data" format="sv" label="Reference input file" help=".sv file"/>
+
+ <param name="min_overlap" type="float" size="10"  value="0.05" label="Minimum overlap of links required as a fraction"/>
+ <param name="same_sv_type" label="Comparison of SVs with the same type only ?" type="boolean" truevalue="1" falsevalue="0" checked="True"/>
+
+ <param name="links2SV" label="Do you want to have filtered links in a tabulated file format showing significant SVs?" type="boolean" truevalue="1" falsevalue="0" checked="True"/>
+
+ <conditional name="file_conversion">
+ <param name="file_conversion_select" type="select" label="Output file conversion" help="Converts filtered links to Circos/BED files format for graphical view of SVs">
+ <option value="do_not_convert">No</option>
+ <option value="convert">Yes</option>
+ </param>
+ <when value="do_not_convert">
+ <!-- do nothing here -->
+ </when>
+ <when value="convert">
+ <param name="links2circos" label="Converts the link list to the Circos link format" type="boolean" truevalue="1" falsevalue="0" checked="True"/>
+ <param name="links2bed" label="Converts the link list to the UCSC BED format" type="boolean" truevalue="1" falsevalue="0" checked="False"/>
+ <param name="organism_id" type="text" size="10" value="hs" label="Organism ID"/>
+ <repeat name="color_code" title="Color-code" min="1" max="7">
+ <param name="color" type="select" label="Color">
+ <option value="grey">grey</option>
+ <option value="black">black</option>
+ <option value="blue">blue</option>
+ <option value="green">green</option>
+ <option value="purple">purple</option>
+ <option value="orange">orange</option>
+ <option value="red">red</option>
+ </param>
+ <param name="interval" type="text" value="1,3" label="Interval"/>
+ </repeat>
+ </when>
+ </conditional>
+</inputs>
+
+
+
+<outputs>
+ <data format="sv" name="common_sv_file" label="common.compared.sv">
+ <filter>links2SV is True</filter>
+ </data>
+ <data format="sv" name="sample_sv_file" label="${sample_name}.compared.sv">
+ <filter>links2SV is True</filter>
+ </data>
+ <data format="sv" name="reference_sv_file" label="${reference_name}.compared.sv">
+ <filter>links2SV is True</filter>
+ </data>
+
+ <data format="segdup" name="common_circos_file" label="common.compared.segdup">
+ <filter>(
+ file_conversion['file_conversion_select']=="convert" and
+ file_conversion['links2circos'] is True
+ )
+ </filter>
+ </data>
+ <data format="segdup" name="sample_circos_file" label="${sample_name}.compared.segdup">
+ <filter>(
+ file_conversion['file_conversion_select']=="convert" and
+ file_conversion['links2circos'] is True
+ )
+ </filter>
+ </data>
+ <data format="segdup" name="reference_circos_file" label="${reference_name}.compared.segdup">
+ <filter>(
+ file_conversion['file_conversion_select']=="convert" and
+ file_conversion['links2circos'] is True
+ )
+ </filter>
+ </data>
+
+ <data format="bed" name="common_bed_file" label="common.compared.bed">
+ <filter>(
+ file_conversion['file_conversion_select']=="convert" and
+ file_conversion['links2bed'] is True
+ )
+ </filter>
+ </data>
+ <data format="bed" name="sample_bed_file" label="${sample_name}.compared.bed">
+ <filter>(
+ file_conversion['file_conversion_select']=="convert" and
+ file_conversion['links2bed'] is True
+ )
+ </filter>
+ </data>
+ <data format="bed" name="reference_bed_file" label="${reference_name}.compared.bed">
+ <filter>(
+ file_conversion['file_conversion_select']=="convert" and
+ file_conversion['links2bed'] is True
+ )
+ </filter>
+ </data>
+
+ <data format="txt" name="log_file" label="${sample_name}.${reference_name}.svdetect_compare.log"/>
+</outputs>
+
+
+
+<configfiles>
+ <configfile name="config_file">
+&lt;general&gt;
+output_dir=$__new_file_path__/svdetect
+&lt;/general&gt; 
+
+#if $file_conversion.file_conversion_select == "convert"
+#if $file_conversion.links2circos
+&lt;circos&gt;
+organism_id=${file_conversion.organism_id}
+&lt;colorcode&gt;
+#for $color_repeat in $file_conversion.color_code
+${color_repeat.color}=${color_repeat.interval}
+#end for
+&lt;/colorcode&gt;
+&lt;/circos&gt;
+#end if
+#if $file_conversion.links2bed
+&lt;bed&gt;
+&lt;colorcode&gt;
+#for $color_repeat in $file_conversion.color_code
+#if str($color_repeat.color)== "grey"
+190,190,190=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "black"
+0,0,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "blue"
+0,0,255=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "green"
+0,255,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "purple"
+153,50,205=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "orange"
+255,140,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "red"
+255,0,0=${color_repeat.interval}
+#end if
+#end for
+&lt;/colorcode&gt;
+&lt;/bed&gt;
+#end if
+#end if
+
+&lt;compare&gt;
+list_samples=${sample_name},${reference_name}
+list_read_lengths=${sample_read1_length}-${sample_read2_length},${reference_read1_length}-${reference_read2_length}
+sample_link_file=${sample_mates_file}
+reference_link_file=${reference_mates_file}
+min_overlap=${min_overlap}
+same_sv_type=${same_sv_type}
+sv_output=${links2SV}
+#if $file_conversion.file_conversion_select == "convert"
+circos_output=${$file_conversion.links2circos}
+bed_output=${$file_conversion.links2bed}
+#end if
+&lt;/compare&gt; 
+
+ </configfile>
+</configfiles>
+
+  <help>
+**What it does**
+
+SVDetect - Version : 0.8
+
+Comparison of clusters between two samples to get common or sample-specific SVs
+
+This program is designed to compare filtered links between two anomalously mapped mate-pair/paired-end datasets
+and to identify common and sample-specific SVs (like the usual sample/reference design).
+Overlaps between coordinates of clusters and types of SVs are used as parameters of comparison.
+
+The manual is available at http://svdetect.sourceforge.net/Site/Manual.html
+
+-----
+
+.. class:: infomark
+
+Contact Bruno Zeitouni (bruno.zeitouni@curie.fr) for any questions or concerns about the Galaxy implementation of SVDetect.
+  </help>
+
+</tool>
b
diff -r 602e6912ac67 -r f090bf6ec765 svdetect/SVDetect_import.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/svdetect/SVDetect_import.sh Mon Jun 11 12:59:11 2012 -0400
b
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+
+while getopts "i:o:" optionName; do
+case "$optionName" in
+
+i) INPUT="$OPTARG";;
+o) OUTPUT="$OPTARG";;
+
+esac
+done
+
+rm $OUTPUT
+
+ln -s $INPUT $OUTPUT
b
diff -r 602e6912ac67 -r f090bf6ec765 svdetect/SVDetect_import.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/svdetect/SVDetect_import.xml Mon Jun 11 12:59:11 2012 -0400
[
@@ -0,0 +1,85 @@
+<tool id="svdetect_import" name="Import data">
+  <description>BAM, chromosome info or sv files</description>
+  <command interpreter="bash">SVDetect_import.sh -i '$file_path'
+  #if str($type.file_type)=="bam"
+  -o '$outbamfile'
+  #elif str($type.file_type)=="len"
+  -o '$outlenfile'
+  #elif str($type.file_type)=="sv"
+  -o '$outsvfile'
+  #end if
+  </command>
+  <inputs>
+    <param name="file_name" type="text" value="file1" label="File Name"/>
+    <conditional name="type">
+      <param name="file_type" type="select" label="Select the file type to import" help="BAM file (BAM) or text file (SAM, chromosome list or a SV tabulated text file)">
+          <option value="bam">BAM file (.bam)</option>
+          <option value="len">Chromosome info file (.len)</option>
+          <option value="sv">SVDetect output file (.sv)</option>
+      </param>
+    <when value="bam">
+      <!-- do nothing here -->
+    </when>
+    <when value="len">
+      <!-- do nothing here -->
+    </when>
+     <when value="sv">
+      <!-- do nothing here -->
+    </when>
+    </conditional>
+     <param name="file_path" type="text" size="150" label="Path to file"/>
+  </inputs>
+  <outputs>
+    <data format="bam" name="outbamfile" label="${file_name}.bam">
+      <filter>type['file_type']=="bam"</filter>
+    </data>
+    <data format="len" name="outlenfile" label="${file_name}.len">
+      <filter>type['file_type']=="len"</filter>
+    </data>
+    <data format="sv" name="outsvfile" label="${file_name}.sv">
+      <filter>type['file_type']=="sv"</filter>
+    </data>
+  </outputs>
+  <help>
+**What it does**
+
+This tool allows you to quickly import a BAM file, a chromosome info file or a SVDetect output file from your computer as inputs for SVDetect.
+
+
+**Example of chromosome file**
+
+Input len file::
+
+    1  chr1  247249719
+    2  chr2  242951149
+    3  chr3  199501827
+    4  chr4  191273063
+    5  chr5  180857866
+    6  chr6  170899992
+    7  chr7  158821424
+    8  chr8  146274826
+    9  chr9  140273252
+    10  chr10  135374737
+    11  chr11  134452384
+    12  chr12  132349534
+    13  chr13  114142980
+    14  chr14  106368585
+    15  chr15  100338915
+    16  chr16  88827254
+    17  chr17  78774742
+    18  chr18  76117153
+    19  chr19  63811651
+    20  chr20  62435964
+    21  chr21  46944323
+    22  chr22  49691432
+    23  chrX  154913754
+    24  chrY  57772954
+
+-----
+
+.. class:: infomark
+
+Contact Bruno Zeitouni (bruno.zeitouni@curie.fr) for any questions or concerns about the Galaxy implementation of SVDetect.
+  </help>
+
+</tool>
b
diff -r 602e6912ac67 -r f090bf6ec765 svdetect/SVDetect_run_parallel.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/svdetect/SVDetect_run_parallel.pl Mon Jun 11 12:59:11 2012 -0400
[
b'@@ -0,0 +1,3537 @@\n+#!/usr/bin/perl -w\n+\n+=pod\n+\n+=head1 NAME\n+\n+SVDetect - Program designed to the detection of structural variations\n+from paired-end/mate-pair sequencing data, compatible with SOLiD and Illumina (>=1.3) reads\n+\n+Version: 0.8 for Galaxy\n+\n+=head1 SYNOPSIS\n+\n+SVDetect <command> -conf <configuration_file> [-help] [-man]\n+    \n+    Command:\n+\n+    \tlinking\t\tdetection and isolation of links\n+        filtering\tfiltering of links according different parameters\n+        links2circos\tlinks conversion to circos format\n+\tlinks2bed \tpaired-ends of links converted to bed format (UCSC)\n+\tlinks2SV\tformatted output to show most significant SVs\n+\tcnv\t\tcalculate copy-number profiles\n+\tratio2circos\tratio conversion to circos density format\n+\tratio2bedgraph\tratio conversion to bedGraph density format (UCSC)\n+    \n+=head1 DESCRIPTION\n+\n+This is a command-line interface to SVDetect.\n+\n+\n+=head1 AUTHORS\n+\n+Bruno Zeitouni E<lt>bruno.zeitouni@curie.frE<gt>,\n+Valentina Boeva E<lt>valentina.boeva@curie.frE<gt>\n+\n+=cut\n+\n+# -------------------------------------------------------------------\n+\n+use strict;\n+use warnings;\n+\n+use Pod::Usage;\n+use Getopt::Long;\n+\n+use Config::General;\n+use Tie::IxHash;\n+use FileHandle;\n+use Parallel::ForkManager;\n+\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+#PARSE THE COMMAND LINE\n+my %OPT;\n+GetOptions(\\%OPT,\n+\t   \'conf=s\',\n+\t   \'out1=s\', #GALAXY\n+\t   \'out2=s\', #GALAXY\n+\t   \'out3=s\', #GALAXY\n+\t   \'out4=s\', #GALAXY\n+\t   \'out5=s\', #GALAXY\n+\t   \'l=s\', #GALAXY\n+\t   \'N=s\',#GALAXY\n+\t   \'help\',#GALAXY\n+           \'man\'\n+\t  );\n+\n+pod2usage() if $OPT{help};\n+pod2usage(-verbose=>2) if $OPT{man};\n+pod2usage(-message=> "$!", -exitval => 2) if (!defined $OPT{conf});\n+\n+pod2usage() if(@ARGV<1);\n+\n+tie (my %func, \'Tie::IxHash\',linking=>\\&createlinks,\n+\t\t\t     
filtering=>\\&filterlinks,\n+\t\t\t     links2circos=>\\&links2circos,\n+\t\t\t     links2bed=>\\&links2bed,\n+\t\t\t     links2compare=>\\&links2compare,\n+\t\t\t     links2SV=>\\&links2SV,\n+\t\t\t     cnv=>\\&cnv,\n+\t\t\t     ratio2circos=>\\&ratio2circos,\n+\t\t\t     ratio2bedgraph=>\\&ratio2bedgraph);\n+\n+foreach my $command (@ARGV){\n+    pod2usage(-message=> "Unknown command \\"$command\\"", -exitval => 2) if (!defined($func{$command}));\n+}\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+\n+\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+#READ THE CONFIGURATION FILE\n+my $conf=Config::General->new(    -ConfigFile        => $OPT{conf},\n+                                  -Tie => "Tie::IxHash",\n+                                  -AllowMultiOptions => 1,\n+\t\t\t\t  -LowerCaseNames    => 1,\n+\t\t\t\t  -AutoTrue => 1);\n+my %CONF= $conf->getall;\n+validateconfiguration(\\%CONF);\t\t\t\t\t\t\t#validation of the configuration parameters\n+\n+my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools"; #GALAXY\n+\n+my $pt_log_file=$OPT{l}; #GALAXY\n+my $pt_links_file=$OPT{out1} if($OPT{out1}); #GALAXY\n+my $pt_flinks_file=$OPT{out2} if($OPT{out2}); #GALAXY\n+my $pt_sv_file=$OPT{out3} if($OPT{out3}); #GALAXY\n+my $pt_circos_file=$OPT{out4} if($OPT{out4}); #GALAXY\n+my $pt_bed_file=$OPT{out5} if($OPT{out5}); #GALAXY\n+\n+$CONF{general}{mates_file}=readlink($CONF{general}{mates_file});#GALAXY\n+$CONF{general}{cmap_file}=readlink($CONF{general}{cmap_file});#GALAXY\n+\n+my $log_file=$CONF{general}{output_dir}.$OPT{N}.".svdetect_run.log"; #GALAXY\n+open LOG,">$log_file" or die "$0: can\'t open 
".$log_file.":$!\\n";#GALAXY\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n+#COMMAND EXECUTION\n+foreach my $command (@ARGV){\n+    &{$func{$command}}();\n+}\n+print LOG "-- end\\n";#GALAXY\n+\n+close LOG;#GALAXY\n+system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY\n+exit'..b'y $chrName (@chrs){\n+\t      \n+\t\tdie("Error Config : The filtering parameter \\"chromosomes\\" is not valid\\n")\n+\t\tif(($chrName!~/^\\-/ && $exclude) || ($chrName=~/^\\-/ && !$exclude));\n+\t\t\n+\t    }\n+\t}\n+\t\n+\tif (( $conf{filtering}{order_filtering} )&& !$conf{filtering}{strand_filtering}) {\n+\t    die("Error Config : The parameter strand_filtering is set to \\"0\\" while order_filtering is selected".\n+\t\t"\\nChange strand_filtering to \\"1\\" if you want to use the order filtering\\n");\n+\t}\n+\tif (( !defined($conf{filtering}{mu_length}) || !defined($conf{filtering}{sigma_length}) )&& $conf{filtering}{order_filtering}) {\n+\t    die("Error Config : You should set parameters \\"mu_length\\" and \\"sigma_length\\" to use order filtering\\n");\n+\t}\n+\tif (( $conf{filtering}{insert_size_filtering} )&& !$conf{filtering}{strand_filtering}) {\n+\t    die("Error Config : The parameter strand_filtering is set to \\"0\\" while insert_size_filtering is selected".\n+\t\t"\\nChange strand_filtering to \\"1\\" if you want to use the insert size filtering\\n");\n+\t}\n+\tif (( !defined($conf{filtering}{mu_length}) || !defined($conf{filtering}{sigma_length}) )&& $conf{filtering}{insert_size_filtering}) {\n+\t    die("Error Config : You should set parameters \\"mu_length\\" and \\"sigma_length\\" to use discriminate insertions from deletions\\n");\n+\t}\n+\t\n+\tif (!defined($conf{filtering}{indel_sigma_threshold})) {\n+\t    $conf{filtering}{indel_sigma_threshold} = 2;\n+\t}\n+\tif 
(!defined($conf{filtering}{dup_sigma_threshold})) {\n+\t    $conf{filtering}{dup_sigma_threshold} = 2;\n+\t}\n+\tif (!defined($conf{filtering}{singleton_sigma_threshold})) {\n+\t    $conf{filtering}{singleton_sigma_threshold} = 4;\n+\t}\n+\t\n+\tif (!defined($conf{filtering}{nb_pairs_order_threshold})) {\n+\t    $conf{filtering}{nb_pairs_order_threshold} = 1;\n+\t}\n+\t\n+\tif (!defined($conf{filtering}{final_score_threshold})) {\n+\t    $conf{filtering}{final_score_threshold} = 0.8;\n+\t}\n+\t\n+\tif ($conf{filtering}{nb_pairs_order_threshold}>$conf{filtering}{nb_pairs_threshold}) {\n+\t    die("Error Config : Parameter \\"nb_pairs_order_threshold\\" should not exceed \\"nb_pairs_threshold\\"\\n");\n+\t}\n+\t\n+    }\n+    \n+    if($list_prgs=~/2circos$/){\n+\tforeach my $p (@circos_params) {\n+\t    next if($list_prgs=~/^ratio/ && $p eq "colorcode");\n+\t    die("Error Config : The circos parameter \\"$p\\" is not defined\\n") if (!defined $conf{circos}{$p});\n+\t}\n+    }\n+    \n+    if($list_prgs=~/2bed$/){\n+\tforeach my $p (@bed_params) {\n+\t    die("Error Config : The bed parameter \\"$p\\" is not defined\\n") if (!defined $conf{bed}{$p});\n+\t}\n+    }\n+    \n+    if($list_prgs=~/links2compare/){\n+\tforeach my $p (@compare_params) {\n+\t    die("Error Config : The compare parameter \\"$p\\" is not defined\\n") if (!defined $conf{compare}{$p});\n+\t}\n+\t\n+\tunless (defined($conf{compare}{same_sv_type})) {\n+\t    $conf{compare}{same_sv_type} = 0;\n+\t}\n+\t\n+\tunless (defined($conf{compare}{min_overlap})) {\n+\t    $conf{compare}{min_overlap} = 1E-9;\n+\t}\n+\t\n+\tif($conf{compare}{circos_output}){\n+\t    foreach my $p (@circos_params) {\n+\t\tnext if($list_prgs=~/^ratio/ && $p eq "colorcode");\n+\t\tdie("Error Config : The circos parameter \\"$p\\" is not defined\\n") if (!defined $conf{circos}{$p});\n+\t    }\n+\t}\n+\tif($conf{compare}{bed_output}){\n+\t    foreach my $p (@bed_params) {\n+\t\tdie("Error Config : The bed parameter \\"$p\\" is not 
defined\\n") if (!defined $conf{bed}{$p});\n+\t    }\n+\t    die("Error Config : The compare parameter \\"list_read_lengths\\" is not defined\\n") if (!defined $conf{compare}{list_read_lengths});\n+\n+\t    my @samples=split(",",$conf{compare}{list_samples});\n+\t    my @read_lengths=split(",",$conf{compare}{list_read_lengths});\n+\t    for my $i (0..$#samples){\n+\t\tmy @l=split("-",$read_lengths[$i]);\n+\t\t$conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]};\n+\t    }\n+\t}\n+    }\n+   \n+    \n+}\n+#------------------------------------------------------------------------------#\n+#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#\n'
b
diff -r 602e6912ac67 -r f090bf6ec765 svdetect/SVDetect_run_parallel.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/svdetect/SVDetect_run_parallel.xml Mon Jun 11 12:59:11 2012 -0400
b
b'@@ -0,0 +1,324 @@\n+<tool id="svdetect_run_parallel" name="Detect clusters of anomalously mapped pairs">\n+\n+<description>and identify structural variants</description>\n+\n+<command interpreter="perl">SVDetect_run_parallel.pl\n+\n+#if $getLinks.linking == "linking"\n+linking\n+<!-- -out1 \'$links_file\' -->\n+#end if\n+#if $getFilteredLinks.filtering == "filtering"\n+filtering\n+<!--- out2 \'$flinks_file\' -->\n+#if str($getFilteredLinks.links2SV) == "create"\n+links2SV\n+-out3 \'$sv_file\'\n+#end if\n+#if  $getFilteredLinks.file_conversion.file_conversion_select=="convert" and str($getFilteredLinks.file_conversion.links2circos) == "create"\n+links2circos\n+-out4 \'$circos_file\'\n+#end if\n+#if  $getFilteredLinks.file_conversion.file_conversion_select=="convert" and str($getFilteredLinks.file_conversion.links2bed) == "create"\n+links2bed\n+-out5 \'$bed_file\'\n+#end if\n+#end if\n+-conf \'$config_file\'\n+-l \'$log_file\'\n+-N \'$sample_name\'\n+\n+</command>\n+\n+<inputs>\n+\t<param name="sample_name" type="text" value="sample" label="Sample Name"/>\n+\t<param name="mates_file" format="bam" type="data" label="Input BAM file (.ab.bam)"/>\n+   \t<param name="cmap_file" format="len" type="data" label="Chromosomes list file (.len)" help="Tabulated file format with Chromosome ID (integer from 1), name and length"/>\n+  \t<param name="mates_orientation" type="select" format="txt" label="Type of sequencing technology and libraries">\n+\t\t<option value="FR">Illumina paired-ends</option>\n+\t\t<option value="RF">Illumina mate-pairs</option>\n+\t\t<option value="FR">SOLiD paired-ends</option>\n+\t\t<option value="RR">SOLiD mate-pairs</option>\n+   \t</param>\n+\t<param name="read1_length" type="integer" size="10" value="50" label="Read 1 length (bp)" help="Length of the first read in a pair (left read)"/>\n+\t<param name="read2_length" type="integer" size="10" value="50" label="Read 2 length (bp)" help="Length of the second read in a pair (right read)"/>\n+\t<param 
name="sv_type" type="select" format="txt" label="Type of SV to detect">\n+\t\t<option value="all">all types of SVs</option>\n+\t\t<option value="intra">intrachromosomal SVs only</option>\n+\t\t<option value="inter">interchromosomal SVs only</option>\n+  \t</param>\n+   \t\n+   \t<conditional name="getLinks">\n+   \t\t<param name="linking" type="select" label="Linking procedure" help="Detection and isolation of links">\n+\t\t\t<option value="linking">Yes</option>\n+\t\t\t<option value="">No, already done</option>\n+  \t\t</param>\n+\t\t<when value="">\n+    \t\t\t<!-- do nothing here -->\n+   \t\t</when>\n+    \t\t<when value="linking">\n+\t\t\t<param name="splitmate" label="Do you want to split the original mate file per chromosome for parallel computing?" type="boolean" truevalue="split" falsevalue="do_not_split" checked="True" help="Untick it if already done"/>\n+    \t\t\t<param name="window_size" type="integer" size="20" value="3000" label="Window size (bp)" help="Equal to at least \xe2\x80\x9c2\xc2\xb5+2\xe2\x88\x9a2\xcf\x83"/>\n+\t\t\t<param name="step_length" type="integer" size="20" value="250" label="Step length size (bp)" help="Equal to 1/2 or 1/4 of the window size"/>\n+    \t\t</when>\n+   \t</conditional>\n+\n+   \t<conditional name="getFilteredLinks">\n+\t   \t<param name="filtering" type="select" label="Filtering procedure" help="Filtering of links according different parameters and thresholds">\n+\t\t\t<option value="filtering">Yes</option>\n+                        <option value="">No</option>\n+\t  \t</param>\n+\t\t<when value="">\n+\t    \t\t<!-- do nothing here -->\n+\t   \t</when>\n+\t    \t<when value="filtering">\n+\t\t\t\n+\t\t\t<param name="splitlink" label="Do you want to split the original link file per chromosome for parallel computing?" 
type="boolean" truevalue="split" falsevalue="do_not_split" checked="False" help="Untick it if (the linking is) already done"/>\n+\t\t\t<param name="chromosomes" type="text" size="20" label="List of chromosome names to keep or exclude"/>\n+\t\t\t<param name="nb_pairs_threshold" type="integer" size="20" value="5" label="Minimum number of pairs in a cluster"/>\n+\t\t\n+\t\t\t<conditional name="filter1">\n+\t   \t\t\t<param name='..b'lt;general&gt;\n+input_format = bam\n+sv_type = ${sv_type}\n+mates_orientation=${mates_orientation}\n+read1_length=${read1_length}\n+read2_length=${read2_length}\n+mates_file=${mates_file}\n+cmap_file=${cmap_file}\n+tmp_dir=$__new_file_path__/svdetect/tmp\n+output_dir=$__new_file_path__/svdetect\n+num_threads=8\n+&lt;/general&gt; \n+\n+#if $getLinks.linking == "linking"\n+&lt;detection&gt;\n+#if str($getLinks.splitmate) == "split"\n+split_mate_file=1\n+#else\n+split_mate_file=0\n+#end if\n+window_size=${getLinks.window_size}\n+step_length=${getLinks.step_length}\n+&lt;/detection&gt; \n+#end if\n+\n+#if $getFilteredLinks.filtering == "filtering"\n+&lt;filtering&gt;\n+#if str($getFilteredLinks.splitlink) == "split"\n+split_link_file=1\n+#else\n+split_link_file=0\n+#end if\n+#if str($getFilteredLinks.chromosomes) != ""\n+chromosomes=${getFilteredLinks.chromosomes}\n+#end if\n+nb_pairs_threshold=${getFilteredLinks.nb_pairs_threshold}\n+#if $getFilteredLinks.filter1.strand_filtering == "strand"\n+strand_filtering=1\n+final_score_threshold=${getFilteredLinks.filter1.final_score_threshold}\n+#if $getFilteredLinks.filter1.filter2.order_filtering == "order"\n+order_filtering=1\n+mu_length=${getFilteredLinks.filter1.filter2.mu_length}\n+sigma_length=${getFilteredLinks.filter1.filter2.sigma_length}\n+nb_pairs_order_threshold=${getFilteredLinks.filter1.filter2.nb_pairs_order_threshold}\n+#if $getFilteredLinks.filter1.filter2.filter3.insert_size_filtering == 
"insert"\n+insert_size_filtering=1\n+indel_sigma_threshold=${getFilteredLinks.filter1.filter2.filter3.indel_sigma_threshold}\n+dup_sigma_threshold=${getFilteredLinks.filter1.filter2.filter3.dup_sigma_threshold}\n+singleton_sigma_threshold=${getFilteredLinks.filter1.filter2.filter3.singleton_sigma_threshold}\n+#else\n+insert_size_filtering=0\n+#end if\n+#else\n+order_filtering=0\n+#end if\n+#else\n+strand_filtering=0\n+#end if\n+&lt;/filtering&gt; \n+#end if\n+\n+#if $getFilteredLinks.filtering == "filtering"\n+#if $getFilteredLinks.file_conversion.file_conversion_select == "convert"\n+#if str($getFilteredLinks.file_conversion.links2circos) == "create"\n+&lt;circos&gt;\n+organism_id=${getFilteredLinks.file_conversion.organism_id}\n+&lt;colorcode&gt;\n+#for $color_repeat in $getFilteredLinks.file_conversion.color_code\n+${color_repeat.color}=${color_repeat.interval}\n+#end for\n+&lt;/colorcode&gt;\n+&lt;/circos&gt;\n+#end if\n+#if str($getFilteredLinks.file_conversion.links2bed) == "create"\n+&lt;bed&gt;\n+&lt;colorcode&gt;\n+#for $color_repeat in $getFilteredLinks.file_conversion.color_code\n+#if str($color_repeat.color)== "grey"\n+190,190,190=${color_repeat.interval}\n+#end if\n+#if str($color_repeat.color)== "black"\n+0,0,0=${color_repeat.interval}\n+#end if\n+#if str($color_repeat.color)== "blue"\n+0,0,255=${color_repeat.interval}\n+#end if\n+#if str($color_repeat.color)== "green"\n+0,255,0=${color_repeat.interval}\n+#end if\n+#if str($color_repeat.color)== "purple"\n+153,50,205=${color_repeat.interval}\n+#end if\n+#if str($color_repeat.color)== "orange"\n+255,140,0=${color_repeat.interval}\n+#end if\n+#if str($color_repeat.color)== "red"\n+255,0,0=${color_repeat.interval}\n+#end if\n+#end for\n+&lt;/colorcode&gt;\n+&lt;/bed&gt;\n+#end if\n+#end if\n+#end if\t\n+\t</configfile>\n+</configfiles>\n+\n+  <help>\n+**What it does**\n+\n+SVDetect - Version : 0.8\n+\n+Parallel version (nCPU=8)\n+\n+SVDetect is a application for the isolation and the type prediction of 
intra- and inter-chromosomal rearrangements from paired-end/mate-pair sequencing data provided by high-throughput sequencing technologies\n+\n+This tool aims to identify structural variations (SVs) with both clustering and sliding-window strategies, and to help with their visualization at the genome scale.\n+SVDetect is compatible with SOLiD and Illumina (>=1.3) reads.\n+\n+Manual documentation is available at http://svdetect.sourceforge.net/Site/Manual.html\n+\n+-----\n+\n+.. class:: infomark\n+\n+Contact Bruno Zeitouni (bruno.zeitouni@curie.fr) for any questions or concerns about the Galaxy implementation of SVDetect.\n+\n+  </help>\n+\n+</tool>\n'