changeset 23:edddaa8ab855 draft

Uploaded
author mcharles
date Thu, 21 Aug 2014 08:39:15 -0400
parents afaf2e8aedcc
children e8e6b962c1f2
files rapsodyn/extractseq.pl rapsodyn/extractseq.xml
diffstat 2 files changed, 152 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rapsodyn/extractseq.pl	Thu Aug 21 08:39:15 2014 -0400
@@ -0,0 +1,132 @@
+#!/usr/bin/perl
+#V1.10
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $input_variant_file;
+my $input_assembly_file;
+my $WINDOWS_LENGTH = 50;
+
+GetOptions (
+"input_variant_file=s" => \$input_variant_file,
+"input_assembly_file=s" => \$input_assembly_file,
+"window_length=i" => \$WINDOWS_LENGTH
+) or die("Error in command line arguments\n");
+
+open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n");
+open(INA, $input_assembly_file) or die ("Can't open $input_assembly_file\n");
+
+my @variant_list;
+
+
+### Retrieving the assembly
+my %genome;
+
+my $current_header="";
+my $current_seq="";
+while (my $ligne = <INA>){
+	if ($ligne =~ /^\>(.*?)\s*$/){
+		if ($current_header){
+			$genome{$current_header} = $current_seq;
+		}
+		$current_header=$1;
+		$current_seq = "";
+	}
+	else {
+		if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){
+			$current_seq .= $1;
+		}
+		else {
+			print STDERR "Erreur Parsing n°2\n$ligne\n";
+		}
+	}
+}
+#TRAITEMENT DU DERNIER
+if ($current_header){
+	$genome{$current_header} = $current_seq;
+	undef($current_seq);
+}
+close (INA);
+
+
+### Retrieving the variant
+while (my $ligne=<INV>){
+	if ($ligne !~ /^\s*$/){
+		my %variant;
+		my @fields = split (/\s+/,$ligne);
+		$variant{"ref"}=$fields[0];
+		$variant{"position"}=$fields[1];
+		$variant{"baseref"}=$fields[2];
+		$variant{"depth"}=$fields[3];
+		$variant{"pileup"}=$fields[4];
+		
+		
+		my $start = &max($variant{"position"} - $WINDOWS_LENGTH,1);
+		my $stop = &min ($variant{"position"} + $WINDOWS_LENGTH,length($genome{$variant{"ref"}}));
+		my $length = $stop-$start+1;
+		
+		#print $variant{"position"}," / ",length($genome{$variant{"ref"}})," / ","$start / $stop / $length \n";
+
+		$variant{"SEQ"} = substr $genome{$variant{"ref"}},$start-1,$length;
+		
+		my $pileup = $variant{"pileup"};
+		$pileup =~ s/\$//g; #the read start at this position
+		$pileup =~ s/\^.//g; #the read end at this position
+		my $descriptor = $variant{"position"}."_".$variant{"depth"}."_";
+		if ($pileup=~/\+([0-9]+)([ACGTNacgtn]+)/){
+			$descriptor .="I".$1."_".$2;
+		}
+		elsif ($pileup=~/\-([0-9]+)([ACGTNacgtn]+)/){
+			$descriptor .="D".$1."_".$2;
+		}
+		elsif ($pileup=~/([ACGTNacgtn])/){
+			$descriptor.="M1"."_".$1;
+		}
+		else {
+			$descriptor.="?_?";
+		}
+		$variant{"desc"}=$descriptor;
+		
+		print ">",$variant{"ref"},"_",$descriptor,"\n",$variant{"SEQ"},"\n";
+
+		
+		
+		#print ">",$variant{"ref"},"_",$variant{"position"},"_",$variant{"depth"},"\n",$variant{"SEQ"},"\n";
+		
+		push(@variant_list,\%variant);
+	}
+}
+close (INV);
+
+
+
+
+
+
+
+
+
+#***********
+sub min{
+	my $first = shift;
+	my $second = shift;
+	if ($first <= $second){
+		return $first;
+	}
+	else {
+		return $second;
+	}
+}
+
+sub max {
+	my $first = shift;
+	my $second = shift;
+	if ($first >= $second){
+		return $first;
+	}
+	else {
+		return $second;
+	}
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rapsodyn/extractseq.xml	Thu Aug 21 08:39:15 2014 -0400
@@ -0,0 +1,20 @@
+<tool id="extractseq" name="extractseq" version="0.03">
+<description>Extract Sequence around variant position</description>
+<command interpreter="perl">
+    extractseq.pl -input_variant_file $input_variant_file -input_assembly_file $input_assembly_file -window_length $window_length > $output_file 
+</command>
+<inputs>
+<param name="input_variant_file"  type="data" format="pileup" label="Select a suitable input VARIANT file from your history"/>
+<param name="input_assembly_file"  type="data" format="fasta" label="Select a suitable input ASSEMBLY file from your history"/>
+<param name="window_length" type="integer" value="50" label="Number of bases extracted before and after the variant position"/>
+</inputs>
+<outputs>
+ <data name="output_file" format="fasta" label="${tool.name} on ${on_string}"/>
+</outputs>
+
+<help>
+
+
+
+</help>
+</tool>