diff bin/bowtie2eland.pl @ 1:adc0f7765d85 draft

planemo upload
author bioitcore
date Thu, 07 Sep 2017 15:06:58 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/bowtie2eland.pl	Thu Sep 07 15:06:58 2017 -0400
@@ -0,0 +1,175 @@
+use strict;
+
+my $bowtiefilename=$ARGV[0];
+my $readsfilename=$ARGV[1];
+my $elandfilename=$ARGV[2];
+
+open(readsfile, $readsfilename);
+
+my $detectformat=`head -c 1 $readsfilename`;
+
+#my $firstletter=$detectformat;
+#my $looplinenumbers=4;
+
+#$looplinenumbers=2 if ($detectformat eq ">");
+open(bowtiefile, $bowtiefilename);
+open(elandfile, ">".$elandfilename);
+my $readfilelinenum=0;
+# hash the positions of the alignments for each read id
+my %readposhash;
+my $bowtiepos = tell (bowtiefile);
+while (my $bowtieline=<bowtiefile>)
+{
+	my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline);
+	if (not exists $readposhash{$bowtiereadname} )
+	{
+		$readposhash{$bowtiereadname} = $bowtiepos;
+	}
+	$bowtiepos = tell (bowtiefile);
+}
+
+while(my $readline=<readsfile>)
+{
+	$readfilelinenum++;
+	if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1))
+	{
+		chomp($readline);
+		my $readname=substr($readline, 1, length($readline)-1);
+		if( not exists $readposhash{$readname} )
+		{
+			print elandfile $readname,"\tNA\tNM\n";
+			next;
+		}
+		else
+		{
+			my @mapped_ids=();
+                        my @mapped_pos=();
+                        my @mapped_strand=();
+			seek(bowtiefile, $readposhash{$readname}, 0);
+			while (my $bowtieline=<bowtiefile>)
+			{
+				my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline);
+				if($readname eq $bowtiereadname)
+				{
+					push(@mapped_ids, $mapped_id);
+                                        push(@mapped_pos,$pos);
+                                        push(@mapped_strand,$strand);
+				}
+				else
+				{
+					last;
+				}
+				
+			}
+			print elandfile $readname,"\t";
+			print elandfile "NA\t";
+			print elandfile scalar(@mapped_ids),":0:0\t";
+			for(my $i=0;$i<@mapped_ids;$i++)
+			{
+				print elandfile "/",$mapped_ids[$i];
+				print elandfile ":",$mapped_pos[$i]+1;
+				if($mapped_strand[$i] eq "+")
+				{
+					print elandfile "F0,";
+				}
+				else
+				{
+					print elandfile "R0,";
+				}
+
+			}
+			print elandfile "\n";
+
+		}		
+	}
+}
+
+close(elandfile);
+close(bowtiefile);
+close(readsfile);
+
+exit;
+while(my $bowtieline=<bowtiefile>)
+{
+	my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline);
+	while(my $readline=<readsfile>)
+	{
+		$readfilelinenum++;
+		if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1))
+		#if($readline=~/^$detectformat/)
+		{
+			chomp($readline);
+			my $readname=substr($readline, 1, length($readline)-1);
+			
+			
+			if($readname ne $bowtiereadname)
+			{
+				print elandfile $readname,"\tNA\tNM\n";
+				next;
+			}
+			else
+			{
+				my @mapped_ids=();
+				my @mapped_pos=();
+				my @mapped_strand=();
+				push(@mapped_ids, $mapped_id);
+				push(@mapped_pos,$pos);
+				push(@mapped_strand,$strand);
+				while(1)
+				{
+					$bowtieline=<bowtiefile>;
+					my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline);
+					if( $bowtiereadname eq $readname )
+					{
+						push(@mapped_ids, $mapped_id);
+						push(@mapped_pos,$pos);
+						push(@mapped_strand,$strand);
+					}
+					else
+					{
+						seek(bowtiefile, -1*length($bowtieline),1);
+						print elandfile $readname,"\t";
+						print elandfile "NA\t";
+						print elandfile scalar(@mapped_ids),":0:0\t";
+						for(my $i=0;$i<@mapped_ids;$i++)
+						{
+							print elandfile "/",$mapped_ids[$i];
+							print elandfile ":",$mapped_pos[$i]+1;
+							if($mapped_strand[$i] eq "+")
+							{
+								print elandfile "F0,";
+							}
+							else
+							{
+								print elandfile "R0,";
+							}
+						
+						}
+						print elandfile "\n";
+						last;
+					}
+				}
+				last;
+					
+			}
+		}	
+	}
+}
+
+while(my $readline=<readsfile>)
+{
+	$readfilelinenum++;
+	if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1))
+	#if($readline=~/^$detectformat/)
+        {
+             chomp($readline);
+             my $readname=substr($readline, 1, length($readline)-1);
+	     print  elandfile $readname,"\tNA\tNM\n";
+	}
+}
+
+close(elandfile);
+close(bowtiefile);
+
+
+close(readsfile);