view bin/rmap2eland.pl @ 1:adc0f7765d85 draft

planemo upload
author bioitcore
date Thu, 07 Sep 2017 15:06:58 -0400
parents
children
line wrap: on
line source

use strict;

my $rmapfilename=$ARGV[0];
my $readsfilename=$ARGV[1];
my $elandfilename=$ARGV[2];

my $detectformat=`head -c 1 $readsfilename`;

#system("grep \"$detectformat\" $readsfilename |sort >$readsfilename.sort");
system("awk 'NR%2==1' $readsfilename |sort >$readsfilename.sort");
system("sort -k4,4 $rmapfilename >$rmapfilename.sort");


open(readsfile, $readsfilename.".sort");



#$looplinenumbers=2 if ($detectformat eq ">");
open(rmapfile, $rmapfilename.".sort");
open(elandfile, ">".$elandfilename);

while(my $rmapline=<rmapfile>)
{
	chomp($rmapline);
	my ($mapped_id, $start, $end, $rmapreadname, $mismatch, $strand)=split("\t",$rmapline);
	while(my $readline=<readsfile>)
	{
		if($readline=~/^$detectformat/)
		{
			chomp($readline);
			my $readname=substr($readline, 1, length($readline)-1);
			
			
			if($readname ne $rmapreadname)
			{
				print elandfile $readname,"\tNA\tNM\n";
				next;
			}
			else
			{
				my @mapped_ids=();
				my @mapped_pos=();
				my @mapped_strand=();
				push(@mapped_ids, $mapped_id);
				push(@mapped_pos,$start);
				push(@mapped_strand,$strand);
				while(1)
				{
					$rmapline=<rmapfile>;
					chomp($rmapline);
					($mapped_id, $start, $end, $rmapreadname, $mismatch, $strand)=split("\t",$rmapline);
					if( $rmapreadname eq $readname )
					{
						push(@mapped_ids, $mapped_id);
						push(@mapped_pos,$start);
						push(@mapped_strand,$strand);
					}
					else
					{
						seek(rmapfile, -1*length($rmapline)-1,1);
						print elandfile $readname,"\t";
						print elandfile "NA\t";
						print elandfile scalar(@mapped_ids),":0:0\t";
						for(my $i=0;$i<@mapped_ids;$i++)
						{
							print elandfile "/",$mapped_ids[$i];
							print elandfile ":",$mapped_pos[$i]+1;
							if($mapped_strand[$i] eq "+")
							{
								print elandfile "F0,";
							}
							else
							{
								print elandfile "R0,";
							}
						
						}
						print elandfile "\n";
						last;
					}
				}
				last;
					
			}
		}	
	}
}

while(my $readline=<readsfile>)
{
        if($readline=~/^$detectformat/)
        {
             chomp($readline);
             my $readname=substr($readline, 1, length($readline)-1);
             print  elandfile $readname,"\tNA\tNM\n";
        }
}

close(elandfile);
close(rmapfile);


close(readsfile);