Repository 'temp'
hg clone https://toolshed.g2.bx.psu.edu/repos/portiahollyoak/temp

Changeset 21:9672fe07a232 (2016-12-05)
Previous changeset 20:6e02b9179a24 (2016-10-26)
Commit message:
planemo upload for repository https://github.com/portiahollyoak/Tools commit 0fea84d05f8976b8360a8b4943ecb01b87e3ade0-dirty
modified:
scripts/TEMP_Absence.sh
scripts/TEMP_Insertion.sh
scripts/make.bp.bed.pl
scripts/pickClippedFastq.pl
scripts/pickOverlapPair.ex.pl
scripts/pickOverlapPair.ex_MEM.pl
scripts/pickOverlapPair.in.pl
scripts/pickSoftClipping.over.pl
scripts/pickUniqIntervalPos.pl
scripts/pickUniqMate.pl
scripts/pickUniqPairFastq.pl
scripts/pickUniqPairFastq_MEM.pl
scripts/pickUniqPos.pl
scripts/pickUniqPos_MEM.pl
added:
scripts/pickUniqMate.pl.orig
scripts/pickUniqPairFastq.pl.orig
scripts/pickUniqPairFastq_MEM.pl.orig
scripts/pickUniqPos.pl.orig
scripts/pickUniqPos_MEM.pl.orig
temp_absences.xml
temp_insertions.xml
test-data/dm3_chr2L.2bit
test-data/test_TE_annotation.bed
test-data/test_absence_out.bed
test-data/test_chromosome.sorted.bam
test-data/test_concensus.fa
test-data/test_insertions_out.bed
removed:
temp.xml
test-data/chr2l_bwa_mem.bam
test-data/dm6_chr2l.twobit
test-data/test_TE_annotation.gff3
test-data/test_chromosome.absence.refined.bp.summary
test-data/test_chromosome.insertion.refined.bp.summary
test-data/test_consensus.fa
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/TEMP_Absence.sh
--- a/scripts/TEMP_Absence.sh Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/TEMP_Absence.sh Mon Dec 05 09:58:47 2016 -0500
b
@@ -116,7 +116,7 @@
 fi
 
 #Detect excision sites
-samtools view -XF 0x2 $name > $i.unpair.sam
+samtools view -F 0x2 $name > $i.unpair.sam
 awk -F "\t" '{OFS="\t"; if ($9 != 0) print $0}' $i.unpair.sam > temp1.sam
 perl $BINDIR/pickUniqIntervalPos.pl temp1.sam $INSERT > $i.unproper.uniq.interval.bed
 
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/TEMP_Insertion.sh
--- a/scripts/TEMP_Insertion.sh Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/TEMP_Insertion.sh Mon Dec 05 09:58:47 2016 -0500
[
@@ -126,7 +126,7 @@
 fi
 
 # Get the mate seq of the uniq-unpaired reads
-samtools view -XF 0x2  $name > $i.unpair.sam
+samtools view -F 0x2  $name > $i.unpair.sam
 if [[ $SCORE -eq 0 ]]
 then
     perl $BINDIR/pickUniqPairFastq.pl $i.unpair.sam $i.unpair.uniq
@@ -144,7 +144,7 @@
 
 
 #Summary
-samtools view -hSXF 0x2 $i.unpair.uniq.transposons.sam > $i.unpair.uniq.transposons.unpair.sam
+samtools view -hSF 0x2 $i.unpair.uniq.transposons.sam > $i.unpair.uniq.transposons.unpair.sam
 perl $BINDIR/pickUniqMate.pl $i.unpair.uniq.transposons.unpair.sam $i.unpair.uniq.bed > $i.unpair.uniq.transposons.bed
 cp $i.unpair.uniq.transposons.bed $i.unpair.uniq.transposons.filtered.bed
 
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/make.bp.bed.pl
--- a/scripts/make.bp.bed.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/make.bp.bed.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -18,7 +18,7 @@
  my $l=$i+3;
  system("cut -f7,4,6,$j,$k,$l $ARGV[0] > temp");
  open (input, "<temp") or die "Can't open temp since $!\n";
- open (output, ">>$name.insertion.bp.bed") or die "Can't open $name.insertion.bp.bed since $!\n";
+ open (output, ">$name.insertion.bp.bed") or die "Can't open $name.insertion.bp.bed since $!\n";
  my $header=<input>;
  while (my $line=<input>) {
      chomp($line);
@@ -26,6 +26,7 @@
      if (($b[4] ne "0")||($b[5] ne "0")) {
  my @c=split(/\:/, $b[2]);
  my @d=split(/\./, $c[1]);
+ if ($c[0] eq "P") {next;}
  if ($d[0] > $d[1]) {
      my $temp=$d[0];
      $d[0]=$d[1];
@@ -33,9 +34,9 @@
  }
  my $lower=$d[0];
  my $upper=$d[1];
-         if (($lower >= 0) && ($upper >= 0)) {
-    print output "$c[0]\t$lower\t$upper\t$b[0]\t$b[1]\t$b[3]\t$b[4]\t$b[5]\n";
-         }
+ if (($lower >= 0) && ($upper >= 0)) {
+     print output "$c[0]\t$lower\t$upper\t$b[0]\t$b[1]\t$b[3]\t$b[4]\t$b[5]\n";
+ }
  $chrs{$c[0]}=1;
      }
  }
@@ -85,25 +86,35 @@
      }
      
      if (-s "tmp2") {
- system("bedtools subtract -a tmp -b tmp2 -f 1.0 > tmp3");
- open (input, "<tmp3") or die "Can't open tmp3 since $!\n";
+ my %to_filter=();
+ open (input, "<tmp2") or die "Can't open tmp2 since $!\n";
+ while (my $line=<input>) {
+     chomp($line);
+     my @a=split(/\t/, $line);
+     $to_filter{"$a[0]\:$a[1]\:$a[2]\:$a[3]\:$a[5]"}=1;
+ }
+ close input;
+ open (input, "<tmp") or die "Can't open tmp since $!\n";
  open (output, ">$name.insertion.bp.bed") or die "Can't open $name.insertion.bp.bed since $!\n";
  while (my $line=<input>) {
      chomp($line);
      my @a=split(/\t/, $line);
-     my $direction="sense";
-     if ($a[5] eq "-") {$direction="antisense";}
-     my $chr_num=$a[0];
-     $chr_num =~ s/chr//;
-     if (($chrs{$a[0]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[0];}
-     print output "$chr_num\t$a[1]\t$a[2]\t$a[3]\t$direction\t$a[6]\t$a[7]\t$a[8]\n";
+     if (!defined $to_filter{"$a[0]\:$a[1]\:$a[2]\:$a[3]\:$a[5]"}) {
+ my $direction="sense";
+ if ($a[5] eq "-") {$direction="antisense";}
+ my $chr_num=$a[0];
+ $chr_num =~ s/chr//;
+ if (($chrs{$a[0]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[0];}
+ print output "$chr_num\t$a[1]\t$a[2]\t$a[3]\t$direction\t$a[6]\t$a[7]\t$a[8]\n";
+     }
  }
  close input;
  close output;
      }
- }
 
- system("rm tmp*");
+     system("rm tmp*");
+
+ }
 
     }
 }
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickClippedFastq.pl
--- a/scripts/pickClippedFastq.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/pickClippedFastq.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -38,7 +38,7 @@
     my $upper=$a[2]+15;
     if (($lower > 0)&&($upper > 0))
     {
- system("samtools view -hXf 0x2 $ARGV[0].sorted.bam $a[0]\:$lower\-$upper > temp.sam");
+ system("samtools view -hf 0x2 $ARGV[0].sorted.bam $a[0]\:$lower\-$upper > temp.sam");
 
  open in,"temp.sam";
  my %pe1;
@@ -48,7 +48,9 @@
      chomp;
      my @f=split/\t/,$_,12;
      ## read number 1 or 2
-     my ($rnum)=$f[1]=~/(\d)$/;
+     #my ($rnum)=$f[1]=~/(\d)$/;
+     my $rnum=1;
+     if (($f[1] & 128) == 128) {$rnum=2;}
      
      ## XT:A:* 
      my ($xt)=$f[11]=~/XT:A:(.)/;
@@ -62,7 +64,7 @@
  my $clipseq="";
  my @z=split(/M/, $f[5]);
 
- if (($f[5]=~/S$/)&&($f[1]=~/r/))
+ if (($f[5]=~/S$/)&&(($f[1] & 16) == 16))
  {
      my (@cigar_m)=$f[5]=~/(\d+)M/g;
      my (@cigar_d)=$f[5]=~/(\d+)D/g;
@@ -79,7 +81,7 @@
      }
  }
 
-                elsif (($f[1]=~/R/)&&($z[0]=~/S/))
+                elsif ((($f[1] & 32) == 32)&&($z[0]=~/S/))
                 {
                     $coor=$f[3]; $strand="+";
 
@@ -121,7 +123,9 @@
         {
             chomp;
             my @f=split/\t/,$_,12;
-            my ($rnum)=$f[1]=~/(\d)$/;
+            #my ($rnum)=$f[1]=~/(\d)$/;
+     my $rnum=1;
+     if (($f[1] & 128) == 128) {$rnum=2;}
             my ($xt)=$f[11]=~/XT:A:(.)/;
 
             if ($f[5]=~/S/) {
@@ -132,7 +136,7 @@
                 my $clipseq="";
                 my @z=split(/M/, $f[5]);
 
-                if (($f[5]=~/S$/)&&($f[1]=~/r/))
+                if (($f[5]=~/S$/)&&(($f[1] & 16) == 16))
                 {
                     my (@cigar_m)=$f[5]=~/(\d+)M/g;
                     my (@cigar_d)=$f[5]=~/(\d+)D/g;
@@ -149,7 +153,7 @@
                     }
                 }
 
-                elsif (($f[1]=~/R/)&&($z[0]=~/S/))
+                elsif ((($f[1] & 32) == 32)&&($z[0]=~/S/))
                 {
                     $coor=$f[3]; $strand="+";
 
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickOverlapPair.ex.pl
--- a/scripts/pickOverlapPair.ex.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/pickOverlapPair.ex.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -56,7 +56,7 @@
     my $chr_num=$a[0];
     $chr_num =~ s/chr//;
     if (($chrs{$a[0]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[0];}
-    system("samtools view -Xf 0x2 $title $chr_num\:$leftlower\-$leftupper $chr_num\:$rightlower\-$rightupper > temp.sam");
+    system("samtools view -f 0x2 $title $chr_num\:$leftlower\-$leftupper $chr_num\:$rightlower\-$rightupper > temp.sam");
     
     open in,"temp.sam";
     my %ps=();
@@ -70,14 +70,16 @@
  chomp;
  my @f=split/\t/,$_,12;
  ## read number 1 or 2
- my ($rnum)=$f[1]=~/(\d)$/;
+ #my ($rnum)=$f[1]=~/(\d)$/;
+        my $rnum=1;
+        if (($f[1] & 128) == 128) {$rnum=2;}
 
  ## XT:A:* 
  my ($xt)=$f[11]=~/XT:A:(.)/;
 
  ## Coordinate
  my $coor=$f[3];
- if ($f[1]=~/r/)
+ if (($f[1] & 16) == 16)
  {
      if ($xt eq "U") {$uniqm{$f[0]}=1;}
      my (@cigar_m)=$f[5]=~/(\d+)M/g;
@@ -87,7 +89,7 @@
      my $aln_ln=sum(@cigar_m,@cigar_d);
      $me{$f[0]}=$f[3]+$aln_ln-1;
  }
- elsif ($f[1]=~/R/) {
+ elsif (($f[1] & 32) == 32) {
      $ps{$f[0]}=$f[3];
      if ($xt eq "U") {$uniqp{$f[0]}=1;}
  }
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickOverlapPair.ex_MEM.pl
--- a/scripts/pickOverlapPair.ex_MEM.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/pickOverlapPair.ex_MEM.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -56,7 +56,7 @@
     my $chr_num=$a[0];
     $chr_num =~ s/chr//;
     if (($chrs{$a[0]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[0];}
-    system("samtools view -Xf 0x2 $title $chr_num\:$leftlower\-$leftupper $chr_num\:$rightlower\-$rightupper > temp.sam");
+    system("samtools view -f 0x2 $title $chr_num\:$leftlower\-$leftupper $chr_num\:$rightlower\-$rightupper > temp.sam");
     
     open in,"temp.sam";
     my %ps=();
@@ -70,7 +70,9 @@
  chomp;
  my @f=split/\t/,$_,12;
  ## read number 1 or 2
- my ($rnum)=$f[1]=~/(\d)$/;
+ #my ($rnum)=$f[1]=~/(\d)$/;
+        my $rnum=1;
+        if (($f[1] & 128) == 128) {$rnum=2;}
 
  ## XT:A:* 
         my $xt="";
@@ -92,7 +94,7 @@
 
  ## Coordinate
  my $coor=$f[3];
- if ($f[1]=~/r/)
+ if (($f[1] & 16) == 16)
  {
      if ($xt eq "U") {$uniqm{$f[0]}=1;}
      my (@cigar_m)=$f[5]=~/(\d+)M/g;
@@ -102,7 +104,7 @@
      my $aln_ln=sum(@cigar_m,@cigar_d);
      $me{$f[0]}=$f[3]+$aln_ln-1;
  }
- elsif ($f[1]=~/R/) {
+ elsif (($f[1] & 32) == 32) {
      $ps{$f[0]}=$f[3];
      if ($xt eq "U") {$uniqp{$f[0]}=1;}
  }
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickOverlapPair.in.pl
--- a/scripts/pickOverlapPair.in.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/pickOverlapPair.in.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -29,7 +29,7 @@
     }
     my $lower=$b[0]-$frag;
     my $upper=$c[0]+$frag;
-    system("samtools view -Xf 0x2 $title $a[0]\:$lower\-$upper > temp.sam");
+    system("samtools view -f 0x2 $title $a[0]\:$lower\-$upper > temp.sam");
     
     open in,"temp.sam";
     my %ps=();
@@ -41,13 +41,15 @@
  chomp;
  my @f=split/\t/,$_,12;
  ## read number 1 or 2
- my ($rnum)=$f[1]=~/(\d)$/;
+ #my ($rnum)=$f[1]=~/(\d)$/;
+        my $rnum=1;
+        if (($f[1] & 128) == 128) {$rnum=2;}
 
  ## XT:A:* 
  my ($xt)=$f[11]=~/XT:A:(.)/;
 
  ## Coordinate
- if ($f[1]=~/r/)
+ if (($f[1] & 16) == 16)
  {
      my (@cigar_m)=$f[5]=~/(\d+)M/g;
      my (@cigar_d)=$f[5]=~/(\d+)D/g;
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickSoftClipping.over.pl
--- a/scripts/pickSoftClipping.over.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/pickSoftClipping.over.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -38,7 +38,7 @@
     $chr_num =~ s/chr//;
     if (($chrs{$a[2]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[2];}
     system("samtools view -bu $title $chr_num\:$lower\-$upper > temp.bam");
-    system("samtools view -Xf 0x2 temp.bam > temp.sam");
+    system("samtools view -f 0x2 temp.bam > temp.sam");
 
     my $leftseq="";
     my $rightseq="";
@@ -79,7 +79,9 @@
  chomp;
  my @f=split/\t/,$_,12;
  ## read number 1 or 2
- my ($rnum)=$f[1]=~/(\d)$/;
+ #my ($rnum)=$f[1]=~/(\d)$/;
+        my $rnum=1;
+        if (($f[1] & 128) == 128) {$rnum=2;}
 
  ## XT:A:* 
  my ($xt)=$f[11]=~/XT:A:(.)/;
@@ -94,7 +96,7 @@
             my $strand="";
      my @z=split(/M/, $f[5]);
 
-            if (($f[5]=~/S$/)&&($f[1]=~/r/))
+            if (($f[5]=~/S$/)&&(($f[1] & 16) == 16))
             {
  my (@cigar_m)=$f[5]=~/(\d+)M/g;
                 my (@cigar_d)=$f[5]=~/(\d+)D/g;
@@ -116,7 +118,7 @@
  }
 # print "\n";
             }
-            elsif (($f[1]=~/R/)&&($z[0]=~/S/)) {
+            elsif ((($f[1] & 32) == 32)&&($z[0]=~/S/)) {
  $coor=$f[3]; $strand="+";
  my (@clipped)=$z[0]=~/(\d+)S/g;
                 my $cliplen=sum(@clipped);
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqIntervalPos.pl
--- a/scripts/pickUniqIntervalPos.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/pickUniqIntervalPos.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -10,7 +10,9 @@
  chomp;
  my @f=split/\t/,$_,12;
  ## read number 1 or 2
- my ($rnum)=$f[1]=~/(\d)$/;
+ #my ($rnum)=$f[1]=~/(\d)$/;
+ my $rnum=1;
+ if (($f[1] & 128) == 128) {$rnum=2;}
 
  ## XT:A:* 
  my ($xt)=$f[11]=~/XT:A:(.)/;
@@ -18,7 +20,7 @@
  my $strand="+";
 
  ## parse CIGAR
- if(($f[1]=~/R/)&&($f[8] > $ARGV[1])&&($f[8] <= 10000))
+ if((($f[1] & 32) == 32)&&($f[8] > $ARGV[1])&&($f[8] <= 10000))
         {
                 # CIGAR
                 my (@cigar_m)=$f[5]=~/(\d+)M/g;
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqMate.pl
--- a/scripts/pickUniqMate.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/pickUniqMate.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -51,7 +51,10 @@
 
  if ($mm > 5) {next;}
 
- my ($rnum)=$f[1]=~/(\d)$/;
+ #my ($rnum)=$f[1]=~/(\d)$/;
+        my $rnum=1;
+        if (($f[1] & 128) == 128) {$rnum=2;}
+
  # CIGAR
  my (@cigar_m)=$f[5]=~/(\d+)M/g;
  my (@cigar_d)=$f[5]=~/(\d+)D/g;
@@ -60,7 +63,7 @@
  my $aln_ln=sum(@cigar_m,@cigar_d);
 
  my $strand="+";
- if($f[1]=~/r/)
+ if(($f[1] & 16) == 16)
  {
      my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
      $f[9]=$seq->revcom->seq;
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqMate.pl.orig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/pickUniqMate.pl.orig Mon Dec 05 09:58:47 2016 -0500
[
@@ -0,0 +1,106 @@
+#!/share/bin/perl
+use List::Util qw(sum);
+use Bio::Seq;
+
+die "perl $0 <mate sam with header> <uniq bed>\n" if @ARGV<1;
+
+open in,$ARGV[1];
+my %uniq;
+while(<in>)
+{
+ chomp;
+ my @f=split;
+ $uniq{$f[3]}=[@f];
+}
+close in;
+
+open in,$ARGV[0];
+my (%te,@ref,%ref);
+while(<in>)
+{
+ chomp;
+ my @f=split/\s+/,$_;
+ # headers
+ if(/^\@SQ/)
+ {
+ my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/;
+ push @ref,[$sn,$ln];
+ $ref{$sn}=$#ref;
+ next;
+ }
+
+ # unmapped
+ next if $f[2] eq "*";
+
+ my $mm=200;
+ my $xa="";
+ for my $q (11..$#f)
+ {
+     if($f[$q]=~/NM:/)
+     {
+ $mm=$f[$q];
+ $mm =~ s/NM://;
+     }
+
+     if($f[$q]=~/XA:Z:/)
+     {
+ ($xa)=$f[$q]=~/XA:Z:(.*);$/;
+ last;
+     }
+ }
+
+ if ($mm > 5) {next;}
+
+ my ($rnum)=$f[1]=~/(\d)$/;
+ # CIGAR
+ my (@cigar_m)=$f[5]=~/(\d+)M/g;
+ my (@cigar_d)=$f[5]=~/(\d+)D/g;
+ my (@cigar_s)=$f[5]=~/(\d+)S/g;
+ my (@cigar_i)=$f[5]=~/(\d+)I/g;
+ my $aln_ln=sum(@cigar_m,@cigar_d);
+
+ my $strand="+";
+ if($f[1]=~/r/)
+ {
+     my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
+     $f[9]=$seq->revcom->seq;
+     $strand="-";
+ }
+
+ # align to the junctions
+ if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1])
+ {
+     if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1])
+     {
+ $f[2]=${$ref[$ref{$f[2]}+1]}[0];
+ $f[3]=1;
+ $aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1);
+     }
+     else
+     {
+ $aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1;
+     }
+ }
+
+ $pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";";
+     
+ # XA tag
+ my @xa=split(";",$xa);
+ $pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa;
+
+}
+close in;
+
+foreach my $id (keys %pe)
+{
+ next if exists $pe{$id}{1} && exists $pe{$id}{2} && exists $uniq{$id."/1"} && exists $uniq{$id."/2"};
+ foreach my $rid (keys %{$pe{$id}})
+ {
+ my $mate_id=($rid==1)?2:1;
+ if(exists $uniq{$id."/".$mate_id})
+ {
+ ${$uniq{$id."/".$mate_id}}[4]=$pe{$id}{$rid};
+ print join("\t",@{$uniq{$id."/".$mate_id}}),"\n";
+ }
+ }
+}
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPairFastq.pl
--- a/scripts/pickUniqPairFastq.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/pickUniqPairFastq.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -13,13 +13,15 @@
  chomp;
  my @f=split/\t/,$_,12;
  ## read number 1 or 2
- my ($rnum)=$f[1]=~/(\d)$/;
+ #my ($rnum)=$f[1]=~/(\d)$/;
+        my $rnum=1;
+        if (($f[1] & 128) == 128) {$rnum=2;}
 
  ## XT:A:* 
  my ($xt)=$f[11]=~/XT:A:(.)/;
 
  ## revcom the read mapped to the reverse strand
- if($f[1]=~/r/)
+ if(($f[1] & 16) == 16)
  {
  my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
  $f[9]=$seq->revcom->seq;
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPairFastq.pl.orig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/pickUniqPairFastq.pl.orig Mon Dec 05 09:58:47 2016 -0500
[
@@ -0,0 +1,45 @@
+#!/share/bin/perl
+use Bio::Seq;
+
+die "perl $0 <sam> <output prefix>\n" if @ARGV<1;
+
+open m1,">$ARGV[1].1.fastq";
+open m2,">$ARGV[1].2.fastq";
+
+open in,$ARGV[0];
+my %pe;
+while(<in>)
+{
+ chomp;
+ my @f=split/\t/,$_,12;
+ ## read number 1 or 2
+ my ($rnum)=$f[1]=~/(\d)$/;
+
+ ## XT:A:* 
+ my ($xt)=$f[11]=~/XT:A:(.)/;
+
+ ## revcom the read mapped to the reverse strand
+ if($f[1]=~/r/)
+ {
+ my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
+ $f[9]=$seq->revcom->seq;
+ $f[10]=reverse $f[10];
+ }
+ if (($rnum == 1) || ($rnum == 2))
+ {
+     ${$pe{$f[0]}}[$rnum-1]=[$xt,$f[9],$f[10]];
+ }
+}
+close in;
+
+foreach my $id (keys %pe)
+{
+ my @rid=@{$pe{$id}};
+ if (($rid[0][1] ne "") && ($rid[1][1] ne "") && (($rid[0][0] eq "U" || $rid[1][0] eq "U")))
+ {
+ print m2 "@"."$id/2","\n",$rid[1][1],"\n","+$id/2","\n",$rid[1][2],"\n";
+ print m1 "@"."$id/1","\n",$rid[0][1],"\n","+$id/1","\n",$rid[0][2],"\n";
+ }
+}
+close m1;
+close m2;
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPairFastq_MEM.pl
--- a/scripts/pickUniqPairFastq_MEM.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/pickUniqPairFastq_MEM.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -13,7 +13,9 @@
  chomp;
  my @f=split/\t/,$_,12;
  ## read number 1 or 2
- my ($rnum)=$f[1]=~/(\d)$/;
+ #my ($rnum)=$f[1]=~/(\d)$/;
+        my $rnum=1;
+        if (($f[1] & 128) == 128) {$rnum=2;}
 
  ## XT:A:* 
  my $xt="";
@@ -34,7 +36,7 @@
  }
 
  ## revcom the read mapped to the reverse strand
- if($f[1]=~/r/)
+ if (($f[1] & 16) == 16)
  {
  my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
  $f[9]=$seq->revcom->seq;
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPairFastq_MEM.pl.orig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/pickUniqPairFastq_MEM.pl.orig Mon Dec 05 09:58:47 2016 -0500
[
@@ -0,0 +1,60 @@
+#!/share/bin/perl
+use Bio::Seq;
+
+die "perl $0 <sam> <output prefix>\n" if @ARGV<1;
+
+open m1,">$ARGV[1].1.fastq";
+open m2,">$ARGV[1].2.fastq";
+
+open in,$ARGV[0];
+my %pe;
+while(<in>)
+{
+ chomp;
+ my @f=split/\t/,$_,12;
+ ## read number 1 or 2
+ my ($rnum)=$f[1]=~/(\d)$/;
+
+ ## XT:A:* 
+ my $xt="";
+ my @a=split(/\s+/, $_);
+ my $as=0;
+ my $xs=0;
+ for my $i (11..$#a) {
+     if ($a[$i] =~ /^AS:i:/) {
+ $a[$i] =~ s/AS:i://;
+ $as=$a[$i];
+     }
+     elsif ($a[$i] =~ /^XS:i:/) {
+ $a[$i] =~ s/XS:i://;
+ $xs=$a[$i];
+     }
+     if (($xs > 0) && ($as-$xs <= $ARGV[2])) {$xt="R";}
+     elsif ($as > 0) {$xt="U";}
+ }
+
+ ## revcom the read mapped to the reverse strand
+ if($f[1]=~/r/)
+ {
+ my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
+ $f[9]=$seq->revcom->seq;
+ $f[10]=reverse $f[10];
+ }
+ if (($rnum == 1) || ($rnum == 2))
+ {
+     ${$pe{$f[0]}}[$rnum-1]=[$xt,$f[9],$f[10]];
+ }
+}
+close in;
+
+foreach my $id (keys %pe)
+{
+ my @rid=@{$pe{$id}};
+ if (($rid[0][1] ne "") && ($rid[1][1] ne "") && (($rid[0][0] eq "U" || $rid[1][0] eq "U")))
+ {
+ print m2 "@"."$id/2","\n",$rid[1][1],"\n","+$id/2","\n",$rid[1][2],"\n";
+ print m1 "@"."$id/1","\n",$rid[0][1],"\n","+$id/1","\n",$rid[0][2],"\n";
+ }
+}
+close m1;
+close m2;
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPos.pl
--- a/scripts/pickUniqPos.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/pickUniqPos.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -10,14 +10,16 @@
  chomp;
  my @f=split/\t/,$_,12;
  ## read number 1 or 2
- my ($rnum)=$f[1]=~/(\d)$/;
+ #my ($rnum)=$f[1]=~/(\d)$/;
+        my $rnum=1;
+        if (($f[1] & 128) == 128) {$rnum=2;}
 
  ## XT:A:* 
  my ($xt)=$f[11]=~/XT:A:(.)/;
 
  my $strand="+";
  ## revcomp
- if($f[1]=~/r/)
+ if(($f[1] & 16) == 16)
         {
                 my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
                 $f[9]=$seq->revcom->seq;
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPos.pl.orig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/pickUniqPos.pl.orig Mon Dec 05 09:58:47 2016 -0500
[
@@ -0,0 +1,41 @@
+#!/share/bin/perl
+use Bio::Seq;
+use List::Util qw(sum);
+
+die "perl $0 <sam>\n" if @ARGV<1;
+open in,$ARGV[0];
+my %pe;
+while(<in>)
+{
+ chomp;
+ my @f=split/\t/,$_,12;
+ ## read number 1 or 2
+ my ($rnum)=$f[1]=~/(\d)$/;
+
+ ## XT:A:* 
+ my ($xt)=$f[11]=~/XT:A:(.)/;
+
+ my $strand="+";
+ ## revcomp
+ if($f[1]=~/r/)
+        {
+                my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
+                $f[9]=$seq->revcom->seq;
+ $strand="-";
+        }
+
+ ## parse CIGAR
+ if($xt eq "U")
+        {
+                # CIGAR
+                my (@cigar_m)=$f[5]=~/(\d+)M/g;
+                my (@cigar_d)=$f[5]=~/(\d+)D/g;
+                my (@cigar_s)=$f[5]=~/(\d+)S/g;
+                my (@cigar_i)=$f[5]=~/(\d+)I/g;
+                my $aln_ln=sum(@cigar_m,@cigar_d);
+
+ print $f[2],"\t",$f[3]-1,"\t",$f[3]-1+$aln_ln,"\t$f[0]/$rnum\t",$f[9],"\t",$strand,"\n";
+ }
+}
+close in;
+
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPos_MEM.pl
--- a/scripts/pickUniqPos_MEM.pl Wed Oct 26 07:24:45 2016 -0400
+++ b/scripts/pickUniqPos_MEM.pl Mon Dec 05 09:58:47 2016 -0500
[
@@ -10,7 +10,9 @@
  chomp;
  my @f=split/\t/,$_,12;
  ## read number 1 or 2
- my ($rnum)=$f[1]=~/(\d)$/;
+ #my ($rnum)=$f[1]=~/(\d)$/;
+        my $rnum=1;
+        if (($f[1] & 128) == 128) {$rnum=2;}
 
  ## XT:A:* 
         my $xt="";
@@ -32,7 +34,7 @@
 
  my $strand="+";
  ## revcomp
- if($f[1]=~/r/)
+ if(($f[1] & 16) == 16)
         {
                 my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
                 $f[9]=$seq->revcom->seq;
b
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPos_MEM.pl.orig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/pickUniqPos_MEM.pl.orig Mon Dec 05 09:58:47 2016 -0500
[
@@ -0,0 +1,56 @@
+#!/share/bin/perl
+use Bio::Seq;
+use List::Util qw(sum);
+
+die "perl $0 <sam>\n" if @ARGV<1;
+open in,$ARGV[0];
+my %pe;
+while(<in>)
+{
+ chomp;
+ my @f=split/\t/,$_,12;
+ ## read number 1 or 2
+ my ($rnum)=$f[1]=~/(\d)$/;
+
+ ## XT:A:* 
+        my $xt="";
+        my @a=split(/\s+/, $_);
+        my $as=0;
+        my $xs=0;
+        for my $i (11..$#a) {
+            if ($a[$i] =~ /^AS:i:/) {
+                $a[$i] =~ s/AS:i://;
+                $as=$a[$i];
+            }
+            elsif ($a[$i] =~ /^XS:i:/) {
+                $a[$i] =~ s/XS:i://;
+                $xs=$a[$i];
+            }
+            if (($xs > 0) && ($as-$xs <= $ARGV[1])) {$xt="R";}
+            elsif ($as > 0) {$xt="U";}
+        }
+
+ my $strand="+";
+ ## revcomp
+ if($f[1]=~/r/)
+        {
+                my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
+                $f[9]=$seq->revcom->seq;
+ $strand="-";
+        }
+
+ ## parse CIGAR
+ if($xt eq "U")
+        {
+                # CIGAR
+                my (@cigar_m)=$f[5]=~/(\d+)M/g;
+                my (@cigar_d)=$f[5]=~/(\d+)D/g;
+                my (@cigar_s)=$f[5]=~/(\d+)S/g;
+                my (@cigar_i)=$f[5]=~/(\d+)I/g;
+                my $aln_ln=sum(@cigar_m,@cigar_d);
+
+ print $f[2],"\t",$f[3]-1,"\t",$f[3]-1+$aln_ln,"\t$f[0]/$rnum\t",$f[9],"\t",$strand,"\n";
+ }
+}
+close in;
+
b
diff -r 6e02b9179a24 -r 9672fe07a232 temp.xml
--- a/temp.xml Wed Oct 26 07:24:45 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,124 +0,0 @@
-<tool id ="run_TEMP" name="TEMP" version="0.2.2.0">
-    <description></description>
-    <requirements>
-        <requirement type="package" version="1.6.924">perl-bioperl</requirement>
-        <requirement type="package" version="0.7.13">bwa</requirement>
-        <requirement type="package" version="2.26.0gx">bedtools</requirement>
-        <requirement type="package" version="324">ucsc-twobittofa</requirement>
-        <requirement type="package" version="0.1.19">samtools</requirement>
-    </requirements>
-    <stdio>
-        <exit_code range="1:" />
-    </stdio>
-    <command><![CDATA[
-        ln -f -s "$alignment.metadata.bam_index" alignment.sorted.bam.bai &&
-        ln -f -s "$alignment" alignment.sorted.bam &&
-        bash $__tool_directory__/scripts/TEMP_Insertion.sh
-        -x "$minimum_score_difference"
-        -i alignment.sorted.bam
-        -s $__tool_directory__/scripts
-        -r "$consensus_te_seqs"
-        -t "$te_locations"
-        #if $te_families:
-        -u "$te_families"
-        #end if
-        -m "$mismatches"
-        -f "$median_insertsize"
-        -c \${GALAXY_SLOTS:-2} &&
-        bash $__tool_directory__/scripts/TEMP_Absence.sh
-        -x "$minimum_score_difference"
-        -i alignment.sorted.bam
-        -s $__tool_directory__/scripts
-        -r "$te_locations"
-        -t "$reference2bit"
-        -f "$median_insertsize"
-        -c \${GALAXY_SLOTS:-2} &&
-        mv alignment.insertion.refined.bp.summary $insertion_summary &&
-        mv alignment.absence.refined.bp.summary $absence_summary &&
-        tar -czf archive.tar.gz  *insertion* *excision* && mv archive.tar.gz $archive
-    ]]></command>
-    <inputs>
-        <param format="bam" name="alignment" type="data" label="Alignment bam file"/>
-        <param format="twobit" name="reference2bit" type="data" label="Reference twobit file"/>
-        <param format="fasta" name="consensus_te_seqs" type="data" label="Consensus TE Seqs fasta file"/>
-        <param format="bed" name="te_locations" type="data" label="TE Annotations bed file"/>
-        <param format="tabular" name="te_families" type="data" optional="True" label="TE Identifiers and Families"/>
-        <param name="median_insertsize" value="" type="integer" label="Median Insert Length"/>
-        <param name="mismatches" min="0" max="5" type="integer" value="3" label="Allow this many mismatches when aligning to TEs"/>
-        <param name="minimum_score_difference" type="integer" min="10" max="37" value="30" label="Minimum difference between mapping scores"/>
-    </inputs>
-    <outputs>
-        <data format="bed" type="data" name="insertion_summary" label="${alignment.element_identifier} Insertions" />
-        <data format="bed" type="data" name="absence_summary" label="${alignment.element_identifier} Absences" />
-        <data format="tar" type="data" name="archive" label="${alignment.element_identifier} Compressed output files" />
-    </outputs>
-    <tests>
-        <test>
-            <param name="alignment" value="chr2l_bwa_mem.bam" ftype="bam"/>
-            <param name="reference2bit" value="dm6_chr2l.twobit" ftype="twobit"/>
-            <param name="consensus_te_seqs" value="test_consensus.fa" ftype="fasta"/>
-            <param name="te_locations" value="test_TE_annotation.gff3" ftype="bed"/>
-            <param name="median_insertsize" value="500" ftype="integer"/>
-            <output name="insertion_summary" file="test_chromosome.insertion.refined.bp.summary" ftype="bed"/>
-            <output name="absence_summary" file="test_chromosome.absence.refined.bp.summary" ftype="bed"/>
-        </test>
-    </tests>
-    <help> <![CDATA[
-
-
-TEMP
--------------
-TEMP is a software package for detecting transposable element (TE) insertions and absences from pooled high-throughput sequencing data.
-
-Current version v1.04
-
-Author: Jiali Zhuang (jiali.zhuang@umassmed.edu) and Jie Wang (jie.wangj@umassmed.edu) Weng Lab, University of Massachusetts Medical School, Worcester, MA, USA
-
-*Input files/variables*
--------------------------
-* Alignment file in BAM format
-* Reference genome used in aligning, in fasta or twobit format.
-* Transposable Elements' Consensus Sequences in fasta format.
-* Annotations of TEs in reference genome in bed format.
-* TE Identifiers and Families (optional) - A file containing in the first column the TE names/identifiers from the consensus sequences file, and in the second column, their respective TE family names as in the TE annotations file. When supplied, if a detected insertion overlaps with an annotated TE of the same family, the detected insertion will be excluded from the results.
-* Median Insert Length
-* Number of Mismatches allowed (default 3)
-* Minimum difference between mapping scores. The minimum difference in scores between the optimal and suboptimal alignments to consider a read uniquely mapped.
-
-*Output files*
------------------
-* **In the Insertions output file there are 14 columns:**
-* Column 1: The chromosome where the detected insertion happens.
-* Column 2: The coordinate of the start position of the detected insertion.
-* Column 3: The coordinate of the end position of the detected insertion.
-* Column 4: The TE family that the detected insertion belongs to.
-* Column 5: The direction of the insertion. “Plus” means that the TE is integrated with the plus strand of the genome while “minus” means the TE is integrated with the minus strand.
-* Column 6: The class of the insertion. “1p1” means that the detected insertion is supported by reads at both sides. “2p” means the detected insertion is supported by more than 1 read at only 1 side. “Singleton” means the detected insertion is supported by only 1 read at 1 side.
-* Column 7: The total number of read pairs that support the detected insertion.
-* Column 8: The estimated population frequency of the detected insertion.
-* Columns 9 & 10: The coordinate of a junction and the number of the reads supporting it. If the junction is not found column 9 will be the arithmetic mean of the start and end coordinates and column 10 will have the value 0.
-* Columns 11 & 12: Same as Columns 9 & 10 except for the junction on the other strand.
-* Column 13: The number of reads supporting the detected insertion at the 5’ end of the TE (not including junction spanning reads).
-* Column 14: The number of reads supporting the detected insertion at the 3’ end of the TE (not including junction spanning reads).
-
-
------
-
-
-* **In the Absences output file there are 9 columns:**
-* Column 1: The chromosome where the detected absence happens.
-* Column 2: The coordinate of the start position of the detected absence.
-* Column 3: The coordinate of the end position of the detected absence.
-* Column 4: The TE family that the detected absence belongs to.
-* Column 5: Junctions at 5’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands.
-* Column 6: Junctions at 3’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands.
-* Column 7: The number of reads supporting the absence.
-* Column 8: The number of reads supporting the reference (no absence).
-* Column 9: Estimated population frequency of the detected absence event.
-
-
-    ]]> </help>
-    <citations>
-        <citation type="doi">10.1093/nar/gku323</citation>
-    </citations>
-</tool>
b
diff -r 6e02b9179a24 -r 9672fe07a232 temp_absences.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/temp_absences.xml Mon Dec 05 09:58:47 2016 -0500
[
@@ -0,0 +1,103 @@
+<tool id ="TEMP_absences" name="TEMP Excision" version="0.3.0">
+    <description>finds TEs that have excised relative to the reference</description>
+    <requirements>
+        <requirement type="package" version="1.6.924=pl5.22.0_0">perl-bioperl</requirement>
+        <requirement type="package" version="0.7.13">bwa</requirement>
+        <requirement type="package" version="2.25.0">bedtools</requirement>
+        <requirement type="package" version="324">ucsc-twobittofa</requirement>
+        <requirement type="package" version="1.3.1">samtools</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command><![CDATA[
+        ln -f -s "$alignment.metadata.bam_index" alignment.sorted.bam.bai &&
+        ln -f -s "$alignment" alignment.sorted.bam &&
+        bash $__tool_directory__/scripts/TEMP_Absence.sh
+        -x "$minimum_score_difference"
+        -i alignment.sorted.bam
+        -s $__tool_directory__/scripts
+        -r "$te_locations"
+        -t "$reference2bit"
+        -f "$median_insertsize"
+        -c \${GALAXY_SLOTS:-2} &&
+        mv alignment.absence.refined.bp.summary $absence_summary
+    ]]></command>
+    <inputs>
+        <param format="bam" name="alignment" type="data" label="Alignment bam file"/>
+        <param format="twobit" name="reference2bit" type="data" label="Reference twobit file"/>
+        <param format="bed" name="te_locations" type="data" label="TE Annotations bed file"/>
+        <param name="median_insertsize" value="" type="integer" label="Median Insert Length"/>
+        <param name="minimum_score_difference" type="integer" min="10" max="37" value="30" label="Minimum difference between mapping scores"/>
+    </inputs>
+    <outputs>
+        <data format="bed" name="absence_summary" label="${alignment.element_identifier} Absences" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="alignment" value="test_chromosome.sorted.bam" ftype="bam"/>
+            <param name="reference2bit" value="dm3_chr2L.2bit" ftype="twobit"/>
+            <param name="te_locations" value="test_TE_annotation.bed" ftype="bed"/>
+            <param name="median_insertsize" value="500" ftype="integer"/>
+            <output name="absence_summary" file="test_absence_out.bed" ftype="bed"/>
+        </test>
+    </tests>
+    <help> <![CDATA[
+
+
+TEMP
+-------------
+TEMP is a software package for detecting transposable element (TE) insertions and absences from pooled high-throughput sequencing data.
+
+Current version v1.04
+
+Author: Jiali Zhuang (jiali.zhuang@umassmed.edu) and Jie Wang (jie.wangj@umassmed.edu) Weng Lab, University of Massachusetts Medical School, Worcester, MA, USA
+
+*Input files/variables*
+-------------------------
+* Alignment file in BAM format
+* Reference genome used in aligning, in fasta or twobit format.
+* Transposable Elements' Consensus Sequences in fasta format.
+* Annotations of TEs in reference genome in bed format.
+* TE Identifiers and Families (optional) - A file containing in the first column the TE names/identifiers from the consensus sequences file, and in the second column, their respective TE family names as in the TE annotations file. When supplied, if a detected insertion overlaps with an annotated TE of the same family, the detected insertion will be excluded from the results.
+* Median Insert Length
+* Number of Mismatches allowed (default 3)
+* Minimum difference between mapping scores. The minimum difference in scores between the optimal and suboptimal alignments to consider a read uniquely mapped.
+
+*Output files*
+-----------------
+* **In the Insertions output file there are 14 columns:**
+* Column 1: The chromosome where the detected insertion happens.
+* Column 2: The coordinate of the start position of the detected insertion.
+* Column 3: The coordinate of the end position of the detected insertion.
+* Column 4: The TE family that the detected insertion belongs to.
+* Column 5: The direction of the insertion. “Plus” means that the TE is integrated with the plus strand of the genome while “minus” means the TE is integrated with the minus strand.
+* Column 6: The class of the insertion. “1p1” means that the detected insertion is supported by reads at both sides. “2p” means the detected insertion is supported by more than 1 read at only 1 side. “Singleton” means the detected insertion is supported by only 1 read at 1 side.
+* Column 7: The total number of read pairs that support the detected insertion.
+* Column 8: The estimated population frequency of the detected insertion.
+* Columns 9 & 10: The coordinate of a junction and the number of the reads supporting it. If the junction is not found column 9 will be the arithmetic mean of the start and end coordinates and column 10 will have the value 0.
+* Columns 11 & 12: Same as Columns 9 & 10 except for the junction on the other strand.
+* Column 13: The number of reads supporting the detected insertion at the 5’ end of the TE (not including junction spanning reads).
+* Column 14: The number of reads supporting the detected insertion at the 3’ end of the TE (not including junction spanning reads).
+
+
+-----
+
+
+* **In the Absences output file there are 9 columns:**
+* Column 1: The chromosome where the detected absence happens.
+* Column 2: The coordinate of the start position of the detected absence.
+* Column 3: The coordinate of the end position of the detected absence.
+* Column 4: The TE family that the detected absence belongs to.
+* Column 5: Junctions at 5’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands.
+* Column 6: Junctions at 3’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands.
+* Column 7: The number of reads supporting the absence.
+* Column 8: The number of reads supporting the reference (no absence).
+* Column 9: Estimated population frequency of the detected absence event.
+
+
+    ]]> </help>
+    <citations>
+        <citation type="doi">10.1093/nar/gku323</citation>
+    </citations>
+</tool>
b
diff -r 6e02b9179a24 -r 9672fe07a232 temp_insertions.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/temp_insertions.xml Mon Dec 05 09:58:47 2016 -0500
[
@@ -0,0 +1,110 @@
+<tool id ="TEMP_insertions" name="TEMP Insertion" version="0.3.0">
+    <description>finds TE insertions relative to reference</description>
+    <requirements>
+        <requirement type="package" version="1.6.924=pl5.22.0_0">perl-bioperl</requirement>
+        <requirement type="package" version="0.7.13">bwa</requirement>
+        <requirement type="package" version="2.25.0">bedtools</requirement>
+        <requirement type="package" version="324">ucsc-twobittofa</requirement>
+        <requirement type="package" version="1.3.1">samtools</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command><![CDATA[
+        ln -f -s "$alignment.metadata.bam_index" alignment.sorted.bam.bai &&
+        ln -f -s "$alignment" alignment.sorted.bam &&
+        bash $__tool_directory__/scripts/TEMP_Insertion.sh
+        -x "$minimum_score_difference"
+        -i alignment.sorted.bam
+        -s $__tool_directory__/scripts
+        -r "$consensus_te_seqs"
+        -t "$te_locations"
+        #if $te_families:
+        -u "$te_families"
+        #end if
+        -m "$mismatches"
+        -f "$median_insertsize"
+        -c \${GALAXY_SLOTS:-2} &&
+        mv alignment.insertion.refined.bp.summary $insertion_summary
+    ]]></command>
+    <inputs>
+        <param format="bam" name="alignment" type="data" label="Alignment bam file"/>
+        <param format="fasta" name="consensus_te_seqs" type="data" label="Consensus TE Seqs fasta file"/>
+        <param format="bed" name="te_locations" type="data" label="TE Annotations bed file"/>
+        <param format="tabular" name="te_families" type="data" optional="True" label="TE Identifiers and Families"/>
+        <param name="median_insertsize" value="" type="integer" label="Median Insert Length"/>
+        <param name="mismatches" min="0" max="5" type="integer" value="3" label="Allow this many mismatches when aligning to TEs"/>
+        <param name="minimum_score_difference" type="integer" min="0" max="37" value="30" label="Minimum difference between mapping scores"/>
+    </inputs>
+    <outputs>
+        <data format="bed" name="insertion_summary" label="${alignment.element_identifier} Insertions" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="alignment" value="test_chromosome.sorted.bam" ftype="bam"/>
+            <param name="consensus_te_seqs" value="test_concensus.fa" ftype="fasta"/>
+            <param name="te_locations" value="test_TE_annotation.bed" ftype="bed"/>
+            <param name="median_insertsize" value="500" ftype="integer"/>
+            <param name="minimum_score_difference" value="0" ftype="integer"/>
+            <output name="insertion_summary" file="test_insertions_out.bed" ftype="bed" compare="sim_size"/>
+        </test>
+    </tests>
+    <help> <![CDATA[
+
+
+TEMP
+-------------
+TEMP is a software package for detecting transposable element (TE) insertions and absences from pooled high-throughput sequencing data.
+
+Current version v1.04
+
+Author: Jiali Zhuang (jiali.zhuang@umassmed.edu) and Jie Wang (jie.wangj@umassmed.edu) Weng Lab, University of Massachusetts Medical School, Worcester, MA, USA
+
+*Input files/variables*
+-------------------------
+* Alignment file in BAM format
+* Reference genome used in aligning, in fasta or twobit format.
+* Transposable Elements' Consensus Sequences in fasta format.
+* Annotations of TEs in reference genome in bed format.
+* TE Identifiers and Families (optional) - A file containing in the first column the TE names/identifiers from the consensus sequences file, and in the second column, their respective TE family names as in the TE annotations file. When supplied, if a detected insertion overlaps with an annotated TE of the same family, the detected insertion will be excluded from the results.
+* Median Insert Length
+* Number of Mismatches allowed (default 3)
+* Minimum difference between mapping scores. The minimum difference in scores between the optimal and suboptimal alignments to consider a read uniquely mapped.
+
+*Output files*
+-----------------
+* **In the Insertions output file there are 14 columns:**
+* Column 1: The chromosome where the detected insertion happens.
+* Column 2: The coordinate of the start position of the detected insertion.
+* Column 3: The coordinate of the end position of the detected insertion.
+* Column 4: The TE family that the detected insertion belongs to.
+* Column 5: The direction of the insertion. “Plus” means that the TE is integrated with the plus strand of the genome while “minus” means the TE is integrated with the minus strand.
+* Column 6: The class of the insertion. “1p1” means that the detected insertion is supported by reads at both sides. “2p” means the detected insertion is supported by more than 1 read at only 1 side. “Singleton” means the detected insertion is supported by only 1 read at 1 side.
+* Column 7: The total number of read pairs that support the detected insertion.
+* Column 8: The estimated population frequency of the detected insertion.
+* Columns 9 & 10: The coordinate of a junction and the number of the reads supporting it. If the junction is not found column 9 will be the arithmetic mean of the start and end coordinates and column 10 will have the value 0.
+* Columns 11 & 12: Same as Columns 9 & 10 except for the junction on the other strand.
+* Column 13: The number of reads supporting the detected insertion at the 5’ end of the TE (not including junction spanning reads).
+* Column 14: The number of reads supporting the detected insertion at the 3’ end of the TE (not including junction spanning reads).
+
+
+-----
+
+
+* **In the Absences output file there are 9 columns:**
+* Column 1: The chromosome where the detected absence happens.
+* Column 2: The coordinate of the start position of the detected absence.
+* Column 3: The coordinate of the end position of the detected absence.
+* Column 4: The TE family that the detected absence belongs to.
+* Column 5: Junctions at 5’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands.
+* Column 6: Junctions at 3’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands.
+* Column 7: The number of reads supporting the absence.
+* Column 8: The number of reads supporting the reference (no absence).
+* Column 9: Estimated population frequency of the detected absence event.
+
+
+    ]]> </help>
+    <citations>
+        <citation type="doi">10.1093/nar/gku323</citation>
+    </citations>
+</tool>
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/chr2l_bwa_mem.bam
b
Binary file test-data/chr2l_bwa_mem.bam has changed
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/dm3_chr2L.2bit
b
Binary file test-data/dm3_chr2L.2bit has changed
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/dm6_chr2l.twobit
b
Binary file test-data/dm6_chr2l.twobit has changed
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_TE_annotation.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_TE_annotation.bed Mon Dec 05 09:58:47 2016 -0500
b
@@ -0,0 +1,115 @@
+chr2L 1301606 1302488 FBgn0001167_gypsy . -
+chr2L 2094501 2094580 FBgn0000155_roo . -
+chr2L 2100429 2109522 FBgn0000155_roo . -
+chr2L 2112167 2118361 FBgn0003007_opus . -
+chr2L 2118446 2119772 FBgn0003007_opus . -
+chr2L 2159453 2159556 FBgn0000155_roo . -
+chr2L 2267349 2267457 FBgn0000155_roo . +
+chr2L 2294096 2299243 FBgn0000349_copia . -
+chr2L 2378805 2378893 FBgn0000155_roo . +
+chr2L 2530303 2530389 FBgn0000155_roo . +
+chr2L 2565592 2569028 FBgn0000005_297 . +
+chr2L 2565667 2565886 FBgn0000004_17.6 . +
+chr2L 2565869 2566006 FBgn0063450_Tom1 . +
+chr2L 2565871 2566024 FBgn0061485_rover . +
+chr2L 2565920 2566024 FBgn0063917_McClintock . +
+chr2L 2566158 2569026 FBgn0000004_17.6 . +
+chr2L 2566674 2566848 FBgn0061485_rover . +
+chr2L 2567367 2569022 FBgn0061485_rover . +
+chr2L 2567598 2567816 FBgn0063917_McClintock . +
+chr2L 2567665 2569027 FBgn0044355_Quasimodo . +
+chr2L 2568060 2569027 FBgn0026065_Idefix . +
+chr2L 2568062 2569018 FBgn0063917_McClintock . +
+chr2L 2568070 2569004 FBgn0063447_accord . +
+chr2L 2568121 2568988 FBgn0004082_Tirant . +
+chr2L 2568137 2569006 FBgn0063432_gypsy5 . +
+chr2L 2568137 2568942 FBgn0040267_Transpac . +
+chr2L 2568153 2569001 FBgn0063782_accord2 . -
+chr2L 2568154 2568990 FBgn0023131_ZAM . +
+chr2L 2568193 2568993 FBgn0003007_opus . +
+chr2L 2568251 2568697 FBgn0000006_412 . +
+chr2L 2568264 2568985 FBgn0063434_gypsy3 . +
+chr2L 2568264 2568985 FBgn0003490_springer . +
+chr2L 2568308 2568520 FBgn0067387_gypsy10 . +
+chr2L 2568308 2568517 FBgn0067384_gypsy7 . +
+chr2L 2568308 2568878 FBgn0063431_gypsy6 . +
+chr2L 2568308 2568703 FBgn0001167_gypsy . +
+chr2L 2568313 2568828 FBgn0002697_mdg1 . +
+chr2L 2568313 2568526 FBgn0000199_blood . +
+chr2L 2568329 2568982 FBgnnnnnnnn_HMS-Beagle2 . +
+chr2L 2568329 2568982 FBgn0001207_HMS-Beagle . +
+chr2L 2568378 2568648 FBgn0063897_Stalker4 . +
+chr2L 2568378 2568878 FBgn0063433_gypsy4 . +
+chr2L 2568384 2568646 FBgn0063435_gypsy2 . +
+chr2L 2568384 2568796 FBgn0002698_mdg3 . +
+chr2L 2569006 2569756 FBgn0063917_McClintock . +
+chr2L 2569007 2571200 FBgn0000004_17.6 . +
+chr2L 2569010 2571603 FBgn0000005_297 . +
+chr2L 2569018 2569804 FBgn0061485_rover . +
+chr2L 2569064 2570806 FBgn0044355_Quasimodo . +
+chr2L 2569064 2569752 FBgn0026065_Idefix . +
+chr2L 2569859 2571024 FBgn0061485_rover . +
+chr2L 2569987 2570809 FBgn0026065_Idefix . +
+chr2L 2570511 2570703 FBgn0063917_McClintock . +
+chr2L 2571048 2571200 FBgn0063917_McClintock . +
+chr2L 2571264 2571483 FBgn0000004_17.6 . +
+chr2L 2571466 2571603 FBgn0063450_Tom1 . +
+chr2L 2571468 2571592 FBgn0061485_rover . +
+chr2L 2661257 2663012 FBgn0001249_I-element . +
+chr2L 2713413 2713444 FBgn0063371_transib2 . -
+chr2L 2772652 2776969 FBgn0000005_297 . +
+chr2L 2772727 2772946 FBgn0000004_17.6 . +
+chr2L 2772929 2773066 FBgn0063450_Tom1 . +
+chr2L 2772931 2773084 FBgn0061485_rover . +
+chr2L 2772980 2773084 FBgn0063917_McClintock . +
+chr2L 2773736 2773910 FBgn0061485_rover . +
+chr2L 2774429 2776968 FBgn0061485_rover . +
+chr2L 2774429 2776968 FBgn0000004_17.6 . +
+chr2L 2774660 2774878 FBgn0063917_McClintock . +
+chr2L 2774727 2776980 FBgn0044355_Quasimodo . +
+chr2L 2775122 2776985 FBgn0026065_Idefix . +
+chr2L 2775124 2776969 FBgn0063917_McClintock . +
+chr2L 2775132 2776531 FBgn0063447_accord . +
+chr2L 2775183 2776509 FBgn0004082_Tirant . +
+chr2L 2775199 2776553 FBgn0063432_gypsy5 . +
+chr2L 2775199 2776494 FBgn0040267_Transpac . +
+chr2L 2775215 2776321 FBgn0063782_accord2 . -
+chr2L 2775216 2776513 FBgn0023131_ZAM . +
+chr2L 2775255 2776055 FBgn0003007_opus . +
+chr2L 2775313 2775759 FBgn0000006_412 . +
+chr2L 2775326 2776047 FBgn0063434_gypsy3 . +
+chr2L 2775326 2776047 FBgn0003490_springer . +
+chr2L 2775370 2775579 FBgn0067384_gypsy7 . +
+chr2L 2775370 2775765 FBgn0001167_gypsy . +
+chr2L 2775375 2775890 FBgn0002697_mdg1 . +
+chr2L 2775375 2775588 FBgn0000199_blood . +
+chr2L 2775391 2776044 FBgnnnnnnnn_HMS-Beagle2 . +
+chr2L 2775391 2776044 FBgn0001207_HMS-Beagle . +
+chr2L 2775429 2775767 FBgn0010302_Burdock . +
+chr2L 2775440 2775710 FBgn0063897_Stalker4 . +
+chr2L 2775440 2776515 FBgn0063433_gypsy4 . +
+chr2L 2775442 2775582 FBgn0067387_gypsy10 . +
+chr2L 2775446 2775858 FBgn0002698_mdg3 . +
+chr2L 2776093 2776340 FBgn0000199_blood . +
+chr2L 2776099 2776519 FBgnnnnnnnn_HMS-Beagle2 . +
+chr2L 2776156 2776324 FBgn0063436_gtwin . +
+chr2L 2776156 2776516 FBgn0063431_gypsy6 . +
+chr2L 2776179 2776389 FBgn0003007_opus . +
+chr2L 2776938 2777318 FBgn0063917_McClintock . +
+chr2L 2776958 2777320 FBgn0000004_17.6 . +
+chr2L 2776962 2777324 FBgn0061485_rover . +
+chr2L 2776962 2777324 FBgn0000005_297 . +
+chr2L 2776975 2777315 FBgn0044355_Quasimodo . +
+chr2L 2777321 2779175 FBgn0000005_297 . +
+chr2L 2777323 2778772 FBgn0000004_17.6 . +
+chr2L 2777510 2778596 FBgn0061485_rover . +
+chr2L 2777559 2778381 FBgn0026065_Idefix . +
+chr2L 2777565 2778378 FBgn0044355_Quasimodo . +
+chr2L 2778083 2778275 FBgn0063917_McClintock . +
+chr2L 2778620 2778772 FBgn0063917_McClintock . +
+chr2L 2778836 2779055 FBgn0000004_17.6 . +
+chr2L 2779038 2779175 FBgn0063450_Tom1 . +
+chr2L 2779040 2779164 FBgn0061485_rover . +
+chr2L 2933353 2935475 FBgn0003122_pogo . -
+chr2L 2945631 2945785 FBgn0000155_roo . +
+chr2L 2963474 2963538 FBgn0000155_roo . +
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_TE_annotation.gff3
--- a/test-data/test_TE_annotation.gff3 Wed Oct 26 07:24:45 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,934 +0,0 @@\n-chr2L\tFlyBase\tIdefix\t9726\t9859\t.\t+\t.\tID=FBti0060580;Name=Idefix{}2519;Alias=Idefix#20,TE60580;derived_cyto_location=21A5-21A5\n-chr2L\tFlyBase\tINE-1\t9888\t9949\t.\t-\t.\tID=FBti0059810;Name=INE-1{}1749;Alias=INE-1#4,TE59810;derived_cyto_location=21A5-21A5\n-chr2L\tFlyBase\tINE-1\t24236\t24462\t.\t+\t.\tID=FBti0059812;Name=INE-1{}1751;Alias=INE-1#5,TE59812;derived_cyto_location=21B1-21B1;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tINE-1\t27530\t27788\t.\t-\t.\tID=FBti0059814;Name=INE-1{}1753;Alias=INE-1#6,TE59814;derived_cyto_location=21B1-21B1;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tjockey\t47514\t52519\t.\t+\t.\tID=FBti0019092;Name=jockey{}277;Alias=jockey{}277,TE19092;gbunit=AE003590;derived_cyto_location=21B1-21B1,21A3-21A3;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tINE-1\t60240\t60580\t.\t+\t.\tID=FBti0059816;Name=INE-1{}1755;Alias=INE-1#7,TE59816;derived_cyto_location=21B1-21B1;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tINE-1\t60663\t60773\t.\t+\t.\tID=FBti0059818;Name=INE-1{}1757;Alias=INE-1#8,TE59818;derived_cyto_location=21B1-21B1;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tX\t64316\t64936\t.\t+\t.\tID=FBti0019093;Name=X{}278;Alias=X{}278,CG18303,TE19093;gbunit=AE003590;derived_cyto_location=21B1-21B1,21A4-21A4;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tINE-1\t65664\t65698\t.\t-\t.\tID=FBti0059819;Name=INE-1{}1758;Alias=INE-1#9,TE59819;derived_cyto_location=21B1-21B1;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tINE-1\t116518\t116976\t.\t+\t.\tID=FBti0060557;Name=INE-1{}2496;Alias=INE-1#10,TE60557;derived_cyto_location=21B2-21B2;derived_comput_cyto_location=21B2\n-chr2L\tFlyBase\tMcClintock\t123547\t123635\t.\t-\t.\tID=FBti0060562;Name=McClintock{}2501;Alias=McClintock#15,TE60562;derived_cyto_location=21B2-21B2;derived_comput_cyto_location=21B2\n-chr2L\tFlyBase\tINE-1\t172692\t172724\t.\t+\t.\tID=FBti0060558;Name=INE-1{}2497;Alias=INE-1#11,TE60558;derived_cyto_locati
on=21B4-21B4;derived_comput_cyto_location=21B4\n-chr2L\tFlyBase\tINE-1\t173743\t173778\t.\t-\t.\tID=FBti0060559;Name=INE-1{}2498;Alias=INE-1#12,TE60559;derived_cyto_location=21B4-21B4;derived_comput_cyto_location=21B4\n-chr2L\tFlyBase\tI\t176393\t176512\t.\t-\t.\tID=FBti0019095;Name=I{}279;Alias=I{}279,TE19095;gbunit=AE003590;derived_cyto_location=21B4-21B4,21B3-21B3;derived_comput_cyto_location=21B4\n-chr2L\tFlyBase\tINE-1\t178279\t178467\t.\t+\t.\tID=FBti0060560;Name=INE-1{}2499;Alias=INE-1#13,TE60560;derived_cyto_location=21B4-21B4;derived_comput_cyto_location=21B4\n-chr2L\tFlyBase\tINE-1\t179822\t179909\t.\t+\t.\tID=FBti0060561;Name=INE-1{}2500;Alias=INE-1#14,TE60561;derived_cyto_location=21B4-21B4;derived_comput_cyto_location=21B4\n-chr2L\tFlyBase\tblood\t347941\t355383\t.\t-\t.\tID=FBti0019096;Name=blood{}280;Alias=blood{}280,TE19096;gbunit=AE003589;derived_cyto_location=21C1-21C1,21C2-21C2;derived_comput_cyto_location=21C1\n-chr2L\tFlyBase\tINE-1\t451600\t451692\t.\t-\t.\tID=FBti0063763;Name=INE-1{}5702;Alias=INE-1#10915,TE63763;derived_cyto_location=21C7-21C7;derived_comput_cyto_location=21C2\n-chr2L\tFlyBase\tINE-1\t634184\t634245\t.\t-\t.\tID=FBti0060585;Name=INE-1{}2524;Alias=INE-1#28,TE60585;derived_cyto_location=21E2-21E2;derived_comput_cyto_location=21E2\n-chr2L\tFlyBase\troo\t686974\t695955\t.\t-\t.\tID=FBti0019098;Name=roo{}281;Alias=roo{}281,TE19098;gbunit=AE003588;derived_cyto_location=21E2-21E2,21D2-21D2;derived_comput_cyto_location=21E2\n-chr2L\tFlyBase\tBari1\t770516\t772243\t.\t-\t.\tID=FBti0019099;Name=Bari1{}282;Alias=Bari1{}282,TE19099;gbunit=AE003588;derived_cyto_location=21E2-21E2,21D2-21D2;derived_comput_cyto_location=21E2\n-chr2L\tFlyBase\troo\t976935\t984512\t.\t+\t.\tID=FBti0019100;Name=roo{}283;Alias=roo{}283,TE19100;gbunit=AE003587;derived_cyto_location=21E2-21E2,21E1-21E1;derived_comput_cyto_location=21E2\n-chr2L\tFlyBase\troo\t996780\t1005816\t.\t-\t.\tID=FBti0019101;Name=roo{}284;Alias=roo{}284,TE19101;gbunit=AE003587;derived_cyto
_location=21E3-21E3,21E2-21E2;derived_comput_cyto_location=21E3\n-chr2L\tFlyBase\tblood\t1220184\t1227592\t.\t+\t.\tID=FBti0019102;Name=blood{}285;Alias=blood{}285,TE19102;gbunit=AE003587;derived_cyto_location=21F2-21'..b'8;Alias=HMS-Beagle#1508,TE60549;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tHMS-Beagle\t22409494\t22409773\t.\t-\t.\tID=FBti0060684;Name=invader1{}2623;Alias=invader1#1573,TE60684;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tinvader1\t22409895\t22410321\t.\t+\t.\tID=FBti0060551;Name=HMS-Beagle{}2490;Alias=HMS-Beagle#1509,TE60551;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tHMS-Beagle2\t22410897\t22411176\t.\t-\t.\tID=FBti0060686;Name=invader1{}2625;Alias=invader1#1574,TE60686;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tHMS-Beagle\t22411294\t22411721\t.\t+\t.\tID=FBti0060390;Name=HMS-Beagle2{}2329;Alias=HMS-Beagle2#100081,TE60390;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tinvader1\t22412695\t22413121\t.\t+\t.\tID=FBti0060565;Name=HMS-Beagle{}2504;Alias=HMS-Beagle#1511,TE60565;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tHMS-Beagle\t22413694\t22413973\t.\t-\t.\tID=FBti0060688;Name=invader1{}2627;Alias=invader1#1575,TE60688;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tinvader1\t22414091\t22414518\t.\t+\t.\tID=FBti0060391;Name=HMS-Beagle{}2330;Alias=HMS-Beagle#100082,TE60391;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tHMS-Beagle\t22415073\t22415352\t.\t-\t.\tID=FBti0060689;Name=invader1{}2628;Alias=invader1#1576,TE60689;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tDoc\t22415470\t22415824\t.\t+\t.\tID=FBti0060570;Name=HMS-Beagle{}2509;Alias=HMS-Beagle#1513,TE60570;derived_cyto_location=40F7-40F7;derived_
comput_cyto_location=40F7\n-chr2L\tFlyBase\tS\t22415856\t22420241\t.\t-\t.\tID=FBti0060540;Name=Doc{}2479;Alias=Doc#1502,TE60540;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tDHGP\troo\t22582024\t22583757\t.\t+\t.\tID=RR48839_transposable_element;Name=S{}RR48839;h-band_cyto_range=h35-h36;cyto_range=40D-40F\n-chr2L\tDHGP\tF\t22605666\t22614751\t.\t-\t.\tID=RR49033_transposable_element;Name=roo{}RR49033;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tMax\t22679852\t22684556\t.\t+\t.\tID=RR48902_transposable_element;Name=F{}RR48902;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tS\t22765149\t22773721\t.\t+\t.\tID=RR48810_transposable_element;Name=Max{}RR48810;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tGATE\t22776474\t22778207\t.\t+\t.\tID=RR48497_transposable_element;Name=S{}RR48497;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tinvader3\t22899297\t22907772\t.\t+\t.\tID=RR48945_transposable_element;Name=GATE{}RR48945;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tinvader3\t23057903\t23063349\t.\t-\t.\tID=RR49017_transposable_element;Name=invader3{}RR49017;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tFlyBase\tJuan\t23059027\t23060915\t.\t+\t.\tID=FBti0059748;Name=invader3{}1687;Alias=TE19812,TE19813,FBti0019812,FBti0019813,invader3#864,invader3{}454,invader3{}455,TE59748;derived_cyto_location=38C2-38C2\n-chr2L\tDHGP\tDoc\t23267989\t23272224\t.\t+\t.\tID=RR48345_transposable_element;Name=Juan{}RR48345;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tF\t23288997\t23293720\t.\t-\t.\tID=RR48348_transposable_element;Name=Doc{}RR48348;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tIvk\t23353253\t23357949\t.\t-\t.\tID=RR44129_transposable_element;Name=F{}RR44129;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\troo\t23382539\t23387909\t.\t+\t.\tID=RR48366_transposable_element;Name=Ivk{}RR48366;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-
chr2L\tDHGP\tDoc\t23405712\t23414824\t.\t+\t.\tID=RR48370_transposable_element;Name=roo{}RR48370;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tMcClintock\t23467777\t23472472\t.\t+\t.\tID=RR41919_transposable_element;Name=Doc{}RR41919;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tBari1\t23475714\t23482166\t.\t+\t.\tID=RR48378_transposable_element;Name=McClintock{}RR48378;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n'
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_absence_out.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_absence_out.bed Mon Dec 05 09:58:47 2016 -0500
b
@@ -0,0 +1,6 @@
+Chr Start End TransposonName 5'_Junction 3'_Junction Variant Reference Frequency
+chr2L 2100429 2109522 FBgn0000155_roo 2100429(+),2100429(-) 2109523(+),2109523(-) 27 0 1.0000
+chr2L 2112167 2119772 FBgn0003007_opus 2112167(-) 2119773(-) 27 0 1.0000
+chr2L 2294096 2299243 FBgn0000349_copia 2294096(+),2294096(-) 2299244(+),2299244(-) 45 0 1.0000
+chr2L 2661257 2663012 FBgn0001249_I-element 2661257(+) 2663013(+) 28 0 1.0000
+chr2L 2933353 2935475 FBgn0003122_pogo 2933346(+),2933353(-) 2935469(+),2935476(-) 44 0 1.0000
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_chromosome.absence.refined.bp.summary
--- a/test-data/test_chromosome.absence.refined.bp.summary Wed Oct 26 07:24:45 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-Chr Start End TransposonName 5'_Junction 3'_Junction Variant Reference Frequency
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_chromosome.insertion.refined.bp.summary
--- a/test-data/test_chromosome.insertion.refined.bp.summary Wed Oct 26 07:24:45 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,8 +0,0 @@
-Chr Start End TransposonName TransposonDirection Class VariantSupport Frequency Junction1 Junction1Support Junction2 Junction2Support 5'_Support 3'_Support
-chr2L 8900590 8900703 FBgn0000481_Doc antisense 1p1 8 0.1667 8900646 0 8900646 0 5 3
-chr2L 8907814 8908314 FBgn0000155_roo antisense singleton 1 0.0179 8908064 0 8908064 0 0 1
-chr2L 8927841 8928341 FBgn0004141_HeT-A sense singleton 1 0.0333 8928091 0 8928091 0 0 1
-chr2L 8959585 8960085 FBgn0000481_Doc sense 2p 3 0.0236 8959835 0 8959835 0 0 3
-chr2L 8988151 8988651 FBgn0000155_roo antisense singleton 1 0.0159 8988401 0 8988401 0 1 0
-chr2L 8989135 8989635 FBgn0004141_HeT-A antisense singleton 0.5 0.0182 8989385 0 8989385 0 0 0.5
-chr2L 8989135 8989635 FBgn0000481_Doc antisense singleton 0.5 0.0182 8989385 0 8989385 0 0 0.5
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_chromosome.sorted.bam
b
Binary file test-data/test_chromosome.sorted.bam has changed
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_concensus.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_concensus.fa Mon Dec 05 09:58:47 2016 -0500
b
b'@@ -0,0 +1,1143 @@\n+>FBgn0010302_Burdock\n+AGTTAACACAATCACAAAACACCCGAAATATAGTCGTAAGCCTCAAGTGC\n+TTTTCCCATCTATAGATCGAGCTTTACCTATAAGAAACTGTAACTTGTTA\n+AGCTTTAGAGATAAGAACTCTTGCTATACTTAAGTCAGTCGATTTTGGAA\n+GATTAGAAGCGTCGGTCATCGCCACGTACTTACTATTCGTCTCATTAAGT\n+GCAGACCGCGCAAGCCTATTGTAATTAATAAACTTACGCTAATAAATATA\n+TGGAAAATCTACTAAAATGATAATTGGCGCCCAAACGGATATAAAAACCT\n+ACGATAACTGAATAATTATAAATAAATAACAAAAGGAGGATCCGGAGACA\n+AAACCAGCGGCTTTGGCTAATTAACTCTAACCTAAGAAATAAAAATTTGC\n+TGATTACATAAAATATAATATTAATTACTAAGACCATCTACCTTAAAATT\n+GTTTGTTAATCACTATTATTATATTGTAAGTATAACGCTTATTGAACGAA\n+TTAAAAATATTATTATTATTATTATATTATAACCTATGCAAAGAGTATTG\n+ATAATAAAAATACATGAGTGACAGTGATAACCTTTTAGACAACCTAGTGT\n+CAAGCTTAAATAAATGGTCAGCGCACCAGGCAAGTAGGCAAAACAGTGCA\n+GAAAAAAATAATAAGTCATCAGATAATTGGTGGTCAAAAACAAAGACAAC\n+TAGCGAAATGGAATTTGAAGCTCAGTTAAAAGCGATCGTAGAGAGTGCTG\n+TTGCCGGTGCGCTCGCAGTCCAAAAACAATCATTTGAAAAGCAATTGCAG\n+GAGATGAATGAGCGAATCGGGAAATTAACAGTGAACACCCCAGAGGTGGA\n+AACTTATGTAGATGCTGAAATTAGACCAGGTGTTGTCTGTAGCGAGCCTC\n+TAGATATACTTAAATCTCTGCCAGATTTTGATGGCAAAAGTGAAACATAT\n+GTGTCGTGGAGAAAAGCGGCTCATGTCGCTTTTAAAGTTTTCAAAGATTA\n+CGAGGGAAGTTCAACATTTTACCAAGCTCTTGGTATTATGCGAAATAAAA\n+TAAAAGGTCCAGCGAATACAGTATTGGCTTCTTTTAATACTCCGTTACAT\n+TTCAAAGCAATGATCAGCCGTCTTGATTTCACATATTCTGACAAAAGGCC\n+GATCTATCTAATCGAACAAGAGCTATCAACTTTGCGACAGGGAGACATGA\n+CTCTTACTGAATTCTACGATGAAGTCGAGAAAAAACTGACCCTACTTACC\n+AACAAGACAATAATGACATTTGATAGTGCCTTGGCGATGTCACTGAATGA\n+AAAGTACAGGACGGACGCGTTACGTGTATTTGTAACCGGAGCTAAGAAAT\n+CGTTGAGCGACATTCTTTTTGCAAAAGGTCCAAAAGATTTACCAACTGCT\n+CTCGCTTTAGCGCAAGAGGTCGAGTCGAACCATGAGCGTTACCAATTCGC\n+CCTTATTTATTCTAAAAATATTGGAGACAGGGGTCAGAAAATCGAACAAA\n+GGCACAGCGATAAGGATAGAAACTCAATCATGCCCATGCAAACTAAAAAC\n+CCATATTTTAGCAAGCGTCAGGTGCATACTTATGATAACCAGGAAAGACA\n+AGATCCAGTCCAGTTAACAAATCCTGATGTATCCATGCGATCTAGAAGAA\n+CTGGAAATTTTGGACAAACTCCATTTCCGACTCAGGGAAATATTTGGCCA\n+TCCCAACAGCAAAATTCTTGGCCATCTCAACAACAATATTCTTGGCCATC\n+CCAACAACAAAATTCATTTCGAACACAAAATCAATTCGCATCGCAACCCC\n+AACAGCAAAACACAAGTCAGGCTCAGGGACATTTTGGGTATGCGCA
AGCA\n+TCAAAAAGACCAACGAGTGGCAGTGCAAGGTTTACAGGGCCAAAACAGCA\n+GAGGATCAACTACTTACCTCATGAGAAAGGTCAATGTGAGGAAGATACAG\n+ACGGTTATCAAAAGGAGGCAGAAGCGGAGGTTGATGATTATGAGGACGAA\n+CTAGTGAATTACGATCATGTTCATTTTTTAGCCACAAATCCCTGCTACCG\n+TACATAGAAAGAGAGATAGCAGGGAGAACCATAAAACTTTTGATTGACAC\n+CGGGGCTTCGAAAAATTACATACAGCCCCTCCCTGAATTAAAAAACATAA\n+TGCCGGTACAAAATAAATTCACGGTAAAATCGCTTCATGGTTGCAACACC\n+GTCAAACAGAAATGCTTTATTAAGCTATTTAACACATCTGTTCAATTCTT\n+TATTCTTCCAAGTCTCTCTAGTTTTGACGCAATAATAGGACTTGACCTTT\n+TGAAACAGGGAAATGCAACGTTAGATTTTAAGAACAAAACGTTGAATATC\n+AACAATGAAGTGGAATCTATTCAGTTTTTGAGATGTGACAGCGTAAATTT\n+CGCCAACATAGAGAATATTGTGGTTCCAAATCAGATATCTAATAAATTCC\n+ATACAATGCTTCGAAACCGATTGGCCGTCTTTGCGGAACCGGAAGAAGCA\n+CTGCCGTATAATACCAACATTGTTGCCACAATACGTACTGAGGACGACCA\n+ACCCATTTACTCAAAACTCTATCCGTACCCCATGGGCGTATCGGATTTTG\n+TGAATAAGGAGACACATGCTTTGTTAAAGGACGGAATTATCAGGCCCTCG\n+TCGTCACCTTACAACAATCCGGTTTGGGTAGTCGATAAAAAAGGTACAGA\n+TGAAGAGGGAAATACTAAGAAAAGGTTGGTTATAGATTTTAGAAAACTAA\n+ATTTAAAAACAATCGACGACAAGTACCCTATACCAAACGTAGTATGGATC\n+TTGTCAAATTTGGGAAAAGCCAGATTCTTTACAACCCTTGACCTTAAATC\n+GGCGTTTCACCAAATTCTGCTCGCAGAAAAGGATAGAGCGAAAACTGCCT\n+TTTCAGTAGGAAATGGAAAATACGAGTTTTGCCGTTTGCCGTTTGGCTTG\n+AAAAATGCCCCAAGTATTTTTCAACGTGCTATTGATGATGTTGTTAGGGA\n+CCGTATAGGAAAGTCATGTTACGTTTACGTTGACGACGTAATAATATTTT\n+CAAACGGAATTGAGGACCACGTAAACGACGTTGCTTGGGTACTAGACAGA\n+CTGTCTGGGGCAAACATGAGGGTTTCTAAAGAGAAATCGTTTTTCTTCAA\n+GGAAAGCGTCGAGTATCTCGGATTCATGGTGTCAAGTGGAGGTATCACAA\n+CCAGTCCTAGCAAAGTAGAGGCTATTCAGAAATATAATCAACCTACTAAT\n+CTGTTTAGTGTTCGATCGTTTTTAGGGCTAGCAAGTTATTACCGCTGCTT\n+TATTAAGGACTTCGCCTCTATTGCTAGACCACTCACTGACATTCTGAAGG\n+GTGAAAACGGAAAGGTTTCCGCAAGCCAGTCTAAAAAGATACCAATTTCT\n+TTCGATGAAAGACAATGTTCTGCTTTTGAGAAGCTTAAAAATGTTCTTGT\n+CTCCGAAAATGTAATGTTATTGTATCCCGATTATAGAAAAGCCTTTGACT\n+TAACAACAGACGCTTCGGCTTTTGGCCTGGGGGCAGTCTTATCACAGGAT\n+GGCAAGCCTGTTACAATGATTTCGAGAACTTTACAGGATAGAGAACTTAA\n+TTTCGCAACAAATGAACGAGAACTTTTGGCCATCGTTTGGGCTTTAAAGT\n+CTCTTAGGAACTATCTATATGGTGTCAAAAACTTAAACATTTTTACAGAT\n+CACCAGCCGTTAACATACGCCGTGTCAGATAG
GAATCCAAATGCAAAAAT\n+CAAGAGATGGAAGGCGTTTATAGACGAACATAATGCTAAAATTTTCTATA\n+AACCT'..b'AAGTAATCAGACAAGTCAAATTACTCACTAACGA\n+AAAAACGGTGGTAGTACCAAATCAGGAGCTGCAACCAGGTATAATAGTAG\n+CAAGCACCATTGCCGATAGCAAAAACGCATTGATTCGCATTATAAATACA\n+AATAATAAAGACGCCATAATAGATAGCGCGAAGATCAAATGCGAATCAAT\n+GAAAGACTATGACATTTTTACAACACCAGTAGAAAAGGAAAATAGAACTG\n+AAGAAATTTTAAAACAATTAAGATTCCCTAAACAATTCAATAATGAACTA\n+ACTAAGTTATGCACCGAGTTTAGCGATATTTTTGGTCTAGAAACAGAACC\n+AATATCGGCTAACAATTTCTACAAACAAAAACTCAGATTAGGGGAAAAAA\n+CACCGGTCTATATAAAAAACTATCGCATGGCAGATAGCCAAAAACCAGAA\n+ATCGCCAGACAGGTAAAAAAATTAATAGATGATGGAATAGTTGAACCATC\n+AATGTCTGAATATAATAGTCCATTACTTTTGGTTCCAAAGAAACCACTTC\n+CGAATTCCACGGAAAAAAGATGGCGATTAGCAGTTGACTATCGTCAAATA\n+AATAAGAAACTATTATCAGACAAATTTCCACTTCCAAGAATAGAAGATAT\n+TCTTGATCAATTAGGAAGAGCAAAGTATTTTTCATGTCTCGACCTAATGT\n+CTGGATTCCACCAGATAGAACTAGAAAAAAGGTATAGAGATATAACGTCA\n+TTTTCAACAGCCAATGGCTCATATCGCTTCACGCGATTACCATACGGACT\n+GAAAGTAGCACCAAACTCCTTCCAACGTAGGATGACACTTGCATTTTCTG\n+GTCTTGAACCATCGCAAGCATTTCTATATATGGATGACTTAGTAGTAATA\n+GGTTGTTCAGAAAAACATATGCTCAAAAATTTGACTAACGTATTCGAGCT\n+ATGTAGACGACATAATTTGAAACTACATCCAGGGAAATGTTCTTTCTTTA\n+TGAAAGAAGTAACATATTTGGGTCACAAATGTACCGATAAAGGTATACTC\n+CCAGATGACACCAAATATGAAGTTATAGAAAAATATCCTATACCAACAGA\n+TGCCGACAGTGCTAGGCGTTTCGTAGCCTTCTGTAATTATTACAGACGTT\n+TCATTAAAAATTTTTCTGATCATTCACGCCACTTAACGAGGCTTTGTAAA\n+AAGAATGTTCAATTCGAATGGACAGCAGAATGCAATGATGCATTCGAATA\n+CCTTAAAACAGAATTAATGAAACCAACATTACTACAGTACCCAGATTTCG\n+GTAAAGAATTTTGCATAACAACCGATGCTAGTAAACAGGCATGCGGAGCG\n+GTACTTACACAAGATCACAATGGTCAACAACTTCCAGTGGCATACGCTTC\n+AAGAATGTTCACTCAAGGTGAAAGTAATAAGTCCACTACAGAACAAGAAT\n+TAACGGCCATTCATTGGGCCATAAATCATTTTCGACCATACATATATGGC\n+AAGCATTTCATGGTAAAAAGCGATCATAGACCATTGTCATACCTATTCTC\n+TATGAAAAATCCAAGTTCAAAACTCACTCGTATGAGGCTGGATTTAGAAG\n+AGTATGACTTTACTGTAGAATATCTTAAGGGGAAAGATAACCATATTGCG\n+GACGCCTTGTCTCGCATAACAATAAAAGATCTGAAAACAATCAACAGAGA\n+AATATTAAAAGTTACCACCAGATCAAAAGCTAAACAGGAAAATTCCTGTA\n+AGGACGAAGCAATAGTCAAAATACAAGAGGAAAAAGAGCAAACAATAGAA\n+AAGCCCAAAGTCTATGAAGTTGTC
AATAATAATGACACAAAGAAATATGT\n+TTTAATCAAAATAGATAAACACAAGTGTTTATTAAAACGAGGAAAAACAA\n+TTGTTTCACGCTTTGATGTTGATGACTTGTATTCTAATGAAACATTTGAT\n+CTAAATCAATTCTTTCAAAGGCTTATTTCAAAAGCCGGAATGCATAAAAT\n+AACAAAAATGCGAATATCACCAAGCGAACAGATGTTCCAATTTGTATCAC\n+TAAATGAATTTAAAATAAAGGGCAACCGAGTACTCGAAAAAGTAGAACTA\n+GCTATTCTACAAAAGGTGATAATTATAGACAAAAATGACGAAGCTCAGAT\n+TAAAGAAATTTTGACAAAATTCCATGATGATCCTATAGAAGGAGGCCACA\n+CTGGTATTTCGCGAACCCAGTCAAAAATCAAAAGATTTTATTATTGGCCC\n+CAGATGACCAAGACAATCTCAAAGTATGTAAAGACTTGTTTGAAATGTCA\n+ACAAGCCAAAATTACAACACATACGAAAACTCCATTAACATTGATGCCAA\n+CGCCAGCAACAGCATTTGATACTGTTTTAATTGATACCATTGGTCCACTA\n+CCGAAATCGGAAGACGGAAATGAGTATGCAGTTACAATCATATGCGATCT\n+AACCAAGTTTTTAGTAACTATTCCAACACCAAATAAAAGTGCTAAAACAG\n+TTGCAAAGGCTATATTTGAATTATTTGTACTGAAGTACGGTCCAATGAAG\n+ACGTTCATTACAGATCAAGGTACGGAATACAAAAATTCACTTATGAATGA\n+ATTATGCAAATATATGCATATAGAAAATCTAACATCTAGCGCTCACCATC\n+ATCAAACTTTAGGAACAATAGAAAGAAGCCACCGAACTTTTAATGAATAT\n+ATACGTTCATACATATCGGTTAACAAAAGTGATTGGGACATTTGGTTACC\n+ATATTTCACTTATTGCTTCAATACAACACCCTCAATAGTCCATGACTATT\n+GCCCATACGAACTAGTATTTGGCAGACTACCCAGACAATTCAAAGATTTC\n+AGTAAGATAAACAAAATAGACCCAATATACAACTTAGACGACTACTCTAA\n+AGAGCTTAAATGCAGACTAGAATTGTCGTACAACAGAGCAAGAAGAATGT\n+TAGAAAAAGCAAAAGCGGATAGAAAATTAAGATATGATAGGAATACAAAT\n+AATTTCGAATTAAAAATAGGAGATAAAGTATTACTTAGAAAAGAAACAGG\n+TCATAAGTTAGATAAAAGATATGAAGGTCCTTATGACGTAGTAGATATAG\n+GAATAAATGACAATATAACCATTAAAACAGGAAGTAAGAAACAACAAATA\n+GTACATAAAGATAGGCTAAAAAAGCACAAATAGAATGAAAAAAAAAAAGG\n+GCAATCAATGCCAAACCTTTCATAATAAAACTTAAATAACGGCCTGATCA\n+GCCAAAACAATATAACAAAGACATAGACATAATCGAATTTTTATTAATTC\n+AAAATACATACATATTTTTTCTTTATTCATTTAAAAATTCTATATCATAA\n+ATAATGTTAATTCATTAAAAATAATATTTAAGTAATTTTTATTTTATAAT\n+GGTAATATAGTTGATAGAAAATAACTTCATTTCTTTACGTTATTTTAAAA\n+AAGAGGGGAGGTGTAGTATGTGCATATATCGAGGGTACACTGTACCTATA\n+AGTACACAGCAACACTTAGTTGCATTGCATAAATAAATGTCTCAAGTGAG\n+CGTGATATAAGATCACCCATTTATGCTTTAAGCTAAGTCAGCATCCCCAC\n+GCTGGCCGCTGGCCATATATGCGCATAAGCTCTCTCTCTCTCTCTCTTAT\n+ACATATATATATACGCTGCTCTTCTGCCGCTGTCGACGGCGGCGCAGTCG\n+CAGTATTTAG
GTAAGATTAGACACTCTGTAGAGGTTAAGCGGGCAGAACC\n+GTTTCTGCTACTCGAAGAGATAAGAAGAAATAAAAAGGTGGCCTGACGGC\n+TGCACCCAACTGCAAGGAAAACACGTGTTCTCAATTGGTGGCATATATTG\n+GTTTATTACA\n'
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_consensus.fa
--- a/test-data/test_consensus.fa Wed Oct 26 07:24:45 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,1143 +0,0 @@\n->FBgn0010302_Burdock\n-AGTTAACACAATCACAAAACACCCGAAATATAGTCGTAAGCCTCAAGTGC\n-TTTTCCCATCTATAGATCGAGCTTTACCTATAAGAAACTGTAACTTGTTA\n-AGCTTTAGAGATAAGAACTCTTGCTATACTTAAGTCAGTCGATTTTGGAA\n-GATTAGAAGCGTCGGTCATCGCCACGTACTTACTATTCGTCTCATTAAGT\n-GCAGACCGCGCAAGCCTATTGTAATTAATAAACTTACGCTAATAAATATA\n-TGGAAAATCTACTAAAATGATAATTGGCGCCCAAACGGATATAAAAACCT\n-ACGATAACTGAATAATTATAAATAAATAACAAAAGGAGGATCCGGAGACA\n-AAACCAGCGGCTTTGGCTAATTAACTCTAACCTAAGAAATAAAAATTTGC\n-TGATTACATAAAATATAATATTAATTACTAAGACCATCTACCTTAAAATT\n-GTTTGTTAATCACTATTATTATATTGTAAGTATAACGCTTATTGAACGAA\n-TTAAAAATATTATTATTATTATTATATTATAACCTATGCAAAGAGTATTG\n-ATAATAAAAATACATGAGTGACAGTGATAACCTTTTAGACAACCTAGTGT\n-CAAGCTTAAATAAATGGTCAGCGCACCAGGCAAGTAGGCAAAACAGTGCA\n-GAAAAAAATAATAAGTCATCAGATAATTGGTGGTCAAAAACAAAGACAAC\n-TAGCGAAATGGAATTTGAAGCTCAGTTAAAAGCGATCGTAGAGAGTGCTG\n-TTGCCGGTGCGCTCGCAGTCCAAAAACAATCATTTGAAAAGCAATTGCAG\n-GAGATGAATGAGCGAATCGGGAAATTAACAGTGAACACCCCAGAGGTGGA\n-AACTTATGTAGATGCTGAAATTAGACCAGGTGTTGTCTGTAGCGAGCCTC\n-TAGATATACTTAAATCTCTGCCAGATTTTGATGGCAAAAGTGAAACATAT\n-GTGTCGTGGAGAAAAGCGGCTCATGTCGCTTTTAAAGTTTTCAAAGATTA\n-CGAGGGAAGTTCAACATTTTACCAAGCTCTTGGTATTATGCGAAATAAAA\n-TAAAAGGTCCAGCGAATACAGTATTGGCTTCTTTTAATACTCCGTTACAT\n-TTCAAAGCAATGATCAGCCGTCTTGATTTCACATATTCTGACAAAAGGCC\n-GATCTATCTAATCGAACAAGAGCTATCAACTTTGCGACAGGGAGACATGA\n-CTCTTACTGAATTCTACGATGAAGTCGAGAAAAAACTGACCCTACTTACC\n-AACAAGACAATAATGACATTTGATAGTGCCTTGGCGATGTCACTGAATGA\n-AAAGTACAGGACGGACGCGTTACGTGTATTTGTAACCGGAGCTAAGAAAT\n-CGTTGAGCGACATTCTTTTTGCAAAAGGTCCAAAAGATTTACCAACTGCT\n-CTCGCTTTAGCGCAAGAGGTCGAGTCGAACCATGAGCGTTACCAATTCGC\n-CCTTATTTATTCTAAAAATATTGGAGACAGGGGTCAGAAAATCGAACAAA\n-GGCACAGCGATAAGGATAGAAACTCAATCATGCCCATGCAAACTAAAAAC\n-CCATATTTTAGCAAGCGTCAGGTGCATACTTATGATAACCAGGAAAGACA\n-AGATCCAGTCCAGTTAACAAATCCTGATGTATCCATGCGATCTAGAAGAA\n-CTGGAAATTTTGGACAAACTCCATTTCCGACTCAGGGAAATATTTGGCCA\n-TCCCAACAGCAAAATTCTTGGCCATCTCAACAACAATATTCTTGGCCATC\n-CCAACAACAAAATTCATTTCGAACACAAAATCAATTCGCATCGCAACCCC\n-AACAGCAAAACACAAGTCAGGCTCAGGGACATTTTGGGTATGCGCA
AGCA\n-TCAAAAAGACCAACGAGTGGCAGTGCAAGGTTTACAGGGCCAAAACAGCA\n-GAGGATCAACTACTTACCTCATGAGAAAGGTCAATGTGAGGAAGATACAG\n-ACGGTTATCAAAAGGAGGCAGAAGCGGAGGTTGATGATTATGAGGACGAA\n-CTAGTGAATTACGATCATGTTCATTTTTTAGCCACAAATCCCTGCTACCG\n-TACATAGAAAGAGAGATAGCAGGGAGAACCATAAAACTTTTGATTGACAC\n-CGGGGCTTCGAAAAATTACATACAGCCCCTCCCTGAATTAAAAAACATAA\n-TGCCGGTACAAAATAAATTCACGGTAAAATCGCTTCATGGTTGCAACACC\n-GTCAAACAGAAATGCTTTATTAAGCTATTTAACACATCTGTTCAATTCTT\n-TATTCTTCCAAGTCTCTCTAGTTTTGACGCAATAATAGGACTTGACCTTT\n-TGAAACAGGGAAATGCAACGTTAGATTTTAAGAACAAAACGTTGAATATC\n-AACAATGAAGTGGAATCTATTCAGTTTTTGAGATGTGACAGCGTAAATTT\n-CGCCAACATAGAGAATATTGTGGTTCCAAATCAGATATCTAATAAATTCC\n-ATACAATGCTTCGAAACCGATTGGCCGTCTTTGCGGAACCGGAAGAAGCA\n-CTGCCGTATAATACCAACATTGTTGCCACAATACGTACTGAGGACGACCA\n-ACCCATTTACTCAAAACTCTATCCGTACCCCATGGGCGTATCGGATTTTG\n-TGAATAAGGAGACACATGCTTTGTTAAAGGACGGAATTATCAGGCCCTCG\n-TCGTCACCTTACAACAATCCGGTTTGGGTAGTCGATAAAAAAGGTACAGA\n-TGAAGAGGGAAATACTAAGAAAAGGTTGGTTATAGATTTTAGAAAACTAA\n-ATTTAAAAACAATCGACGACAAGTACCCTATACCAAACGTAGTATGGATC\n-TTGTCAAATTTGGGAAAAGCCAGATTCTTTACAACCCTTGACCTTAAATC\n-GGCGTTTCACCAAATTCTGCTCGCAGAAAAGGATAGAGCGAAAACTGCCT\n-TTTCAGTAGGAAATGGAAAATACGAGTTTTGCCGTTTGCCGTTTGGCTTG\n-AAAAATGCCCCAAGTATTTTTCAACGTGCTATTGATGATGTTGTTAGGGA\n-CCGTATAGGAAAGTCATGTTACGTTTACGTTGACGACGTAATAATATTTT\n-CAAACGGAATTGAGGACCACGTAAACGACGTTGCTTGGGTACTAGACAGA\n-CTGTCTGGGGCAAACATGAGGGTTTCTAAAGAGAAATCGTTTTTCTTCAA\n-GGAAAGCGTCGAGTATCTCGGATTCATGGTGTCAAGTGGAGGTATCACAA\n-CCAGTCCTAGCAAAGTAGAGGCTATTCAGAAATATAATCAACCTACTAAT\n-CTGTTTAGTGTTCGATCGTTTTTAGGGCTAGCAAGTTATTACCGCTGCTT\n-TATTAAGGACTTCGCCTCTATTGCTAGACCACTCACTGACATTCTGAAGG\n-GTGAAAACGGAAAGGTTTCCGCAAGCCAGTCTAAAAAGATACCAATTTCT\n-TTCGATGAAAGACAATGTTCTGCTTTTGAGAAGCTTAAAAATGTTCTTGT\n-CTCCGAAAATGTAATGTTATTGTATCCCGATTATAGAAAAGCCTTTGACT\n-TAACAACAGACGCTTCGGCTTTTGGCCTGGGGGCAGTCTTATCACAGGAT\n-GGCAAGCCTGTTACAATGATTTCGAGAACTTTACAGGATAGAGAACTTAA\n-TTTCGCAACAAATGAACGAGAACTTTTGGCCATCGTTTGGGCTTTAAAGT\n-CTCTTAGGAACTATCTATATGGTGTCAAAAACTTAAACATTTTTACAGAT\n-CACCAGCCGTTAACATACGCCGTGTCAGATAG
GAATCCAAATGCAAAAAT\n-CAAGAGATGGAAGGCGTTTATAGACGAACATAATGCTAAAATTTTCTATA\n-AACCT'..b'AAGTAATCAGACAAGTCAAATTACTCACTAACGA\n-AAAAACGGTGGTAGTACCAAATCAGGAGCTGCAACCAGGTATAATAGTAG\n-CAAGCACCATTGCCGATAGCAAAAACGCATTGATTCGCATTATAAATACA\n-AATAATAAAGACGCCATAATAGATAGCGCGAAGATCAAATGCGAATCAAT\n-GAAAGACTATGACATTTTTACAACACCAGTAGAAAAGGAAAATAGAACTG\n-AAGAAATTTTAAAACAATTAAGATTCCCTAAACAATTCAATAATGAACTA\n-ACTAAGTTATGCACCGAGTTTAGCGATATTTTTGGTCTAGAAACAGAACC\n-AATATCGGCTAACAATTTCTACAAACAAAAACTCAGATTAGGGGAAAAAA\n-CACCGGTCTATATAAAAAACTATCGCATGGCAGATAGCCAAAAACCAGAA\n-ATCGCCAGACAGGTAAAAAAATTAATAGATGATGGAATAGTTGAACCATC\n-AATGTCTGAATATAATAGTCCATTACTTTTGGTTCCAAAGAAACCACTTC\n-CGAATTCCACGGAAAAAAGATGGCGATTAGCAGTTGACTATCGTCAAATA\n-AATAAGAAACTATTATCAGACAAATTTCCACTTCCAAGAATAGAAGATAT\n-TCTTGATCAATTAGGAAGAGCAAAGTATTTTTCATGTCTCGACCTAATGT\n-CTGGATTCCACCAGATAGAACTAGAAAAAAGGTATAGAGATATAACGTCA\n-TTTTCAACAGCCAATGGCTCATATCGCTTCACGCGATTACCATACGGACT\n-GAAAGTAGCACCAAACTCCTTCCAACGTAGGATGACACTTGCATTTTCTG\n-GTCTTGAACCATCGCAAGCATTTCTATATATGGATGACTTAGTAGTAATA\n-GGTTGTTCAGAAAAACATATGCTCAAAAATTTGACTAACGTATTCGAGCT\n-ATGTAGACGACATAATTTGAAACTACATCCAGGGAAATGTTCTTTCTTTA\n-TGAAAGAAGTAACATATTTGGGTCACAAATGTACCGATAAAGGTATACTC\n-CCAGATGACACCAAATATGAAGTTATAGAAAAATATCCTATACCAACAGA\n-TGCCGACAGTGCTAGGCGTTTCGTAGCCTTCTGTAATTATTACAGACGTT\n-TCATTAAAAATTTTTCTGATCATTCACGCCACTTAACGAGGCTTTGTAAA\n-AAGAATGTTCAATTCGAATGGACAGCAGAATGCAATGATGCATTCGAATA\n-CCTTAAAACAGAATTAATGAAACCAACATTACTACAGTACCCAGATTTCG\n-GTAAAGAATTTTGCATAACAACCGATGCTAGTAAACAGGCATGCGGAGCG\n-GTACTTACACAAGATCACAATGGTCAACAACTTCCAGTGGCATACGCTTC\n-AAGAATGTTCACTCAAGGTGAAAGTAATAAGTCCACTACAGAACAAGAAT\n-TAACGGCCATTCATTGGGCCATAAATCATTTTCGACCATACATATATGGC\n-AAGCATTTCATGGTAAAAAGCGATCATAGACCATTGTCATACCTATTCTC\n-TATGAAAAATCCAAGTTCAAAACTCACTCGTATGAGGCTGGATTTAGAAG\n-AGTATGACTTTACTGTAGAATATCTTAAGGGGAAAGATAACCATATTGCG\n-GACGCCTTGTCTCGCATAACAATAAAAGATCTGAAAACAATCAACAGAGA\n-AATATTAAAAGTTACCACCAGATCAAAAGCTAAACAGGAAAATTCCTGTA\n-AGGACGAAGCAATAGTCAAAATACAAGAGGAAAAAGAGCAAACAATAGAA\n-AAGCCCAAAGTCTATGAAGTTGTC
AATAATAATGACACAAAGAAATATGT\n-TTTAATCAAAATAGATAAACACAAGTGTTTATTAAAACGAGGAAAAACAA\n-TTGTTTCACGCTTTGATGTTGATGACTTGTATTCTAATGAAACATTTGAT\n-CTAAATCAATTCTTTCAAAGGCTTATTTCAAAAGCCGGAATGCATAAAAT\n-AACAAAAATGCGAATATCACCAAGCGAACAGATGTTCCAATTTGTATCAC\n-TAAATGAATTTAAAATAAAGGGCAACCGAGTACTCGAAAAAGTAGAACTA\n-GCTATTCTACAAAAGGTGATAATTATAGACAAAAATGACGAAGCTCAGAT\n-TAAAGAAATTTTGACAAAATTCCATGATGATCCTATAGAAGGAGGCCACA\n-CTGGTATTTCGCGAACCCAGTCAAAAATCAAAAGATTTTATTATTGGCCC\n-CAGATGACCAAGACAATCTCAAAGTATGTAAAGACTTGTTTGAAATGTCA\n-ACAAGCCAAAATTACAACACATACGAAAACTCCATTAACATTGATGCCAA\n-CGCCAGCAACAGCATTTGATACTGTTTTAATTGATACCATTGGTCCACTA\n-CCGAAATCGGAAGACGGAAATGAGTATGCAGTTACAATCATATGCGATCT\n-AACCAAGTTTTTAGTAACTATTCCAACACCAAATAAAAGTGCTAAAACAG\n-TTGCAAAGGCTATATTTGAATTATTTGTACTGAAGTACGGTCCAATGAAG\n-ACGTTCATTACAGATCAAGGTACGGAATACAAAAATTCACTTATGAATGA\n-ATTATGCAAATATATGCATATAGAAAATCTAACATCTAGCGCTCACCATC\n-ATCAAACTTTAGGAACAATAGAAAGAAGCCACCGAACTTTTAATGAATAT\n-ATACGTTCATACATATCGGTTAACAAAAGTGATTGGGACATTTGGTTACC\n-ATATTTCACTTATTGCTTCAATACAACACCCTCAATAGTCCATGACTATT\n-GCCCATACGAACTAGTATTTGGCAGACTACCCAGACAATTCAAAGATTTC\n-AGTAAGATAAACAAAATAGACCCAATATACAACTTAGACGACTACTCTAA\n-AGAGCTTAAATGCAGACTAGAATTGTCGTACAACAGAGCAAGAAGAATGT\n-TAGAAAAAGCAAAAGCGGATAGAAAATTAAGATATGATAGGAATACAAAT\n-AATTTCGAATTAAAAATAGGAGATAAAGTATTACTTAGAAAAGAAACAGG\n-TCATAAGTTAGATAAAAGATATGAAGGTCCTTATGACGTAGTAGATATAG\n-GAATAAATGACAATATAACCATTAAAACAGGAAGTAAGAAACAACAAATA\n-GTACATAAAGATAGGCTAAAAAAGCACAAATAGAATGAAAAAAAAAAAGG\n-GCAATCAATGCCAAACCTTTCATAATAAAACTTAAATAACGGCCTGATCA\n-GCCAAAACAATATAACAAAGACATAGACATAATCGAATTTTTATTAATTC\n-AAAATACATACATATTTTTTCTTTATTCATTTAAAAATTCTATATCATAA\n-ATAATGTTAATTCATTAAAAATAATATTTAAGTAATTTTTATTTTATAAT\n-GGTAATATAGTTGATAGAAAATAACTTCATTTCTTTACGTTATTTTAAAA\n-AAGAGGGGAGGTGTAGTATGTGCATATATCGAGGGTACACTGTACCTATA\n-AGTACACAGCAACACTTAGTTGCATTGCATAAATAAATGTCTCAAGTGAG\n-CGTGATATAAGATCACCCATTTATGCTTTAAGCTAAGTCAGCATCCCCAC\n-GCTGGCCGCTGGCCATATATGCGCATAAGCTCTCTCTCTCTCTCTCTTAT\n-ACATATATATATACGCTGCTCTTCTGCCGCTGTCGACGGCGGCGCAGTCG\n-CAGTATTTAG
GTAAGATTAGACACTCTGTAGAGGTTAAGCGGGCAGAACC\n-GTTTCTGCTACTCGAAGAGATAAGAAGAAATAAAAAGGTGGCCTGACGGC\n-TGCACCCAACTGCAAGGAAAACACGTGTTCTCAATTGGTGGCATATATTG\n-GTTTATTACA\n'
b
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_insertions_out.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_insertions_out.bed Mon Dec 05 09:58:47 2016 -0500
b
@@ -0,0 +1,11 @@
+Chr Start End TransposonName TransposonDirection Class VariantSupport Frequency Junction1 Junction1Support Junction2 Junction2Support 5'_Support 3'_Support
+chr2L 2003846 2003873 FBgn0003122_pogo sense 1p1 39 1.0000 2003871 2 2003871 5 18 14
+chr2L 2131300 2131312 FBgn0001283_jockey sense 1p1 37 1.0000 2131304 1 2131306 6 11 19
+chr2L 2397913 2397938 FBgn0000155_roo sense 1p1 47 1.0000 2397942 5 2397943 5 19 18
+chr2L 2412909 2412937 FBgn0003055_P-element sense 1p1 45 0.9783 2412907 2 2412908 3 21 19
+chr2L 2569095 2569595 FBgn0004141_HeT-A sense 2p 6 1.0000 2569345 0 2569345 0 6 0
+chr2L 2714434 2714458 FBgn0000349_copia sense 1p1 40 1.0000 2714437 8 2714440 4 12 16
+chr2L 2763527 2763539 FBgn0000199_blood sense 1p1 39 0.9070 2763533 0 2763533 0 19 20
+chr2L 2920516 2920519 FBgn0010302_Burdock sense 1p1 39 1.0000 2920517 4 2920518 4 15 16
+chr2L 2965217 2965244 FBgn0001167_gypsy sense 1p1 43 0.9348 2965230 0 2965230 0 24 19
+chr2L 2966906 2966920 FBgn0000481_Doc sense 1p1 50 0.9804 2966910 2 2966911 3 32 13