Previous changeset 20:6e02b9179a24 (2016-10-26) |
Commit message:
planemo upload for repository https://github.com/portiahollyoak/Tools commit 0fea84d05f8976b8360a8b4943ecb01b87e3ade0-dirty |
modified:
scripts/TEMP_Absence.sh scripts/TEMP_Insertion.sh scripts/make.bp.bed.pl scripts/pickClippedFastq.pl scripts/pickOverlapPair.ex.pl scripts/pickOverlapPair.ex_MEM.pl scripts/pickOverlapPair.in.pl scripts/pickSoftClipping.over.pl scripts/pickUniqIntervalPos.pl scripts/pickUniqMate.pl scripts/pickUniqPairFastq.pl scripts/pickUniqPairFastq_MEM.pl scripts/pickUniqPos.pl scripts/pickUniqPos_MEM.pl |
added:
scripts/pickUniqMate.pl.orig scripts/pickUniqPairFastq.pl.orig scripts/pickUniqPairFastq_MEM.pl.orig scripts/pickUniqPos.pl.orig scripts/pickUniqPos_MEM.pl.orig temp_absences.xml temp_insertions.xml test-data/dm3_chr2L.2bit test-data/test_TE_annotation.bed test-data/test_absence_out.bed test-data/test_chromosome.sorted.bam test-data/test_concensus.fa test-data/test_insertions_out.bed |
removed:
temp.xml test-data/chr2l_bwa_mem.bam test-data/dm6_chr2l.twobit test-data/test_TE_annotation.gff3 test-data/test_chromosome.absence.refined.bp.summary test-data/test_chromosome.insertion.refined.bp.summary test-data/test_consensus.fa |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/TEMP_Absence.sh --- a/scripts/TEMP_Absence.sh Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/TEMP_Absence.sh Mon Dec 05 09:58:47 2016 -0500 |
b |
@@ -116,7 +116,7 @@ fi #Detect excision sites -samtools view -XF 0x2 $name > $i.unpair.sam +samtools view -F 0x2 $name > $i.unpair.sam awk -F "\t" '{OFS="\t"; if ($9 != 0) print $0}' $i.unpair.sam > temp1.sam perl $BINDIR/pickUniqIntervalPos.pl temp1.sam $INSERT > $i.unproper.uniq.interval.bed |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/TEMP_Insertion.sh --- a/scripts/TEMP_Insertion.sh Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/TEMP_Insertion.sh Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -126,7 +126,7 @@ fi # Get the mate seq of the uniq-unpaired reads -samtools view -XF 0x2 $name > $i.unpair.sam +samtools view -F 0x2 $name > $i.unpair.sam if [[ $SCORE -eq 0 ]] then perl $BINDIR/pickUniqPairFastq.pl $i.unpair.sam $i.unpair.uniq @@ -144,7 +144,7 @@ #Summary -samtools view -hSXF 0x2 $i.unpair.uniq.transposons.sam > $i.unpair.uniq.transposons.unpair.sam +samtools view -hSF 0x2 $i.unpair.uniq.transposons.sam > $i.unpair.uniq.transposons.unpair.sam perl $BINDIR/pickUniqMate.pl $i.unpair.uniq.transposons.unpair.sam $i.unpair.uniq.bed > $i.unpair.uniq.transposons.bed cp $i.unpair.uniq.transposons.bed $i.unpair.uniq.transposons.filtered.bed |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/make.bp.bed.pl --- a/scripts/make.bp.bed.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/make.bp.bed.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -18,7 +18,7 @@ my $l=$i+3; system("cut -f7,4,6,$j,$k,$l $ARGV[0] > temp"); open (input, "<temp") or die "Can't open temp since $!\n"; - open (output, ">>$name.insertion.bp.bed") or die "Can't open $name.insertion.bp.bed since $!\n"; + open (output, ">$name.insertion.bp.bed") or die "Can't open $name.insertion.bp.bed since $!\n"; my $header=<input>; while (my $line=<input>) { chomp($line); @@ -26,6 +26,7 @@ if (($b[4] ne "0")||($b[5] ne "0")) { my @c=split(/\:/, $b[2]); my @d=split(/\./, $c[1]); + if ($c[0] eq "P") {next;} if ($d[0] > $d[1]) { my $temp=$d[0]; $d[0]=$d[1]; @@ -33,9 +34,9 @@ } my $lower=$d[0]; my $upper=$d[1]; - if (($lower >= 0) && ($upper >= 0)) { - print output "$c[0]\t$lower\t$upper\t$b[0]\t$b[1]\t$b[3]\t$b[4]\t$b[5]\n"; - } + if (($lower >= 0) && ($upper >= 0)) { + print output "$c[0]\t$lower\t$upper\t$b[0]\t$b[1]\t$b[3]\t$b[4]\t$b[5]\n"; + } $chrs{$c[0]}=1; } } @@ -85,25 +86,35 @@ } if (-s "tmp2") { - system("bedtools subtract -a tmp -b tmp2 -f 1.0 > tmp3"); - open (input, "<tmp3") or die "Can't open tmp3 since $!\n"; + my %to_filter=(); + open (input, "<tmp2") or die "Can't open tmp2 since $!\n"; + while (my $line=<input>) { + chomp($line); + my @a=split(/\t/, $line); + $to_filter{"$a[0]\:$a[1]\:$a[2]\:$a[3]\:$a[5]"}=1; + } + close input; + open (input, "<tmp") or die "Can't open tmp since $!\n"; open (output, ">$name.insertion.bp.bed") or die "Can't open $name.insertion.bp.bed since $!\n"; while (my $line=<input>) { chomp($line); my @a=split(/\t/, $line); - my $direction="sense"; - if ($a[5] eq "-") {$direction="antisense";} - my $chr_num=$a[0]; - $chr_num =~ s/chr//; - if (($chrs{$a[0]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[0];} - print output "$chr_num\t$a[1]\t$a[2]\t$a[3]\t$direction\t$a[6]\t$a[7]\t$a[8]\n"; + if (!defined $to_filter{"$a[0]\:$a[1]\:$a[2]\:$a[3]\:$a[5]"}) { + my $direction="sense"; + if ($a[5] eq "-") {$direction="antisense";} + my $chr_num=$a[0]; + $chr_num =~ s/chr//; + if (($chrs{$a[0]} == 1) && (! 
defined $chrs{$chr_num})) {$chr_num=$a[0];} + print output "$chr_num\t$a[1]\t$a[2]\t$a[3]\t$direction\t$a[6]\t$a[7]\t$a[8]\n"; + } } close input; close output; } - } - system("rm tmp*"); + system("rm tmp*"); + + } } } |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickClippedFastq.pl --- a/scripts/pickClippedFastq.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/pickClippedFastq.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -38,7 +38,7 @@ my $upper=$a[2]+15; if (($lower > 0)&&($upper > 0)) { - system("samtools view -hXf 0x2 $ARGV[0].sorted.bam $a[0]\:$lower\-$upper > temp.sam"); + system("samtools view -hf 0x2 $ARGV[0].sorted.bam $a[0]\:$lower\-$upper > temp.sam"); open in,"temp.sam"; my %pe1; @@ -48,7 +48,9 @@ chomp; my @f=split/\t/,$_,12; ## read number 1 or 2 - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} ## XT:A:* my ($xt)=$f[11]=~/XT:A:(.)/; @@ -62,7 +64,7 @@ my $clipseq=""; my @z=split(/M/, $f[5]); - if (($f[5]=~/S$/)&&($f[1]=~/r/)) + if (($f[5]=~/S$/)&&(($f[1] & 16) == 16)) { my (@cigar_m)=$f[5]=~/(\d+)M/g; my (@cigar_d)=$f[5]=~/(\d+)D/g; @@ -79,7 +81,7 @@ } } - elsif (($f[1]=~/R/)&&($z[0]=~/S/)) + elsif ((($f[1] & 32) == 32)&&($z[0]=~/S/)) { $coor=$f[3]; $strand="+"; @@ -121,7 +123,9 @@ { chomp; my @f=split/\t/,$_,12; - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} my ($xt)=$f[11]=~/XT:A:(.)/; if ($f[5]=~/S/) { @@ -132,7 +136,7 @@ my $clipseq=""; my @z=split(/M/, $f[5]); - if (($f[5]=~/S$/)&&($f[1]=~/r/)) + if (($f[5]=~/S$/)&&(($f[1] & 16) == 16)) { my (@cigar_m)=$f[5]=~/(\d+)M/g; my (@cigar_d)=$f[5]=~/(\d+)D/g; @@ -149,7 +153,7 @@ } } - elsif (($f[1]=~/R/)&&($z[0]=~/S/)) + elsif ((($f[1] & 32) == 32)&&($z[0]=~/S/)) { $coor=$f[3]; $strand="+"; |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickOverlapPair.ex.pl --- a/scripts/pickOverlapPair.ex.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/pickOverlapPair.ex.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -56,7 +56,7 @@ my $chr_num=$a[0]; $chr_num =~ s/chr//; if (($chrs{$a[0]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[0];} - system("samtools view -Xf 0x2 $title $chr_num\:$leftlower\-$leftupper $chr_num\:$rightlower\-$rightupper > temp.sam"); + system("samtools view -f 0x2 $title $chr_num\:$leftlower\-$leftupper $chr_num\:$rightlower\-$rightupper > temp.sam"); open in,"temp.sam"; my %ps=(); @@ -70,14 +70,16 @@ chomp; my @f=split/\t/,$_,12; ## read number 1 or 2 - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} ## XT:A:* my ($xt)=$f[11]=~/XT:A:(.)/; ## Coordinate my $coor=$f[3]; - if ($f[1]=~/r/) + if (($f[1] & 16) == 16) { if ($xt eq "U") {$uniqm{$f[0]}=1;} my (@cigar_m)=$f[5]=~/(\d+)M/g; @@ -87,7 +89,7 @@ my $aln_ln=sum(@cigar_m,@cigar_d); $me{$f[0]}=$f[3]+$aln_ln-1; } - elsif ($f[1]=~/R/) { + elsif (($f[1] & 32) == 32) { $ps{$f[0]}=$f[3]; if ($xt eq "U") {$uniqp{$f[0]}=1;} } |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickOverlapPair.ex_MEM.pl --- a/scripts/pickOverlapPair.ex_MEM.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/pickOverlapPair.ex_MEM.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -56,7 +56,7 @@ my $chr_num=$a[0]; $chr_num =~ s/chr//; if (($chrs{$a[0]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[0];} - system("samtools view -Xf 0x2 $title $chr_num\:$leftlower\-$leftupper $chr_num\:$rightlower\-$rightupper > temp.sam"); + system("samtools view -f 0x2 $title $chr_num\:$leftlower\-$leftupper $chr_num\:$rightlower\-$rightupper > temp.sam"); open in,"temp.sam"; my %ps=(); @@ -70,7 +70,9 @@ chomp; my @f=split/\t/,$_,12; ## read number 1 or 2 - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} ## XT:A:* my $xt=""; @@ -92,7 +94,7 @@ ## Coordinate my $coor=$f[3]; - if ($f[1]=~/r/) + if (($f[1] & 16) == 16) { if ($xt eq "U") {$uniqm{$f[0]}=1;} my (@cigar_m)=$f[5]=~/(\d+)M/g; @@ -102,7 +104,7 @@ my $aln_ln=sum(@cigar_m,@cigar_d); $me{$f[0]}=$f[3]+$aln_ln-1; } - elsif ($f[1]=~/R/) { + elsif (($f[1] & 32) == 32) { $ps{$f[0]}=$f[3]; if ($xt eq "U") {$uniqp{$f[0]}=1;} } |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickOverlapPair.in.pl --- a/scripts/pickOverlapPair.in.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/pickOverlapPair.in.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -29,7 +29,7 @@ } my $lower=$b[0]-$frag; my $upper=$c[0]+$frag; - system("samtools view -Xf 0x2 $title $a[0]\:$lower\-$upper > temp.sam"); + system("samtools view -f 0x2 $title $a[0]\:$lower\-$upper > temp.sam"); open in,"temp.sam"; my %ps=(); @@ -41,13 +41,15 @@ chomp; my @f=split/\t/,$_,12; ## read number 1 or 2 - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} ## XT:A:* my ($xt)=$f[11]=~/XT:A:(.)/; ## Coordinate - if ($f[1]=~/r/) + if (($f[1] & 16) == 16) { my (@cigar_m)=$f[5]=~/(\d+)M/g; my (@cigar_d)=$f[5]=~/(\d+)D/g; |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickSoftClipping.over.pl --- a/scripts/pickSoftClipping.over.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/pickSoftClipping.over.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -38,7 +38,7 @@ $chr_num =~ s/chr//; if (($chrs{$a[2]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[2];} system("samtools view -bu $title $chr_num\:$lower\-$upper > temp.bam"); - system("samtools view -Xf 0x2 temp.bam > temp.sam"); + system("samtools view -f 0x2 temp.bam > temp.sam"); my $leftseq=""; my $rightseq=""; @@ -79,7 +79,9 @@ chomp; my @f=split/\t/,$_,12; ## read number 1 or 2 - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} ## XT:A:* my ($xt)=$f[11]=~/XT:A:(.)/; @@ -94,7 +96,7 @@ my $strand=""; my @z=split(/M/, $f[5]); - if (($f[5]=~/S$/)&&($f[1]=~/r/)) + if (($f[5]=~/S$/)&&(($f[1] & 16) == 16)) { my (@cigar_m)=$f[5]=~/(\d+)M/g; my (@cigar_d)=$f[5]=~/(\d+)D/g; @@ -116,7 +118,7 @@ } # print "\n"; } - elsif (($f[1]=~/R/)&&($z[0]=~/S/)) { + elsif ((($f[1] & 32) == 32)&&($z[0]=~/S/)) { $coor=$f[3]; $strand="+"; my (@clipped)=$z[0]=~/(\d+)S/g; my $cliplen=sum(@clipped); |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqIntervalPos.pl --- a/scripts/pickUniqIntervalPos.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/pickUniqIntervalPos.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -10,7 +10,9 @@ chomp; my @f=split/\t/,$_,12; ## read number 1 or 2 - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} ## XT:A:* my ($xt)=$f[11]=~/XT:A:(.)/; @@ -18,7 +20,7 @@ my $strand="+"; ## parse CIGAR - if(($f[1]=~/R/)&&($f[8] > $ARGV[1])&&($f[8] <= 10000)) + if((($f[1] & 32) == 32)&&($f[8] > $ARGV[1])&&($f[8] <= 10000)) { # CIGAR my (@cigar_m)=$f[5]=~/(\d+)M/g; |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqMate.pl --- a/scripts/pickUniqMate.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/pickUniqMate.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -51,7 +51,10 @@ if ($mm > 5) {next;} - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} + # CIGAR my (@cigar_m)=$f[5]=~/(\d+)M/g; my (@cigar_d)=$f[5]=~/(\d+)D/g; @@ -60,7 +63,7 @@ my $aln_ln=sum(@cigar_m,@cigar_d); my $strand="+"; - if($f[1]=~/r/) + if(($f[1] & 16) == 16) { my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); $f[9]=$seq->revcom->seq; |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqMate.pl.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/pickUniqMate.pl.orig Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -0,0 +1,106 @@ +#!/share/bin/perl +use List::Util qw(sum); +use Bio::Seq; + +die "perl $0 <mate sam with header> <uniq bed>\n" if @ARGV<1; + +open in,$ARGV[1]; +my %uniq; +while(<in>) +{ + chomp; + my @f=split; + $uniq{$f[3]}=[@f]; +} +close in; + +open in,$ARGV[0]; +my (%te,@ref,%ref); +while(<in>) +{ + chomp; + my @f=split/\s+/,$_; + # headers + if(/^\@SQ/) + { + my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/; + push @ref,[$sn,$ln]; + $ref{$sn}=$#ref; + next; + } + + # unmapped + next if $f[2] eq "*"; + + my $mm=200; + my $xa=""; + for my $q (11..$#f) + { + if($f[$q]=~/NM:/) + { + $mm=$f[$q]; + $mm =~ s/NM://; + } + + if($f[$q]=~/XA:Z:/) + { + ($xa)=$f[$q]=~/XA:Z:(.*);$/; + last; + } + } + + if ($mm > 5) {next;} + + my ($rnum)=$f[1]=~/(\d)$/; + # CIGAR + my (@cigar_m)=$f[5]=~/(\d+)M/g; + my (@cigar_d)=$f[5]=~/(\d+)D/g; + my (@cigar_s)=$f[5]=~/(\d+)S/g; + my (@cigar_i)=$f[5]=~/(\d+)I/g; + my $aln_ln=sum(@cigar_m,@cigar_d); + + my $strand="+"; + if($f[1]=~/r/) + { + my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); + $f[9]=$seq->revcom->seq; + $strand="-"; + } + + # align to the junctions + if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1]) + { + if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1]) + { + $f[2]=${$ref[$ref{$f[2]}+1]}[0]; + $f[3]=1; + $aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1); + } + else + { + $aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1; + } + } + + $pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";"; + + # XA tag + my @xa=split(";",$xa); + $pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa; + +} +close in; + +foreach my $id (keys %pe) +{ + next if exists $pe{$id}{1} && exists $pe{$id}{2} && exists $uniq{$id."/1"} && exists $uniq{$id."/2"}; + foreach my $rid (keys %{$pe{$id}}) + { + my $mate_id=($rid==1)?2:1; + if(exists $uniq{$id."/".$mate_id}) + { + ${$uniq{$id."/".$mate_id}}[4]=$pe{$id}{$rid}; + print join("\t",@{$uniq{$id."/".$mate_id}}),"\n"; + } + } +} |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPairFastq.pl --- a/scripts/pickUniqPairFastq.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/pickUniqPairFastq.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -13,13 +13,15 @@ chomp; my @f=split/\t/,$_,12; ## read number 1 or 2 - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} ## XT:A:* my ($xt)=$f[11]=~/XT:A:(.)/; ## revcom the read mapped to the reverse strand - if($f[1]=~/r/) + if(($f[1] & 16) == 16) { my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); $f[9]=$seq->revcom->seq; |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPairFastq.pl.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/pickUniqPairFastq.pl.orig Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -0,0 +1,45 @@ +#!/share/bin/perl +use Bio::Seq; + +die "perl $0 <sam> <output prefix>\n" if @ARGV<1; + +open m1,">$ARGV[1].1.fastq"; +open m2,">$ARGV[1].2.fastq"; + +open in,$ARGV[0]; +my %pe; +while(<in>) +{ + chomp; + my @f=split/\t/,$_,12; + ## read number 1 or 2 + my ($rnum)=$f[1]=~/(\d)$/; + + ## XT:A:* + my ($xt)=$f[11]=~/XT:A:(.)/; + + ## revcom the read mapped to the reverse strand + if($f[1]=~/r/) + { + my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); + $f[9]=$seq->revcom->seq; + $f[10]=reverse $f[10]; + } + if (($rnum == 1) || ($rnum == 2)) + { + ${$pe{$f[0]}}[$rnum-1]=[$xt,$f[9],$f[10]]; + } +} +close in; + +foreach my $id (keys %pe) +{ + my @rid=@{$pe{$id}}; + if (($rid[0][1] ne "") && ($rid[1][1] ne "") && (($rid[0][0] eq "U" || $rid[1][0] eq "U"))) + { + print m2 "@"."$id/2","\n",$rid[1][1],"\n","+$id/2","\n",$rid[1][2],"\n"; + print m1 "@"."$id/1","\n",$rid[0][1],"\n","+$id/1","\n",$rid[0][2],"\n"; + } +} +close m1; +close m2; |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPairFastq_MEM.pl --- a/scripts/pickUniqPairFastq_MEM.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/pickUniqPairFastq_MEM.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -13,7 +13,9 @@ chomp; my @f=split/\t/,$_,12; ## read number 1 or 2 - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} ## XT:A:* my $xt=""; @@ -34,7 +36,7 @@ } ## revcom the read mapped to the reverse strand - if($f[1]=~/r/) + if (($f[1] & 16) == 16) { my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); $f[9]=$seq->revcom->seq; |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPairFastq_MEM.pl.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/pickUniqPairFastq_MEM.pl.orig Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -0,0 +1,60 @@ +#!/share/bin/perl +use Bio::Seq; + +die "perl $0 <sam> <output prefix>\n" if @ARGV<1; + +open m1,">$ARGV[1].1.fastq"; +open m2,">$ARGV[1].2.fastq"; + +open in,$ARGV[0]; +my %pe; +while(<in>) +{ + chomp; + my @f=split/\t/,$_,12; + ## read number 1 or 2 + my ($rnum)=$f[1]=~/(\d)$/; + + ## XT:A:* + my $xt=""; + my @a=split(/\s+/, $_); + my $as=0; + my $xs=0; + for my $i (11..$#a) { + if ($a[$i] =~ /^AS:i:/) { + $a[$i] =~ s/AS:i://; + $as=$a[$i]; + } + elsif ($a[$i] =~ /^XS:i:/) { + $a[$i] =~ s/XS:i://; + $xs=$a[$i]; + } + if (($xs > 0) && ($as-$xs <= $ARGV[2])) {$xt="R";} + elsif ($as > 0) {$xt="U";} + } + + ## revcom the read mapped to the reverse strand + if($f[1]=~/r/) + { + my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); + $f[9]=$seq->revcom->seq; + $f[10]=reverse $f[10]; + } + if (($rnum == 1) || ($rnum == 2)) + { + ${$pe{$f[0]}}[$rnum-1]=[$xt,$f[9],$f[10]]; + } +} +close in; + +foreach my $id (keys %pe) +{ + my @rid=@{$pe{$id}}; + if (($rid[0][1] ne "") && ($rid[1][1] ne "") && (($rid[0][0] eq "U" || $rid[1][0] eq "U"))) + { + print m2 "@"."$id/2","\n",$rid[1][1],"\n","+$id/2","\n",$rid[1][2],"\n"; + print m1 "@"."$id/1","\n",$rid[0][1],"\n","+$id/1","\n",$rid[0][2],"\n"; + } +} +close m1; +close m2; |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPos.pl --- a/scripts/pickUniqPos.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/pickUniqPos.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -10,14 +10,16 @@ chomp; my @f=split/\t/,$_,12; ## read number 1 or 2 - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} ## XT:A:* my ($xt)=$f[11]=~/XT:A:(.)/; my $strand="+"; ## revcomp - if($f[1]=~/r/) + if(($f[1] & 16) == 16) { my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); $f[9]=$seq->revcom->seq; |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPos.pl.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/pickUniqPos.pl.orig Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -0,0 +1,41 @@ +#!/share/bin/perl +use Bio::Seq; +use List::Util qw(sum); + +die "perl $0 <sam>\n" if @ARGV<1; +open in,$ARGV[0]; +my %pe; +while(<in>) +{ + chomp; + my @f=split/\t/,$_,12; + ## read number 1 or 2 + my ($rnum)=$f[1]=~/(\d)$/; + + ## XT:A:* + my ($xt)=$f[11]=~/XT:A:(.)/; + + my $strand="+"; + ## revcomp + if($f[1]=~/r/) + { + my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); + $f[9]=$seq->revcom->seq; + $strand="-"; + } + + ## parse CIGAR + if($xt eq "U") + { + # CIGAR + my (@cigar_m)=$f[5]=~/(\d+)M/g; + my (@cigar_d)=$f[5]=~/(\d+)D/g; + my (@cigar_s)=$f[5]=~/(\d+)S/g; + my (@cigar_i)=$f[5]=~/(\d+)I/g; + my $aln_ln=sum(@cigar_m,@cigar_d); + + print $f[2],"\t",$f[3]-1,"\t",$f[3]-1+$aln_ln,"\t$f[0]/$rnum\t",$f[9],"\t",$strand,"\n"; + } +} +close in; + |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPos_MEM.pl --- a/scripts/pickUniqPos_MEM.pl Wed Oct 26 07:24:45 2016 -0400 +++ b/scripts/pickUniqPos_MEM.pl Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -10,7 +10,9 @@ chomp; my @f=split/\t/,$_,12; ## read number 1 or 2 - my ($rnum)=$f[1]=~/(\d)$/; + #my ($rnum)=$f[1]=~/(\d)$/; + my $rnum=1; + if (($f[1] & 128) == 128) {$rnum=2;} ## XT:A:* my $xt=""; @@ -32,7 +34,7 @@ my $strand="+"; ## revcomp - if($f[1]=~/r/) + if(($f[1] & 16) == 16) { my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); $f[9]=$seq->revcom->seq; |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 scripts/pickUniqPos_MEM.pl.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/pickUniqPos_MEM.pl.orig Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -0,0 +1,56 @@ +#!/share/bin/perl +use Bio::Seq; +use List::Util qw(sum); + +die "perl $0 <sam>\n" if @ARGV<1; +open in,$ARGV[0]; +my %pe; +while(<in>) +{ + chomp; + my @f=split/\t/,$_,12; + ## read number 1 or 2 + my ($rnum)=$f[1]=~/(\d)$/; + + ## XT:A:* + my $xt=""; + my @a=split(/\s+/, $_); + my $as=0; + my $xs=0; + for my $i (11..$#a) { + if ($a[$i] =~ /^AS:i:/) { + $a[$i] =~ s/AS:i://; + $as=$a[$i]; + } + elsif ($a[$i] =~ /^XS:i:/) { + $a[$i] =~ s/XS:i://; + $xs=$a[$i]; + } + if (($xs > 0) && ($as-$xs <= $ARGV[1])) {$xt="R";} + elsif ($as > 0) {$xt="U";} + } + + my $strand="+"; + ## revcomp + if($f[1]=~/r/) + { + my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); + $f[9]=$seq->revcom->seq; + $strand="-"; + } + + ## parse CIGAR + if($xt eq "U") + { + # CIGAR + my (@cigar_m)=$f[5]=~/(\d+)M/g; + my (@cigar_d)=$f[5]=~/(\d+)D/g; + my (@cigar_s)=$f[5]=~/(\d+)S/g; + my (@cigar_i)=$f[5]=~/(\d+)I/g; + my $aln_ln=sum(@cigar_m,@cigar_d); + + print $f[2],"\t",$f[3]-1,"\t",$f[3]-1+$aln_ln,"\t$f[0]/$rnum\t",$f[9],"\t",$strand,"\n"; + } +} +close in; + |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 temp.xml --- a/temp.xml Wed Oct 26 07:24:45 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,124 +0,0 @@ -<tool id ="run_TEMP" name="TEMP" version="0.2.2.0"> - <description></description> - <requirements> - <requirement type="package" version="1.6.924">perl-bioperl</requirement> - <requirement type="package" version="0.7.13">bwa</requirement> - <requirement type="package" version="2.26.0gx">bedtools</requirement> - <requirement type="package" version="324">ucsc-twobittofa</requirement> - <requirement type="package" version="0.1.19">samtools</requirement> - </requirements> - <stdio> - <exit_code range="1:" /> - </stdio> - <command><![CDATA[ - ln -f -s "$alignment.metadata.bam_index" alignment.sorted.bam.bai && - ln -f -s "$alignment" alignment.sorted.bam && - bash $__tool_directory__/scripts/TEMP_Insertion.sh - -x "$minimum_score_difference" - -i alignment.sorted.bam - -s $__tool_directory__/scripts - -r "$consensus_te_seqs" - -t "$te_locations" - #if $te_families: - -u "$te_families" - #end if - -m "$mismatches" - -f "$median_insertsize" - -c \${GALAXY_SLOTS:-2} && - bash $__tool_directory__/scripts/TEMP_Absence.sh - -x "$minimum_score_difference" - -i alignment.sorted.bam - -s $__tool_directory__/scripts - -r "$te_locations" - -t "$reference2bit" - -f "$median_insertsize" - -c \${GALAXY_SLOTS:-2} && - mv alignment.insertion.refined.bp.summary $insertion_summary && - mv alignment.absence.refined.bp.summary $absence_summary && - tar -czf archive.tar.gz *insertion* *excision* && mv archive.tar.gz $archive - ]]></command> - <inputs> - <param format="bam" name="alignment" type="data" label="Alignment bam file"/> - <param format="twobit" name="reference2bit" type="data" label="Reference twobit file"/> - <param format="fasta" name="consensus_te_seqs" type="data" label="Consensus TE Seqs fasta file"/> - <param format="bed" name="te_locations" type="data" label="TE Annotations bed file"/> - <param format="tabular" name="te_families" type="data" optional="True" label="TE Identifiers and Families"/> - <param name="median_insertsize" value="" type="integer" 
label="Median Insert Length"/> - <param name="mismatches" min="0" max="5" type="integer" value="3" label="Allow this many mismatches when aligning to TEs"/> - <param name="minimum_score_difference" type="integer" min="10" max="37" value="30" label="Minimum difference between mapping scores"/> - </inputs> - <outputs> - <data format="bed" type="data" name="insertion_summary" label="${alignment.element_identifier} Insertions" /> - <data format="bed" type="data" name="absence_summary" label="${alignment.element_identifier} Absences" /> - <data format="tar" type="data" name="archive" label="${alignment.element_identifier} Compressed output files" /> - </outputs> - <tests> - <test> - <param name="alignment" value="chr2l_bwa_mem.bam" ftype="bam"/> - <param name="reference2bit" value="dm6_chr2l.twobit" ftype="twobit"/> - <param name="consensus_te_seqs" value="test_consensus.fa" ftype="fasta"/> - <param name="te_locations" value="test_TE_annotation.gff3" ftype="bed"/> - <param name="median_insertsize" value="500" ftype="integer"/> - <output name="insertion_summary" file="test_chromosome.insertion.refined.bp.summary" ftype="bed"/> - <output name="absence_summary" file="test_chromosome.absence.refined.bp.summary" ftype="bed"/> - </test> - </tests> - <help> <![CDATA[ - - -TEMP -------------- -TEMP is a software package for detecting transposable elements (TEs) insertions and absences from pooled high-throughput sequencing data - -Current version v1.04 - -Author: Jiali Zhuang (jiali.zhuang@umassmed.edu) and Jie Wang (jie.wangj@umassmed.edu) Weng Lab, University of Massachusetts Medical School, Worcester, MA, USA - -*Input files/variables* -------------------------- -* Alignment file in BAM format -* Reference genome used in aligning, in fasta or twobit format. -* Transposable Elements' Consensus Sequences in fasta format. -* Annotations of TEs in reference genome in bed format. 
-* TE Identifiers and Families (optional) - A file containing in the first column the TE names/identifiers from the consensus sequences file, and in the second column, their respective TE family names as in the TE annotations file. When supplied, if a detected insertion overlaps with an annotated TE of the same family, the detected insertion will be excluded from the results. -* Median Insert Length -* Number of Mismatches allowed (default 3) -* Minimum difference between mapping scores. The minimum difference in scores between the optimal and suboptimal alignments to consider a read uniquely mapped. - -*Output files* ------------------ -* **In the Insertions output file there are 14 columns:** -* Column 1: The chromosome where the detected insertion happens. -* Column 2: The coordinate of the start position of the detected insertion. -* Column 3: The coordinate of the end position of the detected insertion. -* Column 4: The TE family that the detected insertion belongs to. -* Column 5: The direction of the insertion. “Plus” means that the TE is integrated with the plus strand of the genome while “minus” means the TE is integrated with the minus strand. -* Column 6: The class of the insertion. “1p1” means that the detected insertion is supported by reads at both sides. “2p” means the detected insertion is supported by more than 1 read at only 1 side. “Singleton” means the detected insertion is supported by only 1 read at 1 side. -* Column 7: The total number of read pairs that support the detected insertion. -* Column 8: The estimated population frequency of the detected insertion. -* Columns 9 & 10: The coordinate of a junction and the number of the reads supporting it. If the junction is not found column 9 will be the arithmetic mean of the start and end coordinates and column 10 will have the value 0. -* Columns 11 & 12: Same as Columns 9 & 10 except for the junction on the other strand. 
-* Column 13: The number of reads supporting the detected insertion at the 5’ end of the TE (not including junction spanning reads). -* Column 13: The number of reads supporting the detected insertion at the 3’ end of the TE (not including junction spanning reads). - - ------ - - -* **In the Absences output file there are 14 columns:** -* Column 1: The chromosome where the detected absence happens. -* Column 2: The coordinate of the start position of the detected absence. -* Column 3: The coordinate of the end position of the detected absence. -* Column 4: The TE family that the detected insertion belongs to. -* Column 5: Junctions at 5’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands. -* Column 6: Junctions at 3’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands. -* Column 7: The number of reads supporting the absence. -* Column 8: The number of reads supporting the reference (no absence). -* Column 9: Estimated population frequency of the detected absence event. - - - ]]> </help> - <citations> - <citation type="doi">10.1093/nar/gku323</citation> - </citations> -</tool> |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 temp_absences.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/temp_absences.xml Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -0,0 +1,103 @@ +<tool id ="TEMP_absences" name="TEMP Excision" version="0.3.0"> + <description>finds TEs that have excised relative to the reference</description> + <requirements> + <requirement type="package" version="1.6.924=pl5.22.0_0">perl-bioperl</requirement> + <requirement type="package" version="0.7.13">bwa</requirement> + <requirement type="package" version="2.25.0">bedtools</requirement> + <requirement type="package" version="324">ucsc-twobittofa</requirement> + <requirement type="package" version="1.3.1">samtools</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command><![CDATA[ + ln -f -s "$alignment.metadata.bam_index" alignment.sorted.bam.bai && + ln -f -s "$alignment" alignment.sorted.bam && + bash $__tool_directory__/scripts/TEMP_Absence.sh + -x "$minimum_score_difference" + -i alignment.sorted.bam + -s $__tool_directory__/scripts + -r "$te_locations" + -t "$reference2bit" + -f "$median_insertsize" + -c \${GALAXY_SLOTS:-2} && + mv alignment.absence.refined.bp.summary $absence_summary + ]]></command> + <inputs> + <param format="bam" name="alignment" type="data" label="Alignment bam file"/> + <param format="twobit" name="reference2bit" type="data" label="Reference twobit file"/> + <param format="bed" name="te_locations" type="data" label="TE Annotations bed file"/> + <param name="median_insertsize" value="" type="integer" label="Median Insert Length"/> + <param name="minimum_score_difference" type="integer" min="10" max="37" value="30" label="Minimum difference between mapping scores"/> + </inputs> + <outputs> + <data format="bed" name="absence_summary" label="${alignment.element_identifier} Absences" /> + </outputs> + <tests> + <test> + <param name="alignment" value="test_chromosome.sorted.bam" ftype="bam"/> + <param name="reference2bit" value="dm3_chr2L.2bit" ftype="twobit"/> + <param name="te_locations" value="test_TE_annotation.bed" ftype="bed"/> + <param name="median_insertsize" value="500" ftype="integer"/> + 
<output name="absence_summary" file="test_absence_out.bed" ftype="bed"/> + </test> + </tests> + <help> <![CDATA[ + + +TEMP +------------- +TEMP is a software package for detecting transposable elements (TEs) insertions and absences from pooled high-throughput sequencing data + +Current version v1.04 + +Author: Jiali Zhuang (jiali.zhuang@umassmed.edu) and Jie Wang (jie.wangj@umassmed.edu) Weng Lab, University of Massachusetts Medical School, Worcester, MA, USA + +*Input files/variables* +------------------------- +* Alignment file in BAM format +* Reference genome used in aligning, in fasta or twobit format. +* Transposable Elements' Consensus Sequences in fasta format. +* Annotations of TEs in reference genome in bed format. +* TE Identifiers and Families (optional) - A file containing in the first column the TE names/identifiers from the consensus sequences file, and in the second column, their respective TE family names as in the TE annotations file. When supplied, if a detected insertion overlaps with an annotated TE of the same family, the detected insertion will be excluded from the results. +* Median Insert Length +* Number of Mismatches allowed (default 3) +* Minimum difference between mapping scores. The minimum difference in scores between the optimal and suboptimal alignments to consider a read uniquely mapped. + +*Output files* +----------------- +* **In the Insertions output file there are 14 columns:** +* Column 1: The chromosome where the detected insertion happens. +* Column 2: The coordinate of the start position of the detected insertion. +* Column 3: The coordinate of the end position of the detected insertion. +* Column 4: The TE family that the detected insertion belongs to. +* Column 5: The direction of the insertion. “Plus” means that the TE is integrated with the plus strand of the genome while “minus” means the TE is integrated with the minus strand. +* Column 6: The class of the insertion. 
“1p1” means that the detected insertion is supported by reads at both sides. “2p” means the detected insertion is supported by more than 1 read at only 1 side. “Singleton” means the detected insertion is supported by only 1 read at 1 side. +* Column 7: The total number of read pairs that support the detected insertion. +* Column 8: The estimated population frequency of the detected insertion. +* Columns 9 & 10: The coordinate of a junction and the number of the reads supporting it. If the junction is not found column 9 will be the arithmetic mean of the start and end coordinates and column 10 will have the value 0. +* Columns 11 & 12: Same as Columns 9 & 10 except for the junction on the other strand. +* Column 13: The number of reads supporting the detected insertion at the 5’ end of the TE (not including junction spanning reads). +* Column 14: The number of reads supporting the detected insertion at the 3’ end of the TE (not including junction spanning reads). + + +----- + + +* **In the Absences output file there are 9 columns:** +* Column 1: The chromosome where the detected absence happens. +* Column 2: The coordinate of the start position of the detected absence. +* Column 3: The coordinate of the end position of the detected absence. +* Column 4: The TE family that the detected absence belongs to. +* Column 5: Junctions at 5’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands. +* Column 6: Junctions at 3’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands. +* Column 7: The number of reads supporting the absence. +* Column 8: The number of reads supporting the reference (no absence). +* Column 9: Estimated population frequency of the detected absence event. + + + ]]> </help> + <citations> + <citation type="doi">10.1093/nar/gku323</citation> + </citations> +</tool> |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 temp_insertions.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/temp_insertions.xml Mon Dec 05 09:58:47 2016 -0500 |
[ |
@@ -0,0 +1,110 @@ +<tool id ="TEMP_insertions" name="TEMP Insertion" version="0.3.0"> + <description>finds TE insertions relative to reference</description> + <requirements> + <requirement type="package" version="1.6.924=pl5.22.0_0">perl-bioperl</requirement> + <requirement type="package" version="0.7.13">bwa</requirement> + <requirement type="package" version="2.25.0">bedtools</requirement> + <requirement type="package" version="324">ucsc-twobittofa</requirement> + <requirement type="package" version="1.3.1">samtools</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command><![CDATA[ + ln -f -s "$alignment.metadata.bam_index" alignment.sorted.bam.bai && + ln -f -s "$alignment" alignment.sorted.bam && + bash $__tool_directory__/scripts/TEMP_Insertion.sh + -x "$minimum_score_difference" + -i alignment.sorted.bam + -s $__tool_directory__/scripts + -r "$consensus_te_seqs" + -t "$te_locations" + #if $te_families: + -u "$te_families" + #end if + -m "$mismatches" + -f "$median_insertsize" + -c \${GALAXY_SLOTS:-2} && + mv alignment.insertion.refined.bp.summary $insertion_summary + ]]></command> + <inputs> + <param format="bam" name="alignment" type="data" label="Alignment bam file"/> + <param format="fasta" name="consensus_te_seqs" type="data" label="Consensus TE Seqs fasta file"/> + <param format="bed" name="te_locations" type="data" label="TE Annotations bed file"/> + <param format="tabular" name="te_families" type="data" optional="True" label="TE Identifiers and Families"/> + <param name="median_insertsize" value="" type="integer" label="Median Insert Length"/> + <param name="mismatches" min="0" max="5" type="integer" value="3" label="Allow this many mismatches when aligning to TEs"/> + <param name="minimum_score_difference" type="integer" min="0" max="37" value="30" label="Minimum difference between mapping scores"/> + </inputs> + <outputs> + <data format="bed" name="insertion_summary" label="${alignment.element_identifier} Insertions" 
/> + </outputs> + <tests> + <test> + <param name="alignment" value="test_chromosome.sorted.bam" ftype="bam"/> + <param name="consensus_te_seqs" value="test_concensus.fa" ftype="fasta"/> + <param name="te_locations" value="test_TE_annotation.bed" ftype="bed"/> + <param name="median_insertsize" value="500" ftype="integer"/> + <param name="minimum_score_difference" value="0" ftype="integer"/> + <output name="insertion_summary" file="test_insertions_out.bed" ftype="bed" compare="sim_size"/> + </test> + </tests> + <help> <![CDATA[ + + +TEMP +------------- +TEMP is a software package for detecting transposable elements (TEs) insertions and absences from pooled high-throughput sequencing data + +Current version v1.04 + +Author: Jiali Zhuang (jiali.zhuang@umassmed.edu) and Jie Wang (jie.wangj@umassmed.edu) Weng Lab, University of Massachusetts Medical School, Worcester, MA, USA + +*Input files/variables* +------------------------- +* Alignment file in BAM format +* Reference genome used in aligning, in fasta or twobit format. +* Transposable Elements' Consensus Sequences in fasta format. +* Annotations of TEs in reference genome in bed format. +* TE Identifiers and Families (optional) - A file containing in the first column the TE names/identifiers from the consensus sequences file, and in the second column, their respective TE family names as in the TE annotations file. When supplied, if a detected insertion overlaps with an annotated TE of the same family, the detected insertion will be excluded from the results. +* Median Insert Length +* Number of Mismatches allowed (default 3) +* Minimum difference between mapping scores. The minimum difference in scores between the optimal and suboptimal alignments to consider a read uniquely mapped. + +*Output files* +----------------- +* **In the Insertions output file there are 14 columns:** +* Column 1: The chromosome where the detected insertion happens. 
+* Column 2: The coordinate of the start position of the detected insertion. +* Column 3: The coordinate of the end position of the detected insertion. +* Column 4: The TE family that the detected insertion belongs to. +* Column 5: The direction of the insertion. “Plus” means that the TE is integrated with the plus strand of the genome while “minus” means the TE is integrated with the minus strand. +* Column 6: The class of the insertion. “1p1” means that the detected insertion is supported by reads at both sides. “2p” means the detected insertion is supported by more than 1 read at only 1 side. “Singleton” means the detected insertion is supported by only 1 read at 1 side. +* Column 7: The total number of read pairs that support the detected insertion. +* Column 8: The estimated population frequency of the detected insertion. +* Columns 9 & 10: The coordinate of a junction and the number of the reads supporting it. If the junction is not found column 9 will be the arithmetic mean of the start and end coordinates and column 10 will have the value 0. +* Columns 11 & 12: Same as Columns 9 & 10 except for the junction on the other strand. +* Column 13: The number of reads supporting the detected insertion at the 5’ end of the TE (not including junction spanning reads). +* Column 14: The number of reads supporting the detected insertion at the 3’ end of the TE (not including junction spanning reads). + + +----- + + +* **In the Absences output file there are 9 columns:** +* Column 1: The chromosome where the detected absence happens. +* Column 2: The coordinate of the start position of the detected absence. +* Column 3: The coordinate of the end position of the detected absence. +* Column 4: The TE family that the detected absence belongs to. +* Column 5: Junctions at 5’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands. +* Column 6: Junctions at 3’ of the excised TE. 
The two numbers are the coordinates of the junctions on the two strands. +* Column 7: The number of reads supporting the absence. +* Column 8: The number of reads supporting the reference (no absence). +* Column 9: Estimated population frequency of the detected absence event. + + + ]]> </help> + <citations> + <citation type="doi">10.1093/nar/gku323</citation> + </citations> +</tool> |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/chr2l_bwa_mem.bam |
b |
Binary file test-data/chr2l_bwa_mem.bam has changed |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/dm3_chr2L.2bit |
b |
Binary file test-data/dm3_chr2L.2bit has changed |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/dm6_chr2l.twobit |
b |
Binary file test-data/dm6_chr2l.twobit has changed |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_TE_annotation.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_TE_annotation.bed Mon Dec 05 09:58:47 2016 -0500 |
b |
@@ -0,0 +1,115 @@ +chr2L 1301606 1302488 FBgn0001167_gypsy . - +chr2L 2094501 2094580 FBgn0000155_roo . - +chr2L 2100429 2109522 FBgn0000155_roo . - +chr2L 2112167 2118361 FBgn0003007_opus . - +chr2L 2118446 2119772 FBgn0003007_opus . - +chr2L 2159453 2159556 FBgn0000155_roo . - +chr2L 2267349 2267457 FBgn0000155_roo . + +chr2L 2294096 2299243 FBgn0000349_copia . - +chr2L 2378805 2378893 FBgn0000155_roo . + +chr2L 2530303 2530389 FBgn0000155_roo . + +chr2L 2565592 2569028 FBgn0000005_297 . + +chr2L 2565667 2565886 FBgn0000004_17.6 . + +chr2L 2565869 2566006 FBgn0063450_Tom1 . + +chr2L 2565871 2566024 FBgn0061485_rover . + +chr2L 2565920 2566024 FBgn0063917_McClintock . + +chr2L 2566158 2569026 FBgn0000004_17.6 . + +chr2L 2566674 2566848 FBgn0061485_rover . + +chr2L 2567367 2569022 FBgn0061485_rover . + +chr2L 2567598 2567816 FBgn0063917_McClintock . + +chr2L 2567665 2569027 FBgn0044355_Quasimodo . + +chr2L 2568060 2569027 FBgn0026065_Idefix . + +chr2L 2568062 2569018 FBgn0063917_McClintock . + +chr2L 2568070 2569004 FBgn0063447_accord . + +chr2L 2568121 2568988 FBgn0004082_Tirant . + +chr2L 2568137 2569006 FBgn0063432_gypsy5 . + +chr2L 2568137 2568942 FBgn0040267_Transpac . + +chr2L 2568153 2569001 FBgn0063782_accord2 . - +chr2L 2568154 2568990 FBgn0023131_ZAM . + +chr2L 2568193 2568993 FBgn0003007_opus . + +chr2L 2568251 2568697 FBgn0000006_412 . + +chr2L 2568264 2568985 FBgn0063434_gypsy3 . + +chr2L 2568264 2568985 FBgn0003490_springer . + +chr2L 2568308 2568520 FBgn0067387_gypsy10 . + +chr2L 2568308 2568517 FBgn0067384_gypsy7 . + +chr2L 2568308 2568878 FBgn0063431_gypsy6 . + +chr2L 2568308 2568703 FBgn0001167_gypsy . + +chr2L 2568313 2568828 FBgn0002697_mdg1 . + +chr2L 2568313 2568526 FBgn0000199_blood . + +chr2L 2568329 2568982 FBgnnnnnnnn_HMS-Beagle2 . + +chr2L 2568329 2568982 FBgn0001207_HMS-Beagle . + +chr2L 2568378 2568648 FBgn0063897_Stalker4 . + +chr2L 2568378 2568878 FBgn0063433_gypsy4 . + +chr2L 2568384 2568646 FBgn0063435_gypsy2 . 
+ +chr2L 2568384 2568796 FBgn0002698_mdg3 . + +chr2L 2569006 2569756 FBgn0063917_McClintock . + +chr2L 2569007 2571200 FBgn0000004_17.6 . + +chr2L 2569010 2571603 FBgn0000005_297 . + +chr2L 2569018 2569804 FBgn0061485_rover . + +chr2L 2569064 2570806 FBgn0044355_Quasimodo . + +chr2L 2569064 2569752 FBgn0026065_Idefix . + +chr2L 2569859 2571024 FBgn0061485_rover . + +chr2L 2569987 2570809 FBgn0026065_Idefix . + +chr2L 2570511 2570703 FBgn0063917_McClintock . + +chr2L 2571048 2571200 FBgn0063917_McClintock . + +chr2L 2571264 2571483 FBgn0000004_17.6 . + +chr2L 2571466 2571603 FBgn0063450_Tom1 . + +chr2L 2571468 2571592 FBgn0061485_rover . + +chr2L 2661257 2663012 FBgn0001249_I-element . + +chr2L 2713413 2713444 FBgn0063371_transib2 . - +chr2L 2772652 2776969 FBgn0000005_297 . + +chr2L 2772727 2772946 FBgn0000004_17.6 . + +chr2L 2772929 2773066 FBgn0063450_Tom1 . + +chr2L 2772931 2773084 FBgn0061485_rover . + +chr2L 2772980 2773084 FBgn0063917_McClintock . + +chr2L 2773736 2773910 FBgn0061485_rover . + +chr2L 2774429 2776968 FBgn0061485_rover . + +chr2L 2774429 2776968 FBgn0000004_17.6 . + +chr2L 2774660 2774878 FBgn0063917_McClintock . + +chr2L 2774727 2776980 FBgn0044355_Quasimodo . + +chr2L 2775122 2776985 FBgn0026065_Idefix . + +chr2L 2775124 2776969 FBgn0063917_McClintock . + +chr2L 2775132 2776531 FBgn0063447_accord . + +chr2L 2775183 2776509 FBgn0004082_Tirant . + +chr2L 2775199 2776553 FBgn0063432_gypsy5 . + +chr2L 2775199 2776494 FBgn0040267_Transpac . + +chr2L 2775215 2776321 FBgn0063782_accord2 . - +chr2L 2775216 2776513 FBgn0023131_ZAM . + +chr2L 2775255 2776055 FBgn0003007_opus . + +chr2L 2775313 2775759 FBgn0000006_412 . + +chr2L 2775326 2776047 FBgn0063434_gypsy3 . + +chr2L 2775326 2776047 FBgn0003490_springer . + +chr2L 2775370 2775579 FBgn0067384_gypsy7 . + +chr2L 2775370 2775765 FBgn0001167_gypsy . + +chr2L 2775375 2775890 FBgn0002697_mdg1 . + +chr2L 2775375 2775588 FBgn0000199_blood . + +chr2L 2775391 2776044 FBgnnnnnnnn_HMS-Beagle2 . 
+ +chr2L 2775391 2776044 FBgn0001207_HMS-Beagle . + +chr2L 2775429 2775767 FBgn0010302_Burdock . + +chr2L 2775440 2775710 FBgn0063897_Stalker4 . + +chr2L 2775440 2776515 FBgn0063433_gypsy4 . + +chr2L 2775442 2775582 FBgn0067387_gypsy10 . + +chr2L 2775446 2775858 FBgn0002698_mdg3 . + +chr2L 2776093 2776340 FBgn0000199_blood . + +chr2L 2776099 2776519 FBgnnnnnnnn_HMS-Beagle2 . + +chr2L 2776156 2776324 FBgn0063436_gtwin . + +chr2L 2776156 2776516 FBgn0063431_gypsy6 . + +chr2L 2776179 2776389 FBgn0003007_opus . + +chr2L 2776938 2777318 FBgn0063917_McClintock . + +chr2L 2776958 2777320 FBgn0000004_17.6 . + +chr2L 2776962 2777324 FBgn0061485_rover . + +chr2L 2776962 2777324 FBgn0000005_297 . + +chr2L 2776975 2777315 FBgn0044355_Quasimodo . + +chr2L 2777321 2779175 FBgn0000005_297 . + +chr2L 2777323 2778772 FBgn0000004_17.6 . + +chr2L 2777510 2778596 FBgn0061485_rover . + +chr2L 2777559 2778381 FBgn0026065_Idefix . + +chr2L 2777565 2778378 FBgn0044355_Quasimodo . + +chr2L 2778083 2778275 FBgn0063917_McClintock . + +chr2L 2778620 2778772 FBgn0063917_McClintock . + +chr2L 2778836 2779055 FBgn0000004_17.6 . + +chr2L 2779038 2779175 FBgn0063450_Tom1 . + +chr2L 2779040 2779164 FBgn0061485_rover . + +chr2L 2933353 2935475 FBgn0003122_pogo . - +chr2L 2945631 2945785 FBgn0000155_roo . + +chr2L 2963474 2963538 FBgn0000155_roo . + |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_TE_annotation.gff3 --- a/test-data/test_TE_annotation.gff3 Wed Oct 26 07:24:45 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,934 +0,0 @@\n-chr2L\tFlyBase\tIdefix\t9726\t9859\t.\t+\t.\tID=FBti0060580;Name=Idefix{}2519;Alias=Idefix#20,TE60580;derived_cyto_location=21A5-21A5\n-chr2L\tFlyBase\tINE-1\t9888\t9949\t.\t-\t.\tID=FBti0059810;Name=INE-1{}1749;Alias=INE-1#4,TE59810;derived_cyto_location=21A5-21A5\n-chr2L\tFlyBase\tINE-1\t24236\t24462\t.\t+\t.\tID=FBti0059812;Name=INE-1{}1751;Alias=INE-1#5,TE59812;derived_cyto_location=21B1-21B1;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tINE-1\t27530\t27788\t.\t-\t.\tID=FBti0059814;Name=INE-1{}1753;Alias=INE-1#6,TE59814;derived_cyto_location=21B1-21B1;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tjockey\t47514\t52519\t.\t+\t.\tID=FBti0019092;Name=jockey{}277;Alias=jockey{}277,TE19092;gbunit=AE003590;derived_cyto_location=21B1-21B1,21A3-21A3;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tINE-1\t60240\t60580\t.\t+\t.\tID=FBti0059816;Name=INE-1{}1755;Alias=INE-1#7,TE59816;derived_cyto_location=21B1-21B1;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tINE-1\t60663\t60773\t.\t+\t.\tID=FBti0059818;Name=INE-1{}1757;Alias=INE-1#8,TE59818;derived_cyto_location=21B1-21B1;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tX\t64316\t64936\t.\t+\t.\tID=FBti0019093;Name=X{}278;Alias=X{}278,CG18303,TE19093;gbunit=AE003590;derived_cyto_location=21B1-21B1,21A4-21A4;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tINE-1\t65664\t65698\t.\t-\t.\tID=FBti0059819;Name=INE-1{}1758;Alias=INE-1#9,TE59819;derived_cyto_location=21B1-21B1;derived_comput_cyto_location=21B1\n-chr2L\tFlyBase\tINE-1\t116518\t116976\t.\t+\t.\tID=FBti0060557;Name=INE-1{}2496;Alias=INE-1#10,TE60557;derived_cyto_location=21B2-21B2;derived_comput_cyto_location=21B2\n-chr2L\tFlyBase\tMcClintock\t123547\t123635\t.\t-\t.\tID=FBti0060562;Name=McClintock{}2501;Alias=McClintock#15,TE60562;derived_cyto_location=21B2-21B2;derived_comput_cyto_location=21B2\n-chr2L\tFlyBase\tINE-1\t172692\t172724\t.\t+\t.\tID=FBti0060558;Name=INE-1{}2497;Alias=INE-1#11,TE60558;derived_cyto_locati
on=21B4-21B4;derived_comput_cyto_location=21B4\n-chr2L\tFlyBase\tINE-1\t173743\t173778\t.\t-\t.\tID=FBti0060559;Name=INE-1{}2498;Alias=INE-1#12,TE60559;derived_cyto_location=21B4-21B4;derived_comput_cyto_location=21B4\n-chr2L\tFlyBase\tI\t176393\t176512\t.\t-\t.\tID=FBti0019095;Name=I{}279;Alias=I{}279,TE19095;gbunit=AE003590;derived_cyto_location=21B4-21B4,21B3-21B3;derived_comput_cyto_location=21B4\n-chr2L\tFlyBase\tINE-1\t178279\t178467\t.\t+\t.\tID=FBti0060560;Name=INE-1{}2499;Alias=INE-1#13,TE60560;derived_cyto_location=21B4-21B4;derived_comput_cyto_location=21B4\n-chr2L\tFlyBase\tINE-1\t179822\t179909\t.\t+\t.\tID=FBti0060561;Name=INE-1{}2500;Alias=INE-1#14,TE60561;derived_cyto_location=21B4-21B4;derived_comput_cyto_location=21B4\n-chr2L\tFlyBase\tblood\t347941\t355383\t.\t-\t.\tID=FBti0019096;Name=blood{}280;Alias=blood{}280,TE19096;gbunit=AE003589;derived_cyto_location=21C1-21C1,21C2-21C2;derived_comput_cyto_location=21C1\n-chr2L\tFlyBase\tINE-1\t451600\t451692\t.\t-\t.\tID=FBti0063763;Name=INE-1{}5702;Alias=INE-1#10915,TE63763;derived_cyto_location=21C7-21C7;derived_comput_cyto_location=21C2\n-chr2L\tFlyBase\tINE-1\t634184\t634245\t.\t-\t.\tID=FBti0060585;Name=INE-1{}2524;Alias=INE-1#28,TE60585;derived_cyto_location=21E2-21E2;derived_comput_cyto_location=21E2\n-chr2L\tFlyBase\troo\t686974\t695955\t.\t-\t.\tID=FBti0019098;Name=roo{}281;Alias=roo{}281,TE19098;gbunit=AE003588;derived_cyto_location=21E2-21E2,21D2-21D2;derived_comput_cyto_location=21E2\n-chr2L\tFlyBase\tBari1\t770516\t772243\t.\t-\t.\tID=FBti0019099;Name=Bari1{}282;Alias=Bari1{}282,TE19099;gbunit=AE003588;derived_cyto_location=21E2-21E2,21D2-21D2;derived_comput_cyto_location=21E2\n-chr2L\tFlyBase\troo\t976935\t984512\t.\t+\t.\tID=FBti0019100;Name=roo{}283;Alias=roo{}283,TE19100;gbunit=AE003587;derived_cyto_location=21E2-21E2,21E1-21E1;derived_comput_cyto_location=21E2\n-chr2L\tFlyBase\troo\t996780\t1005816\t.\t-\t.\tID=FBti0019101;Name=roo{}284;Alias=roo{}284,TE19101;gbunit=AE003587;derived_cyto
_location=21E3-21E3,21E2-21E2;derived_comput_cyto_location=21E3\n-chr2L\tFlyBase\tblood\t1220184\t1227592\t.\t+\t.\tID=FBti0019102;Name=blood{}285;Alias=blood{}285,TE19102;gbunit=AE003587;derived_cyto_location=21F2-21'..b'8;Alias=HMS-Beagle#1508,TE60549;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tHMS-Beagle\t22409494\t22409773\t.\t-\t.\tID=FBti0060684;Name=invader1{}2623;Alias=invader1#1573,TE60684;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tinvader1\t22409895\t22410321\t.\t+\t.\tID=FBti0060551;Name=HMS-Beagle{}2490;Alias=HMS-Beagle#1509,TE60551;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tHMS-Beagle2\t22410897\t22411176\t.\t-\t.\tID=FBti0060686;Name=invader1{}2625;Alias=invader1#1574,TE60686;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tHMS-Beagle\t22411294\t22411721\t.\t+\t.\tID=FBti0060390;Name=HMS-Beagle2{}2329;Alias=HMS-Beagle2#100081,TE60390;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tinvader1\t22412695\t22413121\t.\t+\t.\tID=FBti0060565;Name=HMS-Beagle{}2504;Alias=HMS-Beagle#1511,TE60565;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tHMS-Beagle\t22413694\t22413973\t.\t-\t.\tID=FBti0060688;Name=invader1{}2627;Alias=invader1#1575,TE60688;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tinvader1\t22414091\t22414518\t.\t+\t.\tID=FBti0060391;Name=HMS-Beagle{}2330;Alias=HMS-Beagle#100082,TE60391;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tHMS-Beagle\t22415073\t22415352\t.\t-\t.\tID=FBti0060689;Name=invader1{}2628;Alias=invader1#1576,TE60689;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tFlyBase\tDoc\t22415470\t22415824\t.\t+\t.\tID=FBti0060570;Name=HMS-Beagle{}2509;Alias=HMS-Beagle#1513,TE60570;derived_cyto_location=40F7-40F7;derived_
comput_cyto_location=40F7\n-chr2L\tFlyBase\tS\t22415856\t22420241\t.\t-\t.\tID=FBti0060540;Name=Doc{}2479;Alias=Doc#1502,TE60540;derived_cyto_location=40F7-40F7;derived_comput_cyto_location=40F7\n-chr2L\tDHGP\troo\t22582024\t22583757\t.\t+\t.\tID=RR48839_transposable_element;Name=S{}RR48839;h-band_cyto_range=h35-h36;cyto_range=40D-40F\n-chr2L\tDHGP\tF\t22605666\t22614751\t.\t-\t.\tID=RR49033_transposable_element;Name=roo{}RR49033;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tMax\t22679852\t22684556\t.\t+\t.\tID=RR48902_transposable_element;Name=F{}RR48902;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tS\t22765149\t22773721\t.\t+\t.\tID=RR48810_transposable_element;Name=Max{}RR48810;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tGATE\t22776474\t22778207\t.\t+\t.\tID=RR48497_transposable_element;Name=S{}RR48497;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tinvader3\t22899297\t22907772\t.\t+\t.\tID=RR48945_transposable_element;Name=GATE{}RR48945;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tinvader3\t23057903\t23063349\t.\t-\t.\tID=RR49017_transposable_element;Name=invader3{}RR49017;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tFlyBase\tJuan\t23059027\t23060915\t.\t+\t.\tID=FBti0059748;Name=invader3{}1687;Alias=TE19812,TE19813,FBti0019812,FBti0019813,invader3#864,invader3{}454,invader3{}455,TE59748;derived_cyto_location=38C2-38C2\n-chr2L\tDHGP\tDoc\t23267989\t23272224\t.\t+\t.\tID=RR48345_transposable_element;Name=Juan{}RR48345;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tF\t23288997\t23293720\t.\t-\t.\tID=RR48348_transposable_element;Name=Doc{}RR48348;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tIvk\t23353253\t23357949\t.\t-\t.\tID=RR44129_transposable_element;Name=F{}RR44129;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\troo\t23382539\t23387909\t.\t+\t.\tID=RR48366_transposable_element;Name=Ivk{}RR48366;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-
chr2L\tDHGP\tDoc\t23405712\t23414824\t.\t+\t.\tID=RR48370_transposable_element;Name=roo{}RR48370;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tMcClintock\t23467777\t23472472\t.\t+\t.\tID=RR41919_transposable_element;Name=Doc{}RR41919;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n-chr2L\tDHGP\tBari1\t23475714\t23482166\t.\t+\t.\tID=RR48378_transposable_element;Name=McClintock{}RR48378;cyto_range=40D-40F;h-band_cyto_range=h35-h36\n' |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_absence_out.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_absence_out.bed Mon Dec 05 09:58:47 2016 -0500 |
b |
@@ -0,0 +1,6 @@ +Chr Start End TransposonName 5'_Junction 3'_Junction Variant Reference Frequency +chr2L 2100429 2109522 FBgn0000155_roo 2100429(+),2100429(-) 2109523(+),2109523(-) 27 0 1.0000 +chr2L 2112167 2119772 FBgn0003007_opus 2112167(-) 2119773(-) 27 0 1.0000 +chr2L 2294096 2299243 FBgn0000349_copia 2294096(+),2294096(-) 2299244(+),2299244(-) 45 0 1.0000 +chr2L 2661257 2663012 FBgn0001249_I-element 2661257(+) 2663013(+) 28 0 1.0000 +chr2L 2933353 2935475 FBgn0003122_pogo 2933346(+),2933353(-) 2935469(+),2935476(-) 44 0 1.0000 |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_chromosome.absence.refined.bp.summary --- a/test-data/test_chromosome.absence.refined.bp.summary Wed Oct 26 07:24:45 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,1 +0,0 @@ -Chr Start End TransposonName 5'_Junction 3'_Junction Variant Reference Frequency |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_chromosome.insertion.refined.bp.summary --- a/test-data/test_chromosome.insertion.refined.bp.summary Wed Oct 26 07:24:45 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,8 +0,0 @@ -Chr Start End TransposonName TransposonDirection Class VariantSupport Frequency Junction1 Junction1Support Junction2 Junction2Support 5'_Support 3'_Support -chr2L 8900590 8900703 FBgn0000481_Doc antisense 1p1 8 0.1667 8900646 0 8900646 0 5 3 -chr2L 8907814 8908314 FBgn0000155_roo antisense singleton 1 0.0179 8908064 0 8908064 0 0 1 -chr2L 8927841 8928341 FBgn0004141_HeT-A sense singleton 1 0.0333 8928091 0 8928091 0 0 1 -chr2L 8959585 8960085 FBgn0000481_Doc sense 2p 3 0.0236 8959835 0 8959835 0 0 3 -chr2L 8988151 8988651 FBgn0000155_roo antisense singleton 1 0.0159 8988401 0 8988401 0 1 0 -chr2L 8989135 8989635 FBgn0004141_HeT-A antisense singleton 0.5 0.0182 8989385 0 8989385 0 0 0.5 -chr2L 8989135 8989635 FBgn0000481_Doc antisense singleton 0.5 0.0182 8989385 0 8989385 0 0 0.5 |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_chromosome.sorted.bam |
b |
Binary file test-data/test_chromosome.sorted.bam has changed |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_concensus.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_concensus.fa Mon Dec 05 09:58:47 2016 -0500 |
b |
b'@@ -0,0 +1,1143 @@\n+>FBgn0010302_Burdock\n+AGTTAACACAATCACAAAACACCCGAAATATAGTCGTAAGCCTCAAGTGC\n+TTTTCCCATCTATAGATCGAGCTTTACCTATAAGAAACTGTAACTTGTTA\n+AGCTTTAGAGATAAGAACTCTTGCTATACTTAAGTCAGTCGATTTTGGAA\n+GATTAGAAGCGTCGGTCATCGCCACGTACTTACTATTCGTCTCATTAAGT\n+GCAGACCGCGCAAGCCTATTGTAATTAATAAACTTACGCTAATAAATATA\n+TGGAAAATCTACTAAAATGATAATTGGCGCCCAAACGGATATAAAAACCT\n+ACGATAACTGAATAATTATAAATAAATAACAAAAGGAGGATCCGGAGACA\n+AAACCAGCGGCTTTGGCTAATTAACTCTAACCTAAGAAATAAAAATTTGC\n+TGATTACATAAAATATAATATTAATTACTAAGACCATCTACCTTAAAATT\n+GTTTGTTAATCACTATTATTATATTGTAAGTATAACGCTTATTGAACGAA\n+TTAAAAATATTATTATTATTATTATATTATAACCTATGCAAAGAGTATTG\n+ATAATAAAAATACATGAGTGACAGTGATAACCTTTTAGACAACCTAGTGT\n+CAAGCTTAAATAAATGGTCAGCGCACCAGGCAAGTAGGCAAAACAGTGCA\n+GAAAAAAATAATAAGTCATCAGATAATTGGTGGTCAAAAACAAAGACAAC\n+TAGCGAAATGGAATTTGAAGCTCAGTTAAAAGCGATCGTAGAGAGTGCTG\n+TTGCCGGTGCGCTCGCAGTCCAAAAACAATCATTTGAAAAGCAATTGCAG\n+GAGATGAATGAGCGAATCGGGAAATTAACAGTGAACACCCCAGAGGTGGA\n+AACTTATGTAGATGCTGAAATTAGACCAGGTGTTGTCTGTAGCGAGCCTC\n+TAGATATACTTAAATCTCTGCCAGATTTTGATGGCAAAAGTGAAACATAT\n+GTGTCGTGGAGAAAAGCGGCTCATGTCGCTTTTAAAGTTTTCAAAGATTA\n+CGAGGGAAGTTCAACATTTTACCAAGCTCTTGGTATTATGCGAAATAAAA\n+TAAAAGGTCCAGCGAATACAGTATTGGCTTCTTTTAATACTCCGTTACAT\n+TTCAAAGCAATGATCAGCCGTCTTGATTTCACATATTCTGACAAAAGGCC\n+GATCTATCTAATCGAACAAGAGCTATCAACTTTGCGACAGGGAGACATGA\n+CTCTTACTGAATTCTACGATGAAGTCGAGAAAAAACTGACCCTACTTACC\n+AACAAGACAATAATGACATTTGATAGTGCCTTGGCGATGTCACTGAATGA\n+AAAGTACAGGACGGACGCGTTACGTGTATTTGTAACCGGAGCTAAGAAAT\n+CGTTGAGCGACATTCTTTTTGCAAAAGGTCCAAAAGATTTACCAACTGCT\n+CTCGCTTTAGCGCAAGAGGTCGAGTCGAACCATGAGCGTTACCAATTCGC\n+CCTTATTTATTCTAAAAATATTGGAGACAGGGGTCAGAAAATCGAACAAA\n+GGCACAGCGATAAGGATAGAAACTCAATCATGCCCATGCAAACTAAAAAC\n+CCATATTTTAGCAAGCGTCAGGTGCATACTTATGATAACCAGGAAAGACA\n+AGATCCAGTCCAGTTAACAAATCCTGATGTATCCATGCGATCTAGAAGAA\n+CTGGAAATTTTGGACAAACTCCATTTCCGACTCAGGGAAATATTTGGCCA\n+TCCCAACAGCAAAATTCTTGGCCATCTCAACAACAATATTCTTGGCCATC\n+CCAACAACAAAATTCATTTCGAACACAAAATCAATTCGCATCGCAACCCC\n+AACAGCAAAACACAAGTCAGGCTCAGGGACATTTTGGGTATGCGCA
AGCA\n+TCAAAAAGACCAACGAGTGGCAGTGCAAGGTTTACAGGGCCAAAACAGCA\n+GAGGATCAACTACTTACCTCATGAGAAAGGTCAATGTGAGGAAGATACAG\n+ACGGTTATCAAAAGGAGGCAGAAGCGGAGGTTGATGATTATGAGGACGAA\n+CTAGTGAATTACGATCATGTTCATTTTTTAGCCACAAATCCCTGCTACCG\n+TACATAGAAAGAGAGATAGCAGGGAGAACCATAAAACTTTTGATTGACAC\n+CGGGGCTTCGAAAAATTACATACAGCCCCTCCCTGAATTAAAAAACATAA\n+TGCCGGTACAAAATAAATTCACGGTAAAATCGCTTCATGGTTGCAACACC\n+GTCAAACAGAAATGCTTTATTAAGCTATTTAACACATCTGTTCAATTCTT\n+TATTCTTCCAAGTCTCTCTAGTTTTGACGCAATAATAGGACTTGACCTTT\n+TGAAACAGGGAAATGCAACGTTAGATTTTAAGAACAAAACGTTGAATATC\n+AACAATGAAGTGGAATCTATTCAGTTTTTGAGATGTGACAGCGTAAATTT\n+CGCCAACATAGAGAATATTGTGGTTCCAAATCAGATATCTAATAAATTCC\n+ATACAATGCTTCGAAACCGATTGGCCGTCTTTGCGGAACCGGAAGAAGCA\n+CTGCCGTATAATACCAACATTGTTGCCACAATACGTACTGAGGACGACCA\n+ACCCATTTACTCAAAACTCTATCCGTACCCCATGGGCGTATCGGATTTTG\n+TGAATAAGGAGACACATGCTTTGTTAAAGGACGGAATTATCAGGCCCTCG\n+TCGTCACCTTACAACAATCCGGTTTGGGTAGTCGATAAAAAAGGTACAGA\n+TGAAGAGGGAAATACTAAGAAAAGGTTGGTTATAGATTTTAGAAAACTAA\n+ATTTAAAAACAATCGACGACAAGTACCCTATACCAAACGTAGTATGGATC\n+TTGTCAAATTTGGGAAAAGCCAGATTCTTTACAACCCTTGACCTTAAATC\n+GGCGTTTCACCAAATTCTGCTCGCAGAAAAGGATAGAGCGAAAACTGCCT\n+TTTCAGTAGGAAATGGAAAATACGAGTTTTGCCGTTTGCCGTTTGGCTTG\n+AAAAATGCCCCAAGTATTTTTCAACGTGCTATTGATGATGTTGTTAGGGA\n+CCGTATAGGAAAGTCATGTTACGTTTACGTTGACGACGTAATAATATTTT\n+CAAACGGAATTGAGGACCACGTAAACGACGTTGCTTGGGTACTAGACAGA\n+CTGTCTGGGGCAAACATGAGGGTTTCTAAAGAGAAATCGTTTTTCTTCAA\n+GGAAAGCGTCGAGTATCTCGGATTCATGGTGTCAAGTGGAGGTATCACAA\n+CCAGTCCTAGCAAAGTAGAGGCTATTCAGAAATATAATCAACCTACTAAT\n+CTGTTTAGTGTTCGATCGTTTTTAGGGCTAGCAAGTTATTACCGCTGCTT\n+TATTAAGGACTTCGCCTCTATTGCTAGACCACTCACTGACATTCTGAAGG\n+GTGAAAACGGAAAGGTTTCCGCAAGCCAGTCTAAAAAGATACCAATTTCT\n+TTCGATGAAAGACAATGTTCTGCTTTTGAGAAGCTTAAAAATGTTCTTGT\n+CTCCGAAAATGTAATGTTATTGTATCCCGATTATAGAAAAGCCTTTGACT\n+TAACAACAGACGCTTCGGCTTTTGGCCTGGGGGCAGTCTTATCACAGGAT\n+GGCAAGCCTGTTACAATGATTTCGAGAACTTTACAGGATAGAGAACTTAA\n+TTTCGCAACAAATGAACGAGAACTTTTGGCCATCGTTTGGGCTTTAAAGT\n+CTCTTAGGAACTATCTATATGGTGTCAAAAACTTAAACATTTTTACAGAT\n+CACCAGCCGTTAACATACGCCGTGTCAGATAG
GAATCCAAATGCAAAAAT\n+CAAGAGATGGAAGGCGTTTATAGACGAACATAATGCTAAAATTTTCTATA\n+AACCT'..b'AAGTAATCAGACAAGTCAAATTACTCACTAACGA\n+AAAAACGGTGGTAGTACCAAATCAGGAGCTGCAACCAGGTATAATAGTAG\n+CAAGCACCATTGCCGATAGCAAAAACGCATTGATTCGCATTATAAATACA\n+AATAATAAAGACGCCATAATAGATAGCGCGAAGATCAAATGCGAATCAAT\n+GAAAGACTATGACATTTTTACAACACCAGTAGAAAAGGAAAATAGAACTG\n+AAGAAATTTTAAAACAATTAAGATTCCCTAAACAATTCAATAATGAACTA\n+ACTAAGTTATGCACCGAGTTTAGCGATATTTTTGGTCTAGAAACAGAACC\n+AATATCGGCTAACAATTTCTACAAACAAAAACTCAGATTAGGGGAAAAAA\n+CACCGGTCTATATAAAAAACTATCGCATGGCAGATAGCCAAAAACCAGAA\n+ATCGCCAGACAGGTAAAAAAATTAATAGATGATGGAATAGTTGAACCATC\n+AATGTCTGAATATAATAGTCCATTACTTTTGGTTCCAAAGAAACCACTTC\n+CGAATTCCACGGAAAAAAGATGGCGATTAGCAGTTGACTATCGTCAAATA\n+AATAAGAAACTATTATCAGACAAATTTCCACTTCCAAGAATAGAAGATAT\n+TCTTGATCAATTAGGAAGAGCAAAGTATTTTTCATGTCTCGACCTAATGT\n+CTGGATTCCACCAGATAGAACTAGAAAAAAGGTATAGAGATATAACGTCA\n+TTTTCAACAGCCAATGGCTCATATCGCTTCACGCGATTACCATACGGACT\n+GAAAGTAGCACCAAACTCCTTCCAACGTAGGATGACACTTGCATTTTCTG\n+GTCTTGAACCATCGCAAGCATTTCTATATATGGATGACTTAGTAGTAATA\n+GGTTGTTCAGAAAAACATATGCTCAAAAATTTGACTAACGTATTCGAGCT\n+ATGTAGACGACATAATTTGAAACTACATCCAGGGAAATGTTCTTTCTTTA\n+TGAAAGAAGTAACATATTTGGGTCACAAATGTACCGATAAAGGTATACTC\n+CCAGATGACACCAAATATGAAGTTATAGAAAAATATCCTATACCAACAGA\n+TGCCGACAGTGCTAGGCGTTTCGTAGCCTTCTGTAATTATTACAGACGTT\n+TCATTAAAAATTTTTCTGATCATTCACGCCACTTAACGAGGCTTTGTAAA\n+AAGAATGTTCAATTCGAATGGACAGCAGAATGCAATGATGCATTCGAATA\n+CCTTAAAACAGAATTAATGAAACCAACATTACTACAGTACCCAGATTTCG\n+GTAAAGAATTTTGCATAACAACCGATGCTAGTAAACAGGCATGCGGAGCG\n+GTACTTACACAAGATCACAATGGTCAACAACTTCCAGTGGCATACGCTTC\n+AAGAATGTTCACTCAAGGTGAAAGTAATAAGTCCACTACAGAACAAGAAT\n+TAACGGCCATTCATTGGGCCATAAATCATTTTCGACCATACATATATGGC\n+AAGCATTTCATGGTAAAAAGCGATCATAGACCATTGTCATACCTATTCTC\n+TATGAAAAATCCAAGTTCAAAACTCACTCGTATGAGGCTGGATTTAGAAG\n+AGTATGACTTTACTGTAGAATATCTTAAGGGGAAAGATAACCATATTGCG\n+GACGCCTTGTCTCGCATAACAATAAAAGATCTGAAAACAATCAACAGAGA\n+AATATTAAAAGTTACCACCAGATCAAAAGCTAAACAGGAAAATTCCTGTA\n+AGGACGAAGCAATAGTCAAAATACAAGAGGAAAAAGAGCAAACAATAGAA\n+AAGCCCAAAGTCTATGAAGTTGTC
AATAATAATGACACAAAGAAATATGT\n+TTTAATCAAAATAGATAAACACAAGTGTTTATTAAAACGAGGAAAAACAA\n+TTGTTTCACGCTTTGATGTTGATGACTTGTATTCTAATGAAACATTTGAT\n+CTAAATCAATTCTTTCAAAGGCTTATTTCAAAAGCCGGAATGCATAAAAT\n+AACAAAAATGCGAATATCACCAAGCGAACAGATGTTCCAATTTGTATCAC\n+TAAATGAATTTAAAATAAAGGGCAACCGAGTACTCGAAAAAGTAGAACTA\n+GCTATTCTACAAAAGGTGATAATTATAGACAAAAATGACGAAGCTCAGAT\n+TAAAGAAATTTTGACAAAATTCCATGATGATCCTATAGAAGGAGGCCACA\n+CTGGTATTTCGCGAACCCAGTCAAAAATCAAAAGATTTTATTATTGGCCC\n+CAGATGACCAAGACAATCTCAAAGTATGTAAAGACTTGTTTGAAATGTCA\n+ACAAGCCAAAATTACAACACATACGAAAACTCCATTAACATTGATGCCAA\n+CGCCAGCAACAGCATTTGATACTGTTTTAATTGATACCATTGGTCCACTA\n+CCGAAATCGGAAGACGGAAATGAGTATGCAGTTACAATCATATGCGATCT\n+AACCAAGTTTTTAGTAACTATTCCAACACCAAATAAAAGTGCTAAAACAG\n+TTGCAAAGGCTATATTTGAATTATTTGTACTGAAGTACGGTCCAATGAAG\n+ACGTTCATTACAGATCAAGGTACGGAATACAAAAATTCACTTATGAATGA\n+ATTATGCAAATATATGCATATAGAAAATCTAACATCTAGCGCTCACCATC\n+ATCAAACTTTAGGAACAATAGAAAGAAGCCACCGAACTTTTAATGAATAT\n+ATACGTTCATACATATCGGTTAACAAAAGTGATTGGGACATTTGGTTACC\n+ATATTTCACTTATTGCTTCAATACAACACCCTCAATAGTCCATGACTATT\n+GCCCATACGAACTAGTATTTGGCAGACTACCCAGACAATTCAAAGATTTC\n+AGTAAGATAAACAAAATAGACCCAATATACAACTTAGACGACTACTCTAA\n+AGAGCTTAAATGCAGACTAGAATTGTCGTACAACAGAGCAAGAAGAATGT\n+TAGAAAAAGCAAAAGCGGATAGAAAATTAAGATATGATAGGAATACAAAT\n+AATTTCGAATTAAAAATAGGAGATAAAGTATTACTTAGAAAAGAAACAGG\n+TCATAAGTTAGATAAAAGATATGAAGGTCCTTATGACGTAGTAGATATAG\n+GAATAAATGACAATATAACCATTAAAACAGGAAGTAAGAAACAACAAATA\n+GTACATAAAGATAGGCTAAAAAAGCACAAATAGAATGAAAAAAAAAAAGG\n+GCAATCAATGCCAAACCTTTCATAATAAAACTTAAATAACGGCCTGATCA\n+GCCAAAACAATATAACAAAGACATAGACATAATCGAATTTTTATTAATTC\n+AAAATACATACATATTTTTTCTTTATTCATTTAAAAATTCTATATCATAA\n+ATAATGTTAATTCATTAAAAATAATATTTAAGTAATTTTTATTTTATAAT\n+GGTAATATAGTTGATAGAAAATAACTTCATTTCTTTACGTTATTTTAAAA\n+AAGAGGGGAGGTGTAGTATGTGCATATATCGAGGGTACACTGTACCTATA\n+AGTACACAGCAACACTTAGTTGCATTGCATAAATAAATGTCTCAAGTGAG\n+CGTGATATAAGATCACCCATTTATGCTTTAAGCTAAGTCAGCATCCCCAC\n+GCTGGCCGCTGGCCATATATGCGCATAAGCTCTCTCTCTCTCTCTCTTAT\n+ACATATATATATACGCTGCTCTTCTGCCGCTGTCGACGGCGGCGCAGTCG\n+CAGTATTTAG
GTAAGATTAGACACTCTGTAGAGGTTAAGCGGGCAGAACC\n+GTTTCTGCTACTCGAAGAGATAAGAAGAAATAAAAAGGTGGCCTGACGGC\n+TGCACCCAACTGCAAGGAAAACACGTGTTCTCAATTGGTGGCATATATTG\n+GTTTATTACA\n' |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_consensus.fa --- a/test-data/test_consensus.fa Wed Oct 26 07:24:45 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,1143 +0,0 @@\n->FBgn0010302_Burdock\n-AGTTAACACAATCACAAAACACCCGAAATATAGTCGTAAGCCTCAAGTGC\n-TTTTCCCATCTATAGATCGAGCTTTACCTATAAGAAACTGTAACTTGTTA\n-AGCTTTAGAGATAAGAACTCTTGCTATACTTAAGTCAGTCGATTTTGGAA\n-GATTAGAAGCGTCGGTCATCGCCACGTACTTACTATTCGTCTCATTAAGT\n-GCAGACCGCGCAAGCCTATTGTAATTAATAAACTTACGCTAATAAATATA\n-TGGAAAATCTACTAAAATGATAATTGGCGCCCAAACGGATATAAAAACCT\n-ACGATAACTGAATAATTATAAATAAATAACAAAAGGAGGATCCGGAGACA\n-AAACCAGCGGCTTTGGCTAATTAACTCTAACCTAAGAAATAAAAATTTGC\n-TGATTACATAAAATATAATATTAATTACTAAGACCATCTACCTTAAAATT\n-GTTTGTTAATCACTATTATTATATTGTAAGTATAACGCTTATTGAACGAA\n-TTAAAAATATTATTATTATTATTATATTATAACCTATGCAAAGAGTATTG\n-ATAATAAAAATACATGAGTGACAGTGATAACCTTTTAGACAACCTAGTGT\n-CAAGCTTAAATAAATGGTCAGCGCACCAGGCAAGTAGGCAAAACAGTGCA\n-GAAAAAAATAATAAGTCATCAGATAATTGGTGGTCAAAAACAAAGACAAC\n-TAGCGAAATGGAATTTGAAGCTCAGTTAAAAGCGATCGTAGAGAGTGCTG\n-TTGCCGGTGCGCTCGCAGTCCAAAAACAATCATTTGAAAAGCAATTGCAG\n-GAGATGAATGAGCGAATCGGGAAATTAACAGTGAACACCCCAGAGGTGGA\n-AACTTATGTAGATGCTGAAATTAGACCAGGTGTTGTCTGTAGCGAGCCTC\n-TAGATATACTTAAATCTCTGCCAGATTTTGATGGCAAAAGTGAAACATAT\n-GTGTCGTGGAGAAAAGCGGCTCATGTCGCTTTTAAAGTTTTCAAAGATTA\n-CGAGGGAAGTTCAACATTTTACCAAGCTCTTGGTATTATGCGAAATAAAA\n-TAAAAGGTCCAGCGAATACAGTATTGGCTTCTTTTAATACTCCGTTACAT\n-TTCAAAGCAATGATCAGCCGTCTTGATTTCACATATTCTGACAAAAGGCC\n-GATCTATCTAATCGAACAAGAGCTATCAACTTTGCGACAGGGAGACATGA\n-CTCTTACTGAATTCTACGATGAAGTCGAGAAAAAACTGACCCTACTTACC\n-AACAAGACAATAATGACATTTGATAGTGCCTTGGCGATGTCACTGAATGA\n-AAAGTACAGGACGGACGCGTTACGTGTATTTGTAACCGGAGCTAAGAAAT\n-CGTTGAGCGACATTCTTTTTGCAAAAGGTCCAAAAGATTTACCAACTGCT\n-CTCGCTTTAGCGCAAGAGGTCGAGTCGAACCATGAGCGTTACCAATTCGC\n-CCTTATTTATTCTAAAAATATTGGAGACAGGGGTCAGAAAATCGAACAAA\n-GGCACAGCGATAAGGATAGAAACTCAATCATGCCCATGCAAACTAAAAAC\n-CCATATTTTAGCAAGCGTCAGGTGCATACTTATGATAACCAGGAAAGACA\n-AGATCCAGTCCAGTTAACAAATCCTGATGTATCCATGCGATCTAGAAGAA\n-CTGGAAATTTTGGACAAACTCCATTTCCGACTCAGGGAAATATTTGGCCA\n-TCCCAACAGCAAAATTCTTGGCCATCTCAACAACAATATTCTTGGCCATC\n-CCAACAACAAAATTCATTTCGAACACAAAATCAATTCGCATCGCAACCCC\n-AACAGCAAAACACAAGTCAGGCTCAGGGACATTTTGGGTATGCGCA
AGCA\n-TCAAAAAGACCAACGAGTGGCAGTGCAAGGTTTACAGGGCCAAAACAGCA\n-GAGGATCAACTACTTACCTCATGAGAAAGGTCAATGTGAGGAAGATACAG\n-ACGGTTATCAAAAGGAGGCAGAAGCGGAGGTTGATGATTATGAGGACGAA\n-CTAGTGAATTACGATCATGTTCATTTTTTAGCCACAAATCCCTGCTACCG\n-TACATAGAAAGAGAGATAGCAGGGAGAACCATAAAACTTTTGATTGACAC\n-CGGGGCTTCGAAAAATTACATACAGCCCCTCCCTGAATTAAAAAACATAA\n-TGCCGGTACAAAATAAATTCACGGTAAAATCGCTTCATGGTTGCAACACC\n-GTCAAACAGAAATGCTTTATTAAGCTATTTAACACATCTGTTCAATTCTT\n-TATTCTTCCAAGTCTCTCTAGTTTTGACGCAATAATAGGACTTGACCTTT\n-TGAAACAGGGAAATGCAACGTTAGATTTTAAGAACAAAACGTTGAATATC\n-AACAATGAAGTGGAATCTATTCAGTTTTTGAGATGTGACAGCGTAAATTT\n-CGCCAACATAGAGAATATTGTGGTTCCAAATCAGATATCTAATAAATTCC\n-ATACAATGCTTCGAAACCGATTGGCCGTCTTTGCGGAACCGGAAGAAGCA\n-CTGCCGTATAATACCAACATTGTTGCCACAATACGTACTGAGGACGACCA\n-ACCCATTTACTCAAAACTCTATCCGTACCCCATGGGCGTATCGGATTTTG\n-TGAATAAGGAGACACATGCTTTGTTAAAGGACGGAATTATCAGGCCCTCG\n-TCGTCACCTTACAACAATCCGGTTTGGGTAGTCGATAAAAAAGGTACAGA\n-TGAAGAGGGAAATACTAAGAAAAGGTTGGTTATAGATTTTAGAAAACTAA\n-ATTTAAAAACAATCGACGACAAGTACCCTATACCAAACGTAGTATGGATC\n-TTGTCAAATTTGGGAAAAGCCAGATTCTTTACAACCCTTGACCTTAAATC\n-GGCGTTTCACCAAATTCTGCTCGCAGAAAAGGATAGAGCGAAAACTGCCT\n-TTTCAGTAGGAAATGGAAAATACGAGTTTTGCCGTTTGCCGTTTGGCTTG\n-AAAAATGCCCCAAGTATTTTTCAACGTGCTATTGATGATGTTGTTAGGGA\n-CCGTATAGGAAAGTCATGTTACGTTTACGTTGACGACGTAATAATATTTT\n-CAAACGGAATTGAGGACCACGTAAACGACGTTGCTTGGGTACTAGACAGA\n-CTGTCTGGGGCAAACATGAGGGTTTCTAAAGAGAAATCGTTTTTCTTCAA\n-GGAAAGCGTCGAGTATCTCGGATTCATGGTGTCAAGTGGAGGTATCACAA\n-CCAGTCCTAGCAAAGTAGAGGCTATTCAGAAATATAATCAACCTACTAAT\n-CTGTTTAGTGTTCGATCGTTTTTAGGGCTAGCAAGTTATTACCGCTGCTT\n-TATTAAGGACTTCGCCTCTATTGCTAGACCACTCACTGACATTCTGAAGG\n-GTGAAAACGGAAAGGTTTCCGCAAGCCAGTCTAAAAAGATACCAATTTCT\n-TTCGATGAAAGACAATGTTCTGCTTTTGAGAAGCTTAAAAATGTTCTTGT\n-CTCCGAAAATGTAATGTTATTGTATCCCGATTATAGAAAAGCCTTTGACT\n-TAACAACAGACGCTTCGGCTTTTGGCCTGGGGGCAGTCTTATCACAGGAT\n-GGCAAGCCTGTTACAATGATTTCGAGAACTTTACAGGATAGAGAACTTAA\n-TTTCGCAACAAATGAACGAGAACTTTTGGCCATCGTTTGGGCTTTAAAGT\n-CTCTTAGGAACTATCTATATGGTGTCAAAAACTTAAACATTTTTACAGAT\n-CACCAGCCGTTAACATACGCCGTGTCAGATAG
GAATCCAAATGCAAAAAT\n-CAAGAGATGGAAGGCGTTTATAGACGAACATAATGCTAAAATTTTCTATA\n-AACCT'..b'AAGTAATCAGACAAGTCAAATTACTCACTAACGA\n-AAAAACGGTGGTAGTACCAAATCAGGAGCTGCAACCAGGTATAATAGTAG\n-CAAGCACCATTGCCGATAGCAAAAACGCATTGATTCGCATTATAAATACA\n-AATAATAAAGACGCCATAATAGATAGCGCGAAGATCAAATGCGAATCAAT\n-GAAAGACTATGACATTTTTACAACACCAGTAGAAAAGGAAAATAGAACTG\n-AAGAAATTTTAAAACAATTAAGATTCCCTAAACAATTCAATAATGAACTA\n-ACTAAGTTATGCACCGAGTTTAGCGATATTTTTGGTCTAGAAACAGAACC\n-AATATCGGCTAACAATTTCTACAAACAAAAACTCAGATTAGGGGAAAAAA\n-CACCGGTCTATATAAAAAACTATCGCATGGCAGATAGCCAAAAACCAGAA\n-ATCGCCAGACAGGTAAAAAAATTAATAGATGATGGAATAGTTGAACCATC\n-AATGTCTGAATATAATAGTCCATTACTTTTGGTTCCAAAGAAACCACTTC\n-CGAATTCCACGGAAAAAAGATGGCGATTAGCAGTTGACTATCGTCAAATA\n-AATAAGAAACTATTATCAGACAAATTTCCACTTCCAAGAATAGAAGATAT\n-TCTTGATCAATTAGGAAGAGCAAAGTATTTTTCATGTCTCGACCTAATGT\n-CTGGATTCCACCAGATAGAACTAGAAAAAAGGTATAGAGATATAACGTCA\n-TTTTCAACAGCCAATGGCTCATATCGCTTCACGCGATTACCATACGGACT\n-GAAAGTAGCACCAAACTCCTTCCAACGTAGGATGACACTTGCATTTTCTG\n-GTCTTGAACCATCGCAAGCATTTCTATATATGGATGACTTAGTAGTAATA\n-GGTTGTTCAGAAAAACATATGCTCAAAAATTTGACTAACGTATTCGAGCT\n-ATGTAGACGACATAATTTGAAACTACATCCAGGGAAATGTTCTTTCTTTA\n-TGAAAGAAGTAACATATTTGGGTCACAAATGTACCGATAAAGGTATACTC\n-CCAGATGACACCAAATATGAAGTTATAGAAAAATATCCTATACCAACAGA\n-TGCCGACAGTGCTAGGCGTTTCGTAGCCTTCTGTAATTATTACAGACGTT\n-TCATTAAAAATTTTTCTGATCATTCACGCCACTTAACGAGGCTTTGTAAA\n-AAGAATGTTCAATTCGAATGGACAGCAGAATGCAATGATGCATTCGAATA\n-CCTTAAAACAGAATTAATGAAACCAACATTACTACAGTACCCAGATTTCG\n-GTAAAGAATTTTGCATAACAACCGATGCTAGTAAACAGGCATGCGGAGCG\n-GTACTTACACAAGATCACAATGGTCAACAACTTCCAGTGGCATACGCTTC\n-AAGAATGTTCACTCAAGGTGAAAGTAATAAGTCCACTACAGAACAAGAAT\n-TAACGGCCATTCATTGGGCCATAAATCATTTTCGACCATACATATATGGC\n-AAGCATTTCATGGTAAAAAGCGATCATAGACCATTGTCATACCTATTCTC\n-TATGAAAAATCCAAGTTCAAAACTCACTCGTATGAGGCTGGATTTAGAAG\n-AGTATGACTTTACTGTAGAATATCTTAAGGGGAAAGATAACCATATTGCG\n-GACGCCTTGTCTCGCATAACAATAAAAGATCTGAAAACAATCAACAGAGA\n-AATATTAAAAGTTACCACCAGATCAAAAGCTAAACAGGAAAATTCCTGTA\n-AGGACGAAGCAATAGTCAAAATACAAGAGGAAAAAGAGCAAACAATAGAA\n-AAGCCCAAAGTCTATGAAGTTGTC
AATAATAATGACACAAAGAAATATGT\n-TTTAATCAAAATAGATAAACACAAGTGTTTATTAAAACGAGGAAAAACAA\n-TTGTTTCACGCTTTGATGTTGATGACTTGTATTCTAATGAAACATTTGAT\n-CTAAATCAATTCTTTCAAAGGCTTATTTCAAAAGCCGGAATGCATAAAAT\n-AACAAAAATGCGAATATCACCAAGCGAACAGATGTTCCAATTTGTATCAC\n-TAAATGAATTTAAAATAAAGGGCAACCGAGTACTCGAAAAAGTAGAACTA\n-GCTATTCTACAAAAGGTGATAATTATAGACAAAAATGACGAAGCTCAGAT\n-TAAAGAAATTTTGACAAAATTCCATGATGATCCTATAGAAGGAGGCCACA\n-CTGGTATTTCGCGAACCCAGTCAAAAATCAAAAGATTTTATTATTGGCCC\n-CAGATGACCAAGACAATCTCAAAGTATGTAAAGACTTGTTTGAAATGTCA\n-ACAAGCCAAAATTACAACACATACGAAAACTCCATTAACATTGATGCCAA\n-CGCCAGCAACAGCATTTGATACTGTTTTAATTGATACCATTGGTCCACTA\n-CCGAAATCGGAAGACGGAAATGAGTATGCAGTTACAATCATATGCGATCT\n-AACCAAGTTTTTAGTAACTATTCCAACACCAAATAAAAGTGCTAAAACAG\n-TTGCAAAGGCTATATTTGAATTATTTGTACTGAAGTACGGTCCAATGAAG\n-ACGTTCATTACAGATCAAGGTACGGAATACAAAAATTCACTTATGAATGA\n-ATTATGCAAATATATGCATATAGAAAATCTAACATCTAGCGCTCACCATC\n-ATCAAACTTTAGGAACAATAGAAAGAAGCCACCGAACTTTTAATGAATAT\n-ATACGTTCATACATATCGGTTAACAAAAGTGATTGGGACATTTGGTTACC\n-ATATTTCACTTATTGCTTCAATACAACACCCTCAATAGTCCATGACTATT\n-GCCCATACGAACTAGTATTTGGCAGACTACCCAGACAATTCAAAGATTTC\n-AGTAAGATAAACAAAATAGACCCAATATACAACTTAGACGACTACTCTAA\n-AGAGCTTAAATGCAGACTAGAATTGTCGTACAACAGAGCAAGAAGAATGT\n-TAGAAAAAGCAAAAGCGGATAGAAAATTAAGATATGATAGGAATACAAAT\n-AATTTCGAATTAAAAATAGGAGATAAAGTATTACTTAGAAAAGAAACAGG\n-TCATAAGTTAGATAAAAGATATGAAGGTCCTTATGACGTAGTAGATATAG\n-GAATAAATGACAATATAACCATTAAAACAGGAAGTAAGAAACAACAAATA\n-GTACATAAAGATAGGCTAAAAAAGCACAAATAGAATGAAAAAAAAAAAGG\n-GCAATCAATGCCAAACCTTTCATAATAAAACTTAAATAACGGCCTGATCA\n-GCCAAAACAATATAACAAAGACATAGACATAATCGAATTTTTATTAATTC\n-AAAATACATACATATTTTTTCTTTATTCATTTAAAAATTCTATATCATAA\n-ATAATGTTAATTCATTAAAAATAATATTTAAGTAATTTTTATTTTATAAT\n-GGTAATATAGTTGATAGAAAATAACTTCATTTCTTTACGTTATTTTAAAA\n-AAGAGGGGAGGTGTAGTATGTGCATATATCGAGGGTACACTGTACCTATA\n-AGTACACAGCAACACTTAGTTGCATTGCATAAATAAATGTCTCAAGTGAG\n-CGTGATATAAGATCACCCATTTATGCTTTAAGCTAAGTCAGCATCCCCAC\n-GCTGGCCGCTGGCCATATATGCGCATAAGCTCTCTCTCTCTCTCTCTTAT\n-ACATATATATATACGCTGCTCTTCTGCCGCTGTCGACGGCGGCGCAGTCG\n-CAGTATTTAG
GTAAGATTAGACACTCTGTAGAGGTTAAGCGGGCAGAACC\n-GTTTCTGCTACTCGAAGAGATAAGAAGAAATAAAAAGGTGGCCTGACGGC\n-TGCACCCAACTGCAAGGAAAACACGTGTTCTCAATTGGTGGCATATATTG\n-GTTTATTACA\n' |
b |
diff -r 6e02b9179a24 -r 9672fe07a232 test-data/test_insertions_out.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_insertions_out.bed Mon Dec 05 09:58:47 2016 -0500 |
b |
@@ -0,0 +1,11 @@ +Chr Start End TransposonName TransposonDirection Class VariantSupport Frequency Junction1 Junction1Support Junction2 Junction2Support 5'_Support 3'_Support +chr2L 2003846 2003873 FBgn0003122_pogo sense 1p1 39 1.0000 2003871 2 2003871 5 18 14 +chr2L 2131300 2131312 FBgn0001283_jockey sense 1p1 37 1.0000 2131304 1 2131306 6 11 19 +chr2L 2397913 2397938 FBgn0000155_roo sense 1p1 47 1.0000 2397942 5 2397943 5 19 18 +chr2L 2412909 2412937 FBgn0003055_P-element sense 1p1 45 0.9783 2412907 2 2412908 3 21 19 +chr2L 2569095 2569595 FBgn0004141_HeT-A sense 2p 6 1.0000 2569345 0 2569345 0 6 0 +chr2L 2714434 2714458 FBgn0000349_copia sense 1p1 40 1.0000 2714437 8 2714440 4 12 16 +chr2L 2763527 2763539 FBgn0000199_blood sense 1p1 39 0.9070 2763533 0 2763533 0 19 20 +chr2L 2920516 2920519 FBgn0010302_Burdock sense 1p1 39 1.0000 2920517 4 2920518 4 15 16 +chr2L 2965217 2965244 FBgn0001167_gypsy sense 1p1 43 0.9348 2965230 0 2965230 0 24 19 +chr2L 2966906 2966920 FBgn0000481_Doc sense 1p1 50 0.9804 2966910 2 2966911 3 32 13 |