Repository 'transcriptomics_easy_for_discovery_toolkit'
hg clone https://toolshed.g2.bx.psu.edu/repos/bioitcore/transcriptomics_easy_for_discovery_toolkit

Changeset 1:2d7ec1cb4139 (2017-09-07)
Previous changeset 0:3fc5784c1ce0 (2017-09-07) Next changeset 2:40b8b1f9ea7d (2017-09-07)
Commit message:
planemo upload
added:
SpliceTrap.tar.gz
removed:
SpliceTrap/PostAnalysis
SpliceTrap/SpliceChange
SpliceTrap/SpliceTrap.pl
SpliceTrap/TXdbgen
SpliceTrap/bin/ApplyCutoff.jie.pl
SpliceTrap/bin/Pair_estimate_c
SpliceTrap/bin/PostAnalysis
SpliceTrap/bin/PostAnalysis.pl
SpliceTrap/bin/SpliceChange
SpliceTrap/bin/SpliceChange.pl
SpliceTrap/bin/SpliceTrap
SpliceTrap/bin/SpliceTrap.pl
SpliceTrap/bin/SpliceTrap_measure.pl
SpliceTrap/bin/TXdbgen
SpliceTrap/bin/TXdbgen.pl
SpliceTrap/bin/apply_cutoff.sh
SpliceTrap/bin/batch_para_cov10p_fit.sh
SpliceTrap/bin/batchqsub.pl
SpliceTrap/bin/batchqsub.pl_orig
SpliceTrap/bin/beta_fit.R
SpliceTrap/bin/bowtie2eland.pl
SpliceTrap/bin/calc_pval.R
SpliceTrap/bin/downloaddb.pl
SpliceTrap/bin/get.frag.size.pl
SpliceTrap/bin/get.hist.pl
SpliceTrap/bin/get_bed_fa_j.pl
SpliceTrap/bin/get_event_dist_fit.pl
SpliceTrap/bin/gtf2bed.pl
SpliceTrap/bin/mapping_bowtie.sh
SpliceTrap/bin/mapping_rmap.sh
SpliceTrap/bin/mark.mt.4eland.pl
SpliceTrap/bin/rmap2eland.pl
SpliceTrap/bin/scan_nomt.pl
SpliceTrap/bin/scanbed2txdb.pl
SpliceTrap/bin/splitdb.sh
SpliceTrap/bin/vslz.pl
SpliceTrap/cutoffs/cutoff.pair.06.txt
SpliceTrap/cutoffs/cutoff.pair.07.txt
SpliceTrap/cutoffs/cutoff.pair.08.txt
SpliceTrap/refGenes.bed
SpliceTrap/splice_trap.xml
SpliceTrap/src/Makefile
SpliceTrap/src/splicetrap.estimate.cpp
SpliceTrap/test-data/input1.fastq
SpliceTrap/test-data/input2.fastq
SpliceTrap/test-data/output1.txt
SpliceTrap/test-data/output2.txt
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap.tar.gz
b
Binary file SpliceTrap.tar.gz has changed
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/PostAnalysis
--- a/SpliceTrap/PostAnalysis Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,66 +0,0 @@
-#!/usr/bin/perl
-my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
-# this script is a wrapup for Post analysis based on the ratio file output
-
-use strict;
-use Getopt::Long;
-my $RatioFile="";
-my $OutputFile = "";
-my $JunctionCut=5;
-my $CutoffLevel="M";
-my $noIRM = 0;
-my $noIRMstr="";
-
-GetOptions (
- "i:s"=>\$RatioFile,
- "o:s"=>\$OutputFile,
- "c:s"=>\$CutoffLevel,
- "noIRM|noirm"=>\$noIRM,
- "j:i"=>\$JunctionCut
-);
-
-my $InputParaDes=" Usage of the script:
- -i      input file (.ratio file)
- -o      output file
- -c      Cutoff Level:H/[M]/L
- Means High, Middle or Low
- -j Junction reads per junction requirement for each exon-isoform [5]
- --noIRM Use the unadjusted inclusion ratios (before IRM correction)
-";
-
-if($RatioFile eq "")
-{
- print $InputParaDes;
- exit;
-}
-
-if($CutoffLevel ne "H" and  $CutoffLevel ne "M" and  $CutoffLevel ne "L")
-{
- print $InputParaDes;
- exit;
-}
-if($noIRM)
-{
- $noIRMstr= "noirm";
-}
-
-
-system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut  $noIRMstr >$OutputFile.raw");
-
-open(rawfile, "$OutputFile.raw");
-open(outfile, ">$OutputFile");
-while(my $line=<rawfile>)
-{
- chomp($line);
- my @a=split("\t",$line);
- if($noIRM)
- {
- print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n";
- }
- else
- {
- print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n";
- }
-}
-close(outfile);
-close(rawfile);
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/SpliceChange
--- a/SpliceTrap/SpliceChange Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,176 +0,0 @@
-#!/usr/bin/perl
-my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
-# compare two outputs from SpliceTrap
-use strict;
-
-# the information needed
-# inclusion ratio input file
-# filtered out or not input file
-# minimal inclusion ratio at least 0.1 for one condition
-# minimal splicing changes parameter
-# orignial pipeline written by Martin Akerman
-# re-organized and re-written by Jie Wu
-
-use FileHandle;
-
-use Getopt::Long;
-
-my @programs = ('grep','mkdir','R','paste','awk','sort');
-foreach my $program (@programs)
-{
-        die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
-}
-
-
-my $InputFileName1 = "";
-my $InputFileName2 = "";
-my $OutputFileName = "";
-my $minchange = 0.3;
-my $mininc = 0.1;
-my $noIRM = 0;
-
-
-GetOptions (
- "1:s"=>\$InputFileName1,
- "2:s"=>\$InputFileName2,
- "o:s"=>\$OutputFileName,
- "noIRM|noirm"=>\$noIRM,
- "m:f"=>\$mininc,
- "c:f"=>\$minchange
-);
-
-my $InputParaDes=" Usage of the script:
- -1 input file 1, output from SpliceTrap, *.raw file in the output folder 
- -2 input file 2. see above.
- -o output file prefix.
- -c minimal change required, [default:0.3]
- -m minimal inclusion ratio for at least one condition. [defualt:0.1]
- --noIRM Use the unadjusted inclusion ratios (before IRM correction)
-";
-
-if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "")
-{
- print $InputParaDes;
- exit;
-}
-
-
-
-if(-d "$OutputFileName.cache" )
-{
- print "Aborted! output cache folder exists: $OutputFileName.cache \n";
- exit;
-}
-else
-{
- system("mkdir $OutputFileName.cache");
-}
-
-#
-my %ir1; # records ir from file1
-my %ir2; # records ir from file2
-# only records trios above the cutoffs
-
-open(input1, $InputFileName1) or die "$InputFileName1 open error!\n";
-while(my $line=<input1>)
-{
- chomp($line);
- my @a = split("\t", $line);
- if($a[21] ne "na")
- {
- if($noIRM)
- {
- $ir1{$a[0]} = $a[1];
- }
- else
- {
- $ir1{$a[0]} = $a[2];
- }
- }
-}
-print scalar(keys (%ir1) )," records loaded from $InputFileName1\n";
-close(input1);
-
-open(input2, $InputFileName2) or die "$InputFileName2 open error!\n";
-while(my $line=<input2>)
-{
-        chomp($line);
-        my @a = split("\t", $line);
-        if($a[21] ne "na")
-        {
- if($noIRM)
- {
- $ir2{$a[0]} = $a[1];
- }
- else
- {
-                 $ir2{$a[0]} = $a[2];
- }
-        }
-}
-print scalar(keys (%ir2) )," records loaded from $InputFileName2\n";
-
-
-close(input2);
-
-
-##
-my %mean;
-my %sd;
-
-my %num;
-
-my %filehandles;

-my @types = ("CA", "IR", "AD","AA");
-
-foreach my $type (@types)
-{
- my $fh = new FileHandle;
- open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n";
- $filehandles{$type} = $fh;
-}
-
-
-foreach my $key (keys %ir1)
-{
- if(exists $ir2{$key})
- {
- if(($ir1{$key} + $ir2{$key}) > 0)
- {
- #find the type
- my $type = substr($key, 0, 2);
- $type = "CA" if $type eq "CS";
- $num{$type}++;
-
- my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key});
- $mean{$type} = $mean{$type} + $change;
- $sd{$type} = $change*$change + $sd{$type};
-
- $change = sprintf("%.4f",$change);
-
- my $fout =  $filehandles{$type};
- print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n";
- }
- }
-}
-
-foreach my $type (keys %filehandles)
-{
- close($filehandles{$type});
- if($num{$type} == 0)
- {
- warn  "no AS events passed filters for both files\n";
- next;
- }
- $mean{$type} = $mean{$type}/$num{$type};
- $sd{$type} = sqrt($sd{$type}/$num{$type});
- system("R  --slave --args  $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R");
- system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report");
- print "$num{$type} $type events processed...\n";
- #print $mean{$type},"\t",  $sd{$type} ,"\t",$num{$type},"\n";
-
-}
-system("rm $OutputFileName.cache -rf");
-
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/SpliceTrap.pl
--- a/SpliceTrap/SpliceTrap.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,285 +0,0 @@\n-#!/usr/bin/perl\n-# Author: wuj@cshl.edu\n-# Modified: Baekdoo Kim (baegi7942@gmail.com)\n-use strict;\n-use Getopt::Long;\n-use Data::Dumper;\n-####################\n-use Cwd;\n-my $PROG = $0;\n-my $CUR_DIR = Cwd::abs_path(Cwd::cwd());\n-my $PROG_ABS_PATH = Cwd::abs_path($PROG);\n-#my $SrcFolder=`dirname $PROG_ABS_PATH`;\n-#chomp($SrcFolder);\n-#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini";\n-#my $SrcFolder=$config{SrcFolder};\n-\n-my @programs = (\'R\',\'echo\',\'cat\',\'bash\',\'perl\',\'ln\',\'mkdir\',\'paste\',\'grep\',\'sort\',\'basename\',\'awk\',\'wc\',\'mv\',\'cd\',\'rm\',\'split\',\'head\' );\n-foreach my $program (@programs)\n-{\n-        die ("CHECK: $program not found\\n") if(system("hash $program >/dev/null"));\n-\n-}\n-\n-####################\n-my $SrcFolder="";\n-my $MapSoftware="bowtie";\n-my $DatabasePrefix="hg38";\n-my $ReadFileFormat="";\n-my $ReadFile1Name="";\n-my $ReadFile2Name="";\n-my $CutoffLevel="M";\n-my $Outputfolder=$CUR_DIR;\n-my $OutputPrefix="Result";\n-#my $CutoffOnly=0;\n-my $ReadSize=36;\n-my $JunctionCut=5;\n-my $onGalaxy_raw="";\n-my $onGalaxy_txt="";\n-my $BowtieThreads=1;\n-my $noIRMstr="";\n-my $noIRM = 0;\n-\n-my $num_args = $#ARGV;\n-$onGalaxy_raw = $ARGV[$num_args-1];\n-$onGalaxy_txt = $ARGV[$num_args];\n-\n-GetOptions (\n-\t"l:s"=>\\$SrcFolder,\n-        "m:s"=>\\$MapSoftware,\n-        "d:s"=>\\$DatabasePrefix,\n-#       "f:s"=>\\$ReadFileFormat,\n-        "1:s"=>\\$ReadFile1Name,\n-        "2:s"=>\\$ReadFile2Name,\n-        "c:s"=>\\$CutoffLevel,\n-        "outdir:s"=>\\$Outputfolder,\n-        "o:s"=>\\$OutputPrefix,\n-        "j:i"=>\\$JunctionCut,\n-        "s:i"=>\\$ReadSize,\n-        "p:i"=>\\$BowtieThreads,\n-        "noIRM|noirm"=>\\$noIRM\n-#       "local:s"=>\\$local,\n-#       "rerun"=>\\$CutoffOnly\n-);\n-#-O for galaxy output\n-\n-\n-my $InputParaDes="      Usage of the script:\n-\t-l\tBase Location (required)\n-        -m      Mapping software: [bowtie]/rmap\n-        -d      
Database prefix: [hg18]/mm9/rn4/userdefined\n-        -1      Read File 1\n-        -2      Read File 2\n-        -c      Cutoff Level:H/[M]/L\n-                Means High, Middle or Low\n-        -j      Junction reads requirement per junction for each exon-isoform [5]\n-        -o      Output prefix {Result}\n-        -s      Read Size [36]\n-        --outdir Output folder [./]\n-        -p      Bowtie parameter, threads number, only use this when you don\'t use qsub [1]\n-        --noIRM Skip the IRM correction step\n-        \n-        This is a quick help, please refer to the README file for details.\n-";\n-\n-\n-if($SrcFolder eq "") {\n-\tprint "[CHECK] - Please provide the location of the script (option \'-l\')\\n\\n";\n-\texit;\n-}\n-\n-if($ReadFile2Name eq "")\n-{\n-        $ReadFile2Name = $ReadFile1Name;\n-        #trigger singled end mode\n-}\n-\n-if($ReadFile1Name eq "" or $ReadFile2Name eq "" )\n-{\n-        print $InputParaDes;\n-        exit;\n-}\n-\n-if($BowtieThreads < 1)\n-{\n-        print $InputParaDes;\n-        exit;\n-}\n-\n-if (! 
-e "$SrcFolder/db/$DatabasePrefix/parallel")\n-{\n-        print "CHECK: Error, the database you specified is not properly installed.\\n";\n-        #print $InputParaDes;\n-        exit;\n-\n-}\n-\n-if($CutoffLevel ne "H" and  $CutoffLevel ne "M" and  $CutoffLevel ne "L")\n-{\n-        print $InputParaDes;\n-        exit;\n-}\n-\n-\n-$ReadFile1Name = Cwd::abs_path($ReadFile1Name);\n-$ReadFile2Name = Cwd::abs_path($ReadFile2Name);\n-\n-#check the files\n-open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\\n");\n-my $checkoneline = <check>;\n-if(substr($checkoneline,0,1) eq ">")\n-{\n-        $ReadFileFormat = "fasta";\n-}\n-elsif(substr($checkoneline,0,1) eq "@")\n-{\n-        $ReadFileFormat = "fastq";\n-}\n-else\n-{\n-        die("CHECK: ERROR:Please check $ReadFile1Name\\n");\n-}\n-close(check);\n-\n-open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\\n");\n-my $checkoneline = <check>;\n-if(substr($checkoneline,0,1) eq ">")\n-{\n-        die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\\n") if ($ReadFileFormat ne "fasta");\n-}\n-elsif(substr($checkoneline,0,1) eq "'..b'print "CHECK: checking rmap...\\n";\n-        if(system("type rmap &>/dev/null") ==0 )\n-        {\n-                print "CHECK: rmap found, continue\\n";\n-        }\n-        else\n-        {\n-                die "CHECK: No rmap found in PATH, EXIT!\\n";\n-        }\n-}\n-else\n-{\n-        die "CHECK: option -m only takes rmap or bowtie as inputs\\n";\n-}\n-\n-if($ReadSize == 0)\n-{\n-        die "CHECK: Please check option -s Read size\\n";\n-}\n-\n-if($noIRM)\n-{\n-        $noIRMstr= "noirm";\n-}\n-\n-#write more checks later\n-print "PARAMETERS:\\tMapping software:  ",$MapSoftware,"\\n";\n-print "PARAMETERS:\\tDatabase prefix:   ",$DatabasePrefix,"\\n";\n-print "PARAMETERS:\\tRead end 1:        ",$ReadFile1Name,"\\n";\n-print "PARAMETERS:\\tRead end 2:        ",$ReadFile2Name,"\\n" if($ReadFile2Name ne 
$ReadFile1Name);\n-print "PARAMETERS:\\tGalaxy_raw:        ",$onGalaxy_raw,"\\n"; #if($onGalaxy_raw ne "");\n-print "PARAMETERS:\\tGalaxy_txt:        ",$onGalaxy_txt,"\\n"; #if($onGalaxy_txt ne "");\n-print "PARAMETERS:\\tCutoff level:      ",$CutoffLevel,"\\n";\n-print "PARAMETERS:\\tJunction reads.min:",$JunctionCut,"\\n";\n-print "PARAMETERS:\\tOutput folder:     ",$Outputfolder,"\\n";\n-print "PARAMETERS:\\tOutput prefix:     ",$OutputPrefix,"\\n";\n-print "PARAMETERS:\\tRead size:         ",$ReadSize,"\\n";\n-print "PARAMETERS:\\tBowtie threads #:  ",$BowtieThreads,"\\n";\n-print "PARAMETERS:\\tNo IRM.\\n" if ($noIRM);\n-\n-if($MapSoftware eq "bowtie")\n-{\n-        print "=================STAGE 1 MAPPING===================\\n";\n-        system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads");\n-        system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name);\n-        print "=================STAGE 2 ESTIMATION================\\n";\n-        #  ratio, log, nums\n-        system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;\n-        print "=================STAGE 3 CUTOFF====================\\n";\n-        #   raw\n-        system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");\n-\n-\n-}\n-\n-if($MapSoftware eq "rmap")\n-{\n-        print "=================STAGE 1 MAPPING===================\\n";\n-\n-        system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ;\n-        system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name);\n-      
  print "=================STAGE 2 ESTIMATION================\\n";\n-\n-        system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;\n-        print "=================STAGE 3 CUTOFF====================\\n";\n-        system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");\n-\n-\n-}\n-\n-#print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\\n";\n-\n-if($onGalaxy_raw ne "" && $onGalaxy_txt ne "")\n-{\n-        print "OUTPUTFILE:$OutputPrefix.raw\\n";\n-        system("grep -v na $Outputfolder/$OutputPrefix.raw >$onGalaxy_raw");\n-        print "OUTPUTFILE:$OutputPrefix.txt\\n";\n-        system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy_txt");\n-}\n-\n-print "============Clean up\\n";\n-system("rm -r $Outputfolder/$OutputPrefix.*");\n-\n-sub random_sessid {\n-        #my @chars = (0..9,a..z,A..Z);\n-        my @chars = (\'a\'..\'z\',\'A\'..\'Z\');\n-        my $len = 10;\n-        my $string = join \'\', map {$chars[rand(@chars)]} (1..$len);\n-        return $string;\n-}                                                                                                                                            \n'
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/TXdbgen
--- a/SpliceTrap/TXdbgen Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,97 +0,0 @@
-#!/usr/bin/perl
-my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
-# this script is to generate TXdb database files from bed/gtf file
-
-use strict;
-use Cwd;
-use Getopt::Long;
-
-my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
-foreach my $program (@programs)
-{
-        die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
-
-}
-
-
-my $genomedir = "";
-
-my $annofilename = "";
-my $txdbname = "userdefined";
-my $knownonly = 0;
-my $gtfinput = 0;
-
-GetOptions (
- "g:s"=>\$genomedir,
- "a:s"=>\$annofilename,
- "n:s"=>\$txdbname,
- "gtf"=>\$gtfinput,
- "knownonly"=>\$knownonly
-);
-
-my $InputParaDes="      Usage of the script:
-        -g      genome fasta file location
- -a annotation file (bed/gtf)
- -n txdb name
- --gtf specify this if annotation file is in gtf format
-";
-
-if($genomedir eq "" or $annofilename eq "")
-{
- print $InputParaDes;
- exit;
-}
-
-$genomedir = Cwd::abs_path($genomedir);
-$annofilename = Cwd::abs_path($annofilename);
-
-my $annofilebase = `basename $annofilename`;
-chomp($annofilebase);
-#need a cache folder to avoid mess
-
-my $cachefolder = $annofilebase.".cache";
-
-if (! -e $cachefolder)
-{
- mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n";
-}
-if($gtfinput)
-{
- print "TXDBGEN: converting gtf file into bed format\n";
- system ("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed");
- $annofilename = "$cachefolder/$annofilebase.bed";
-}
-
-
-print "TXDBGEN: scan $annofilename for AS events...\n";
-system("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp");
-print "TXDBGEN: fetch sequences from $genomedir...\n";
-system("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort");
-#get fasta file list
-system("ls $genomedir/*.fa >$cachefolder/chr.list");
-
-system("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta");
-
-print "TXDBGEN: generate files for parallel computing...\n";
-if (! -e "$cachefolder/parallel")
-{
- mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n";
-}
-system("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed");
-system("rm $cachefolder/out.bed");
-system("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi");
-system("rm $cachefolder/TXdb.tmp.evi");
-system("bash $SrcFolder/splitdb.sh $cachefolder/parallel");
-print "TXDBGEN: build Bowtie index...\n";
-
-if (! -e "$cachefolder/btw")
-{
- mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n";
-}
-system("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb");
-system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list");
-print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";
-
-system("mv $cachefolder $SrcFolder/../db/$txdbname");
-print "TXDBGEN: Done!\n";
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/ApplyCutoff.jie.pl
--- a/SpliceTrap/bin/ApplyCutoff.jie.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,108 +0,0 @@
-#apply our cuttoff hash table on the IR calculated by Jie
-#Modified from Martin's code
-use strict;
-
-use Cwd;
-my $PROG = $0;
-my $CUR_DIR = Cwd::abs_path(Cwd::cwd());
-my $PROG_ABS_PATH = Cwd::abs_path($PROG);
-my $SrcFolder=`dirname $PROG_ABS_PATH`;
-chomp($SrcFolder);
-
-my %cutoff;
-my @Exlen;
-
-my $cutoff_level=$ARGV[1];
-my $JunctionCut = $ARGV[2];
-my $noirm = $ARGV[3];
-
-my $cutoff_level_index=7;
-
- $cutoff_level_index=8 if $cutoff_level eq "H";
-$cutoff_level_index=6 if $cutoff_level eq "L";
-
-open(CUT,"$SrcFolder/../cutoffs/cutoff.pair.0".$cutoff_level_index.".txt") || die "cutoff file not found $!\n";
-
-while(<CUT>){
- chomp;
- my @a=split(/\t/,$_);
- push @Exlen,$a[0];
- $cutoff{$a[0]}=$a[1];
-}
-close(CUT);
-
-open(IN,$ARGV[0]);
-
-while(<IN>){
- chomp;
- my @a=split(/\t/,$_);
- my $Ez='Ez=yes';
- my $print=$_;
- if($a[0]=~m/#/g){next}
- my $eventid=substr($a[0],0,2);
- my $bir =$a[2];
- $bir =$a[1] if($noirm eq "noirm");
- my $j12 = $a[8];
- my $j23 = $a[9];
- my $j13 = $a[10];
- my $cov1=$a[11];
- my $cov2=$a[12];
- my $cov3=$a[13];
- my $siz1=$a[15];
- my $siz2=$a[16];
- my $siz3=$a[17];
-
-
- my $stat1='exon1='.cutoff($siz1,$cov1,\@Exlen,%cutoff);
- my $stat2='exon2='.cutoff($siz2,$cov2,\@Exlen,%cutoff);
- my $stat3='exon3='.cutoff($siz3,$cov3,\@Exlen,%cutoff);
- if($stat1 eq "exon1=yes" and $stat3 eq "exon3=yes") 
- {
- #$Ez="passed";
- $Ez=$eventid if $eventid eq "AA";
- $Ez=$eventid if $eventid eq "AD";
- $Ez=$eventid if $eventid eq "IR";
- if ($eventid eq "CS" or $eventid eq "CA" or $eventid eq "ME") 
- {
- if($bir >0.9)
- {
- $Ez = "CS";
- }
- else
- {
- $Ez = "CA";
- }
- }
-
- }
- else
- {
- #$Ez="declined";
- $Ez = "na";
- }
- if( ($j12<$JunctionCut or $j23<$JunctionCut) and $j13 <$JunctionCut)
- {
- $Ez = "na";
- }
- print $print,"\t",$stat1,"\t",$stat2,"\t",$stat3,"\t",$Ez,"\n";
-}
-close(IN);
-####################################################################
-
-sub cutoff{
- my($s,$c,$E,%cutoff)=@_;
- my @Exlen=@$E;
- if($c eq 'NA'){return('NA')}
- my $range=$Exlen[$#Exlen];
- foreach my $l(@Exlen){if($s<$l){$range=$l;last}}
-        if($c<$cutoff{$range}){return('no')}
- return('yes')
-}
-
-
-sub contain{
- my ($a,@a)=@_;
- foreach(@a){if($a eq $_){return(1)}}
- return(0)
-}
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/Pair_estimate_c
b
Binary file SpliceTrap/bin/Pair_estimate_c has changed
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/PostAnalysis
--- a/SpliceTrap/bin/PostAnalysis Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,66 +0,0 @@
-#!/usr/bin/perl
-my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
-# this script is a wrapup for Post analysis based on the ratio file output
-
-use strict;
-use Getopt::Long;
-my $RatioFile="";
-my $OutputFile = "";
-my $JunctionCut=5;
-my $CutoffLevel="M";
-my $noIRM = 0;
-my $noIRMstr="";
-
-GetOptions (
- "i:s"=>\$RatioFile,
- "o:s"=>\$OutputFile,
- "c:s"=>\$CutoffLevel,
- "noIRM|noirm"=>\$noIRM,
- "j:i"=>\$JunctionCut
-);
-
-my $InputParaDes=" Usage of the script:
- -i      input file (.ratio file)
- -o      output file
- -c      Cutoff Level:H/[M]/L
- Means High, Middle or Low
- -j Junction reads per junction requirement for each exon-isoform [5]
- --noIRM Use the unadjusted inclusion ratios (before IRM correction)
-";
-
-if($RatioFile eq "")
-{
- print $InputParaDes;
- exit;
-}
-
-if($CutoffLevel ne "H" and  $CutoffLevel ne "M" and  $CutoffLevel ne "L")
-{
- print $InputParaDes;
- exit;
-}
-if($noIRM)
-{
- $noIRMstr= "noirm";
-}
-
-
-system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut  $noIRMstr >$OutputFile.raw");
-
-open(rawfile, "$OutputFile.raw");
-open(outfile, ">$OutputFile");
-while(my $line=<rawfile>)
-{
- chomp($line);
- my @a=split("\t",$line);
- if($noIRM)
- {
- print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n";
- }
- else
- {
- print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n";
- }
-}
-close(outfile);
-close(rawfile);
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/PostAnalysis.pl
--- a/SpliceTrap/bin/PostAnalysis.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,64 +0,0 @@
-# this script is a wrapup for Post analysis based on the ratio file output
-
-use strict;
-use Getopt::Long;
-my $RatioFile="";
-my $OutputFile = "";
-my $JunctionCut=5;
-my $CutoffLevel="M";
-my $noIRM = 0;
-my $noIRMstr="";
-
-GetOptions (
- "i:s"=>\$RatioFile,
- "o:s"=>\$OutputFile,
- "c:s"=>\$CutoffLevel,
- "noIRM|noirm"=>\$noIRM,
- "j:i"=>\$JunctionCut
-);
-
-my $InputParaDes=" Usage of the script:
- -i      input file (.ratio file)
- -o      output file
- -c      Cutoff Level:H/[M]/L
- Means High, Middle or Low
- -j Junction reads per junction requirement for each exon-isoform [5]
- --noIRM Use the unadjusted inclusion ratios (before IRM correction)
-";
-
-if($RatioFile eq "")
-{
- print $InputParaDes;
- exit;
-}
-
-if($CutoffLevel ne "H" and  $CutoffLevel ne "M" and  $CutoffLevel ne "L")
-{
- print $InputParaDes;
- exit;
-}
-if($noIRM)
-{
- $noIRMstr= "noirm";
-}
-
-
-system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut  $noIRMstr >$OutputFile.raw");
-
-open(rawfile, "$OutputFile.raw");
-open(outfile, ">$OutputFile");
-while(my $line=<rawfile>)
-{
- chomp($line);
- my @a=split("\t",$line);
- if($noIRM)
- {
- print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n";
- }
- else
- {
- print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n";
- }
-}
-close(outfile);
-close(rawfile);
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/SpliceChange
--- a/SpliceTrap/bin/SpliceChange Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,176 +0,0 @@
-#!/usr/bin/perl
-my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
-# compare two outputs from SpliceTrap
-use strict;
-
-# the information needed
-# inclusion ratio input file
-# filtered out or not input file
-# minimal inclusion ratio at least 0.1 for one condition
-# minimal splicing changes parameter
-# orignial pipeline written by Martin Akerman
-# re-organized and re-written by Jie Wu
-
-use FileHandle;
-
-use Getopt::Long;
-
-my @programs = ('grep','mkdir','R','paste','awk','sort');
-foreach my $program (@programs)
-{
-        die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
-}
-
-
-my $InputFileName1 = "";
-my $InputFileName2 = "";
-my $OutputFileName = "";
-my $minchange = 0.3;
-my $mininc = 0.1;
-my $noIRM = 0;
-
-
-GetOptions (
- "1:s"=>\$InputFileName1,
- "2:s"=>\$InputFileName2,
- "o:s"=>\$OutputFileName,
- "noIRM|noirm"=>\$noIRM,
- "m:f"=>\$mininc,
- "c:f"=>\$minchange
-);
-
-my $InputParaDes=" Usage of the script:
- -1 input file 1, output from SpliceTrap, *.raw file in the output folder 
- -2 input file 2. see above.
- -o output file prefix.
- -c minimal change required, [default:0.3]
- -m minimal inclusion ratio for at least one condition. [defualt:0.1]
- --noIRM Use the unadjusted inclusion ratios (before IRM correction)
-";
-
-if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "")
-{
- print $InputParaDes;
- exit;
-}
-
-
-
-if(-d "$OutputFileName.cache" )
-{
- print "Aborted! output cache folder exists: $OutputFileName.cache \n";
- exit;
-}
-else
-{
- system("mkdir $OutputFileName.cache");
-}
-
-#
-my %ir1; # records ir from file1
-my %ir2; # records ir from file2
-# only records trios above the cutoffs
-
-open(input1, $InputFileName1) or die "$InputFileName1 open error!\n";
-while(my $line=<input1>)
-{
- chomp($line);
- my @a = split("\t", $line);
- if($a[21] ne "na")
- {
- if($noIRM)
- {
- $ir1{$a[0]} = $a[1];
- }
- else
- {
- $ir1{$a[0]} = $a[2];
- }
- }
-}
-print scalar(keys (%ir1) )," records loaded from $InputFileName1\n";
-close(input1);
-
-open(input2, $InputFileName2) or die "$InputFileName2 open error!\n";
-while(my $line=<input2>)
-{
-        chomp($line);
-        my @a = split("\t", $line);
-        if($a[21] ne "na")
-        {
- if($noIRM)
- {
- $ir2{$a[0]} = $a[1];
- }
- else
- {
-                 $ir2{$a[0]} = $a[2];
- }
-        }
-}
-print scalar(keys (%ir2) )," records loaded from $InputFileName2\n";
-
-
-close(input2);
-
-
-##
-my %mean;
-my %sd;
-
-my %num;
-
-my %filehandles;

-my @types = ("CA", "IR", "AD","AA");
-
-foreach my $type (@types)
-{
- my $fh = new FileHandle;
- open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n";
- $filehandles{$type} = $fh;
-}
-
-
-foreach my $key (keys %ir1)
-{
- if(exists $ir2{$key})
- {
- if(($ir1{$key} + $ir2{$key}) > 0)
- {
- #find the type
- my $type = substr($key, 0, 2);
- $type = "CA" if $type eq "CS";
- $num{$type}++;
-
- my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key});
- $mean{$type} = $mean{$type} + $change;
- $sd{$type} = $change*$change + $sd{$type};
-
- $change = sprintf("%.4f",$change);
-
- my $fout =  $filehandles{$type};
- print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n";
- }
- }
-}
-
-foreach my $type (keys %filehandles)
-{
- close($filehandles{$type});
- if($num{$type} == 0)
- {
- warn  "no AS events passed filters for both files\n";
- next;
- }
- $mean{$type} = $mean{$type}/$num{$type};
- $sd{$type} = sqrt($sd{$type}/$num{$type});
- system("R  --slave --args  $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R");
- system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report");
- print "$num{$type} $type events processed...\n";
- #print $mean{$type},"\t",  $sd{$type} ,"\t",$num{$type},"\n";
-
-}
-system("rm $OutputFileName.cache -rf");
-
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/SpliceChange.pl
--- a/SpliceTrap/bin/SpliceChange.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,174 +0,0 @@
-# compare two outputs from SpliceTrap
-use strict;
-
-# the information needed
-# inclusion ratio input file
-# filtered out or not input file
-# minimal inclusion ratio at least 0.1 for one condition
-# minimal splicing changes parameter
-# orignial pipeline written by Martin Akerman
-# re-organized and re-written by Jie Wu
-
-use FileHandle;
-
-use Getopt::Long;
-
-my @programs = ('grep','mkdir','R','paste','awk','sort');
-foreach my $program (@programs)
-{
-        die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
-}
-
-
-my $InputFileName1 = "";
-my $InputFileName2 = "";
-my $OutputFileName = "";
-my $minchange = 0.3;
-my $mininc = 0.1;
-my $noIRM = 0;
-
-
-GetOptions (
- "1:s"=>\$InputFileName1,
- "2:s"=>\$InputFileName2,
- "o:s"=>\$OutputFileName,
- "noIRM|noirm"=>\$noIRM,
- "m:f"=>\$mininc,
- "c:f"=>\$minchange
-);
-
-my $InputParaDes=" Usage of the script:
- -1 input file 1, output from SpliceTrap, *.raw file in the output folder 
- -2 input file 2. see above.
- -o output file prefix.
- -c minimal change required, [default:0.3]
- -m minimal inclusion ratio for at least one condition. [defualt:0.1]
- --noIRM Use the unadjusted inclusion ratios (before IRM correction)
-";
-
-if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "")
-{
- print $InputParaDes;
- exit;
-}
-
-
-
-if(-d "$OutputFileName.cache" )
-{
- print "Aborted! output cache folder exists: $OutputFileName.cache \n";
- exit;
-}
-else
-{
- system("mkdir $OutputFileName.cache");
-}
-
-#
-my %ir1; # records ir from file1
-my %ir2; # records ir from file2
-# only records trios above the cutoffs
-
-open(input1, $InputFileName1) or die "$InputFileName1 open error!\n";
-while(my $line=<input1>)
-{
- chomp($line);
- my @a = split("\t", $line);
- if($a[21] ne "na")
- {
- if($noIRM)
- {
- $ir1{$a[0]} = $a[1];
- }
- else
- {
- $ir1{$a[0]} = $a[2];
- }
- }
-}
-print scalar(keys (%ir1) )," records loaded from $InputFileName1\n";
-close(input1);
-
-open(input2, $InputFileName2) or die "$InputFileName2 open error!\n";
-while(my $line=<input2>)
-{
-        chomp($line);
-        my @a = split("\t", $line);
-        if($a[21] ne "na")
-        {
- if($noIRM)
- {
- $ir2{$a[0]} = $a[1];
- }
- else
- {
-                 $ir2{$a[0]} = $a[2];
- }
-        }
-}
-print scalar(keys (%ir2) )," records loaded from $InputFileName2\n";
-
-
-close(input2);
-
-
-##
-my %mean;
-my %sd;
-
-my %num;
-
-my %filehandles;

-my @types = ("CA", "IR", "AD","AA");
-
-foreach my $type (@types)
-{
- my $fh = new FileHandle;
- open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n";
- $filehandles{$type} = $fh;
-}
-
-
-foreach my $key (keys %ir1)
-{
- if(exists $ir2{$key})
- {
- if(($ir1{$key} + $ir2{$key}) > 0)
- {
- #find the type
- my $type = substr($key, 0, 2);
- $type = "CA" if $type eq "CS";
- $num{$type}++;
-
- my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key});
- $mean{$type} = $mean{$type} + $change;
- $sd{$type} = $change*$change + $sd{$type};
-
- $change = sprintf("%.4f",$change);
-
- my $fout =  $filehandles{$type};
- print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n";
- }
- }
-}
-
-foreach my $type (keys %filehandles)
-{
- close($filehandles{$type});
- if($num{$type} == 0)
- {
- warn  "no AS events passed filters for both files\n";
- next;
- }
- $mean{$type} = $mean{$type}/$num{$type};
- $sd{$type} = sqrt($sd{$type}/$num{$type});
- system("R  --slave --args  $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R");
- system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report");
- print "$num{$type} $type events processed...\n";
- #print $mean{$type},"\t",  $sd{$type} ,"\t",$num{$type},"\n";
-
-}
-system("rm $OutputFileName.cache -rf");
-
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/SpliceTrap
--- a/SpliceTrap/bin/SpliceTrap Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,263 +0,0 @@
-#!/usr/bin/perl
-my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
-# Author: wuj@cshl.edu
-use strict;
-use Getopt::Long;
-####################
-use Cwd;
-my $PROG = $0;
-my $CUR_DIR = Cwd::abs_path(Cwd::cwd());
-my $PROG_ABS_PATH = Cwd::abs_path($PROG);
-#my $SrcFolder=`dirname $PROG_ABS_PATH`;
-#chomp($SrcFolder);
-#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini";
-#my $SrcFolder=$config{SrcFolder};
-
-my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' );
-foreach my $program (@programs)
-{
- die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
-
-}
-
-####################
-my $MapSoftware="bowtie";
-my $DatabasePrefix="hg18";
-my $ReadFileFormat="";
-my $ReadFile1Name="";
-my $ReadFile2Name="";
-my $CutoffLevel="M";
-my $Outputfolder=$CUR_DIR;
-my $OutputPrefix="Result";
-#my $CutoffOnly=0;
-my $ReadSize=36;
-my $JunctionCut=5;
-my $onGalaxy="";
-my $BowtieThreads=1;
-my $noIRMstr="";
-my $noIRM = 0;
-
-GetOptions (
- "m:s"=>\$MapSoftware,
- "d:s"=>\$DatabasePrefix,
-# "f:s"=>\$ReadFileFormat,
- "1:s"=>\$ReadFile1Name,
- "2:s"=>\$ReadFile2Name,
- "c:s"=>\$CutoffLevel,
- "outdir:s"=>\$Outputfolder,
- "o:s"=>\$OutputPrefix,
- "j:i"=>\$JunctionCut,
- "s:i"=>\$ReadSize,
- "p:i"=>\$BowtieThreads,
- "noIRM|noirm"=>\$noIRM,
- "g:s"=>\$onGalaxy
-# "local:s"=>\$local,
-# "rerun"=>\$CutoffOnly
-);
-#-O for galaxy output
-
-
-my $InputParaDes=" Usage of the script:
- -m Mapping software: [bowtie]/rmap
- -d Database prefix: [hg18]/mm9/rn4/userdefined
- -1 Read File 1
- -2 Read File 2
- -c Cutoff Level:H/[M]/L
- Means High, Middle or Low
- -j Junction reads requirement per junction for each exon-isoform [5]
- -o Output prefix {Result}
- -s Read Size [36]
- --outdir Output folder [./]
- -p Bowtie parameter, threads number, only use this when you don't use qsub [1]
- --noIRM Skip the IRM correction step
-
- This is a quick help, please refer to the README file for details.
-";
-
-if($ReadFile2Name eq "")
-{
- $ReadFile2Name = $ReadFile1Name;
- #trigger singled end mode
-}
-
-if($ReadFile1Name eq "" or $ReadFile2Name eq "" )
-{
- print $InputParaDes;
- exit;
-}
-
-if($BowtieThreads < 1)
-{
- print $InputParaDes;
- exit;
-}
-
-if (! -e "$SrcFolder/../db/$DatabasePrefix/parallel")
-{
- print "CHECK: Error, the database you specified is not properly installed.\n";
- #print $InputParaDes;
- exit;
-
-}
-
-if($CutoffLevel ne "H" and  $CutoffLevel ne "M" and  $CutoffLevel ne "L")
-{
- print $InputParaDes;
-        exit;
-}
-
-$ReadFile1Name = Cwd::abs_path($ReadFile1Name);
-$ReadFile2Name = Cwd::abs_path($ReadFile2Name);
-
-#check the files
-open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n");
-my $checkoneline = <check>;
-if(substr($checkoneline,0,1) eq ">")
-{
- $ReadFileFormat = "fasta";
-}
-elsif(substr($checkoneline,0,1) eq "@")
-{
- $ReadFileFormat = "fastq";
-}
-else
-{
- die("CHECK: ERROR:Please check $ReadFile1Name\n");
-}
-close(check);
-
-open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n");
-my $checkoneline = <check>;
-if(substr($checkoneline,0,1) eq ">")
-{
-        die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta");
-}
-elsif(substr($checkoneline,0,1) eq "@")
-{
-        die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq");
-}
-else
-{
-        die("CHECK: ERROR:Please check $ReadFile2Name\n");
-}
-close(check);
-
-$Outputfolder= Cwd::abs_path($Outputfolder);
-if($Outputfolder eq "/tmp")
-{
-        while(-e $Outputfolder)
-        {
-                my $random_foldername = random_sessid();
-                $Outputfolder = "/tmp/".$random_foldername;
-        }
-}
-
-
-if(! -e $Outputfolder)
-{
- mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n";
-}
-if(! -d $Outputfolder)
-{
- die "CHECK: $Outputfolder is not a folder\n";
-}
-
-if($MapSoftware eq "bowtie")
-{
- print "CHECK: whether bowtie installed and in PATH\n";
- my $bowtiechecker=`bowtie --version`;
- if($bowtiechecker ne "")
- {
- print "CHECK: bowtie found, information below:\n";
- print $bowtiechecker,"\n";
- }
- else
- {
- die "CHECK: No bowtie found in PATH, EXIT!\n";
- }
-}
-elsif($MapSoftware eq "rmap")
-{
- print "CHECK: checking rmap...\n";
- if(system("type rmap &>/dev/null") ==0 )
- {
- print "CHECK: rmap found, continue\n";
- }
- else
- {
- die "CHECK: No rmap found in PATH, EXIT!\n";
- }
-}
-else
-{
- die "CHECK: option -m only takes rmap or bowtie as inputs\n";
-}
-
-if($ReadSize == 0)
-{
- die "CHECK: Please check option -s Read size\n";
-}
-
-if($noIRM)
-{
- $noIRMstr= "noirm";
-}
-
-#write more checks later
-print "PARAMETERS:\tMapping software:  ",$MapSoftware,"\n";
-print "PARAMETERS:\tDatabase prefix:   ",$DatabasePrefix,"\n";
-print "PARAMETERS:\tRead end 1:        ",$ReadFile1Name,"\n";
-print "PARAMETERS:\tRead end 2:        ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name);
-print "PARAMETERS:\tCutoff level:      ",$CutoffLevel,"\n";
-print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n";
-print "PARAMETERS:\tOutput folder:     ",$Outputfolder,"\n";
-print "PARAMETERS:\tOutput prefix:     ",$OutputPrefix,"\n";
-print "PARAMETERS:\tRead size:         ",$ReadSize,"\n";
-print "PARAMETERS:\tBowtie threads #:  ",$BowtieThreads,"\n";
-print "PARAMETERS:\tNo IRM.\n" if ($noIRM);
-
-if($MapSoftware eq "bowtie")
-{
- print "=================STAGE 1 MAPPING===================\n";
- system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads");
-        system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name);
- print "=================STAGE 2 ESTIMATION================\n";
-
- system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;
- print "=================STAGE 3 CUTOFF====================\n";
-        system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");
-
-
-}
-
-if($MapSoftware eq "rmap")
-{
- print "=================STAGE 1 MAPPING===================\n";
-
-        system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ;
-        system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name);
- print "=================STAGE 2 ESTIMATION================\n";
-
-        system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;
- print "=================STAGE 3 CUTOFF====================\n";
-        system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");
-
-
-}
-
-print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n";
-
-if($onGalaxy ne "")
-{
-        system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy");
-}
-
-sub random_sessid
-{
-        #my @chars = (0..9,a..z,A..Z);
-        my @chars = ('a'..'z','A'..'Z');
-        my $len = 10;
-        my $string = join '', map {$chars[rand(@chars)]} (1..$len);
-        return $string;
-     }
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/SpliceTrap.pl
--- a/SpliceTrap/bin/SpliceTrap.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,261 +0,0 @@
-# Author: wuj@cshl.edu
-use strict;
-use Getopt::Long;
-####################
-use Cwd;
-my $PROG = $0;
-my $CUR_DIR = Cwd::abs_path(Cwd::cwd());
-my $PROG_ABS_PATH = Cwd::abs_path($PROG);
-#my $SrcFolder=`dirname $PROG_ABS_PATH`;
-#chomp($SrcFolder);
-#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini";
-#my $SrcFolder=$config{SrcFolder};
-
-my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' );
-foreach my $program (@programs)
-{
- die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
-
-}
-
-####################
-my $MapSoftware="bowtie";
-my $DatabasePrefix="hg18";
-my $ReadFileFormat="";
-my $ReadFile1Name="";
-my $ReadFile2Name="";
-my $CutoffLevel="M";
-my $Outputfolder=$CUR_DIR;
-my $OutputPrefix="Result";
-#my $CutoffOnly=0;
-my $ReadSize=36;
-my $JunctionCut=5;
-my $onGalaxy="";
-my $BowtieThreads=1;
-my $noIRMstr="";
-my $noIRM = 0;
-
-GetOptions (
- "m:s"=>\$MapSoftware,
- "d:s"=>\$DatabasePrefix,
-# "f:s"=>\$ReadFileFormat,
- "1:s"=>\$ReadFile1Name,
- "2:s"=>\$ReadFile2Name,
- "c:s"=>\$CutoffLevel,
- "outdir:s"=>\$Outputfolder,
- "o:s"=>\$OutputPrefix,
- "j:i"=>\$JunctionCut,
- "s:i"=>\$ReadSize,
- "p:i"=>\$BowtieThreads,
- "noIRM|noirm"=>\$noIRM,
- "g:s"=>\$onGalaxy
-# "local:s"=>\$local,
-# "rerun"=>\$CutoffOnly
-);
-#-O for galaxy output
-
-
-my $InputParaDes=" Usage of the script:
- -m Mapping software: [bowtie]/rmap
- -d Database prefix: [hg18]/mm9/rn4/userdefined
- -1 Read File 1
- -2 Read File 2
- -c Cutoff Level:H/[M]/L
- Means High, Middle or Low
- -j Junction reads requirement per junction for each exon-isoform [5]
- -o Output prefix {Result}
- -s Read Size [36]
- --outdir Output folder [./]
- -p Bowtie parameter, threads number, only use this when you don't use qsub [1]
- --noIRM Skip the IRM correction step
-
- This is a quick help, please refer to the README file for details.
-";
-
-if($ReadFile2Name eq "")
-{
- $ReadFile2Name = $ReadFile1Name;
- #trigger singled end mode
-}
-
-if($ReadFile1Name eq "" or $ReadFile2Name eq "" )
-{
- print $InputParaDes;
- exit;
-}
-
-if($BowtieThreads < 1)
-{
- print $InputParaDes;
- exit;
-}
-
-if (! -e "$SrcFolder/../db/$DatabasePrefix/parallel")
-{
- print "CHECK: Error, the database you specified is not properly installed.\n";
- #print $InputParaDes;
- exit;
-
-}
-
-if($CutoffLevel ne "H" and  $CutoffLevel ne "M" and  $CutoffLevel ne "L")
-{
- print $InputParaDes;
-        exit;
-}
-
-$ReadFile1Name = Cwd::abs_path($ReadFile1Name);
-$ReadFile2Name = Cwd::abs_path($ReadFile2Name);
-
-#check the files
-open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n");
-my $checkoneline = <check>;
-if(substr($checkoneline,0,1) eq ">")
-{
- $ReadFileFormat = "fasta";
-}
-elsif(substr($checkoneline,0,1) eq "@")
-{
- $ReadFileFormat = "fastq";
-}
-else
-{
- die("CHECK: ERROR:Please check $ReadFile1Name\n");
-}
-close(check);
-
-open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n");
-my $checkoneline = <check>;
-if(substr($checkoneline,0,1) eq ">")
-{
-        die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta");
-}
-elsif(substr($checkoneline,0,1) eq "@")
-{
-        die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq");
-}
-else
-{
-        die("CHECK: ERROR:Please check $ReadFile2Name\n");
-}
-close(check);
-
-$Outputfolder= Cwd::abs_path($Outputfolder);
-if($Outputfolder eq "/tmp")
-{
-        while(-e $Outputfolder)
-        {
-                my $random_foldername = random_sessid();
-                $Outputfolder = "/tmp/".$random_foldername;
-        }
-}
-
-
-if(! -e $Outputfolder)
-{
- mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n";
-}
-if(! -d $Outputfolder)
-{
- die "CHECK: $Outputfolder is not a folder\n";
-}
-
-if($MapSoftware eq "bowtie")
-{
- print "CHECK: whether bowtie installed and in PATH\n";
- my $bowtiechecker=`bowtie --version`;
- if($bowtiechecker ne "")
- {
- print "CHECK: bowtie found, information below:\n";
- print $bowtiechecker,"\n";
- }
- else
- {
- die "CHECK: No bowtie found in PATH, EXIT!\n";
- }
-}
-elsif($MapSoftware eq "rmap")
-{
- print "CHECK: checking rmap...\n";
- if(system("type rmap &>/dev/null") ==0 )
- {
- print "CHECK: rmap found, continue\n";
- }
- else
- {
- die "CHECK: No rmap found in PATH, EXIT!\n";
- }
-}
-else
-{
- die "CHECK: option -m only takes rmap or bowtie as inputs\n";
-}
-
-if($ReadSize == 0)
-{
- die "CHECK: Please check option -s Read size\n";
-}
-
-if($noIRM)
-{
- $noIRMstr= "noirm";
-}
-
-#write more checks later
-print "PARAMETERS:\tMapping software:  ",$MapSoftware,"\n";
-print "PARAMETERS:\tDatabase prefix:   ",$DatabasePrefix,"\n";
-print "PARAMETERS:\tRead end 1:        ",$ReadFile1Name,"\n";
-print "PARAMETERS:\tRead end 2:        ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name);
-print "PARAMETERS:\tCutoff level:      ",$CutoffLevel,"\n";
-print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n";
-print "PARAMETERS:\tOutput folder:     ",$Outputfolder,"\n";
-print "PARAMETERS:\tOutput prefix:     ",$OutputPrefix,"\n";
-print "PARAMETERS:\tRead size:         ",$ReadSize,"\n";
-print "PARAMETERS:\tBowtie threads #:  ",$BowtieThreads,"\n";
-print "PARAMETERS:\tNo IRM.\n" if ($noIRM);
-
-if($MapSoftware eq "bowtie")
-{
- print "=================STAGE 1 MAPPING===================\n";
- system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads");
-        system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name);
- print "=================STAGE 2 ESTIMATION================\n";
-
- system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;
- print "=================STAGE 3 CUTOFF====================\n";
-        system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");
-
-
-}
-
-if($MapSoftware eq "rmap")
-{
- print "=================STAGE 1 MAPPING===================\n";
-
-        system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ;
-        system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name);
- print "=================STAGE 2 ESTIMATION================\n";
-
-        system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;
- print "=================STAGE 3 CUTOFF====================\n";
-        system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");
-
-
-}
-
-print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n";
-
-if($onGalaxy ne "")
-{
-        system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy");
-}
-
-sub random_sessid
-{
-        #my @chars = (0..9,a..z,A..Z);
-        my @chars = ('a'..'z','A'..'Z');
-        my $len = 10;
-        my $string = join '', map {$chars[rand(@chars)]} (1..$len);
-        return $string;
-     }
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/SpliceTrap_measure.pl
--- a/SpliceTrap/bin/SpliceTrap_measure.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,111 +0,0 @@
-# Author: wuj@cshl.edu
-use strict;
-use Getopt::Long;
-####################
-my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini";
-my $SrcFolder=$config{SrcFolder};
-#my $SrcFolder="/data/zhang/wuj/tools/SpliceTrap.0.8";
-####################
-my $MapSoftware="eland";
-my $ReadFileFormat="";
-my $ReadFile1Name="";
-my $ReadFile2Name="";
-my $CutoffLevel="H";
-my $OutputPrefix="Result";
-my $CutoffOnly=0;
-my $ReadSize=36;
-
-GetOptions (
- "m:s"=>\$MapSoftware,
- "f:s"=>\$ReadFileFormat,
- "1:s"=>\$ReadFile1Name,
- "2:s"=>\$ReadFile2Name,
- "c:s"=>\$CutoffLevel,
- "o:s"=>\$OutputPrefix,
- "s:i"=>\$ReadSize,
-# "local:s"=>\$local,
- "rerun"=>\$CutoffOnly
-);
-
-
-my $InputParaDes=" Usage of the script (v0.82):
- -m Mapping software: eland/bowtie/rmap
- -f Read File Format: fasta/fastq
- -1 Read File 1
- -2 Read File 2
- -c Cutoff Level:H/M/L
- Means High, Middle or Low
- -o Output prefix
- -s Read Size 36
- --rerun Only run the last step, which is filtering
-";
-
-if($ReadFile1Name eq "" or $ReadFile2Name eq "" or $ReadFileFormat eq "")
-{
- print $InputParaDes;
- exit;
-}
-
-if($ReadFileFormat ne "fastq" and $ReadFileFormat ne "fasta")
-{
-        print $InputParaDes;
-        exit;
-
-}
-
-if($CutoffLevel ne "H" and  $CutoffLevel ne "M" and  $CutoffLevel ne "L")
-{
- print $InputParaDes;
-        exit;
-}
-
-my $dirname1=`dirname $ReadFile1Name`;
-my $dirname2=`dirname $ReadFile2Name`;
-if($dirname1 ne ".")
-{
-         system("ln -s $ReadFile1Name ./");
-}
-if($dirname2 ne ".")
-{
-         system("ln -s $ReadFile2Name ./");
-}
-
-$ReadFile1Name = `basename $ReadFile1Name`;
-chomp($ReadFile1Name);
-$ReadFile2Name = `basename $ReadFile2Name`;
-chomp($ReadFile2Name);
-my $start = time;
-if($MapSoftware eq "bowtie")
-{
- print "=================STAGE 1 MAPPING===================\n";
- system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat") if not $CutoffOnly;
-        system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat") if not $CutoffOnly;
- print "STAGE 1 FINISHED IN ",time-$start," seconds\n";
- print "=================STAGE 2 ESTIMATION================\n";
-
- system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize") if not $CutoffOnly;
- print "STAGE 2 FINISHED IN ",time-$start," seconds\n";
- print "=================STAGE 3 CUTOFF====================\n";
-        system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel");
- print "STAGE 3 FINISHED IN ",time-$start," seconds\n";
-
-
-
-}
-
-if($MapSoftware eq "rmap")
-{
- print "=================STAGE 1 MAPPING===================\n";
-
-        system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat") if not $CutoffOnly;
-        system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat") if not $CutoffOnly;
- print "=================STAGE 2 ESTIMATION================\n";
-
-        system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize") if not $CutoffOnly;
- print "=================STAGE 3 CUTOFF====================\n";
-        system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel");
-
-
-}
-
-print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n";
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/TXdbgen
--- a/SpliceTrap/bin/TXdbgen Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,97 +0,0 @@
-#!/usr/bin/perl
-my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
-# this script is to generate TXdb database files from bed/gtf file
-
-use strict;
-use Cwd;
-use Getopt::Long;
-
-my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
-foreach my $program (@programs)
-{
-        die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
-
-}
-
-
-my $genomedir = "";
-
-my $annofilename = "";
-my $txdbname = "userdefined";
-my $knownonly = 0;
-my $gtfinput = 0;
-
-GetOptions (
- "g:s"=>\$genomedir,
- "a:s"=>\$annofilename,
- "n:s"=>\$txdbname,
- "gtf"=>\$gtfinput,
- "knownonly"=>\$knownonly
-);
-
-my $InputParaDes="      Usage of the script:
-        -g      genome fasta file location
- -a annotation file (bed/gtf)
- -n txdb name
- --gtf specify this if annotation file is in gtf format
-";
-
-if($genomedir eq "" or $annofilename eq "")
-{
- print $InputParaDes;
- exit;
-}
-
-$genomedir = Cwd::abs_path($genomedir);
-$annofilename = Cwd::abs_path($annofilename);
-
-my $annofilebase = `basename $annofilename`;
-chomp($annofilebase);
-#need a cache folder to avoid mess
-
-my $cachefolder = $annofilebase.".cache";
-
-if (! -e $cachefolder)
-{
- mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n";
-}
-if($gtfinput)
-{
- print "TXDBGEN: converting gtf file into bed format\n";
- system ("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed");
- $annofilename = "$cachefolder/$annofilebase.bed";
-}
-
-
-print "TXDBGEN: scan $annofilename for AS events...\n";
-system("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp");
-print "TXDBGEN: fetch sequences from $genomedir...\n";
-system("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort");
-#get fasta file list
-system("ls $genomedir/*.fa >$cachefolder/chr.list");
-
-system("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta");
-
-print "TXDBGEN: generate files for parallel computing...\n";
-if (! -e "$cachefolder/parallel")
-{
- mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n";
-}
-system("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed");
-system("rm $cachefolder/out.bed");
-system("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi");
-system("rm $cachefolder/TXdb.tmp.evi");
-system("bash $SrcFolder/splitdb.sh $cachefolder/parallel");
-print "TXDBGEN: build Bowtie index...\n";
-
-if (! -e "$cachefolder/btw")
-{
- mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n";
-}
-system("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb");
-system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list");
-print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";
-
-system("mv $cachefolder $SrcFolder/../db/$txdbname");
-print "TXDBGEN: Done!\n";
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/TXdbgen.pl
--- a/SpliceTrap/bin/TXdbgen.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,95 +0,0 @@
-# this script is to generate TXdb database files from bed/gtf file
-
-use strict;
-use Cwd;
-use Getopt::Long;
-
-my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
-foreach my $program (@programs)
-{
-        die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
-
-}
-
-
-my $genomedir = "";
-
-my $annofilename = "";
-my $txdbname = "userdefined";
-my $knownonly = 0;
-my $gtfinput = 0;
-
-GetOptions (
- "g:s"=>\$genomedir,
- "a:s"=>\$annofilename,
- "n:s"=>\$txdbname,
- "gtf"=>\$gtfinput,
- "knownonly"=>\$knownonly
-);
-
-my $InputParaDes="      Usage of the script:
-        -g      genome fasta file location
- -a annotation file (bed/gtf)
- -n txdb name
- --gtf specify this if annotation file is in gtf format
-";
-
-if($genomedir eq "" or $annofilename eq "")
-{
- print $InputParaDes;
- exit;
-}
-
-$genomedir = Cwd::abs_path($genomedir);
-$annofilename = Cwd::abs_path($annofilename);
-
-my $annofilebase = `basename $annofilename`;
-chomp($annofilebase);
-#need a cache folder to avoid mess
-
-my $cachefolder = $annofilebase.".cache";
-
-if (! -e $cachefolder)
-{
- mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n";
-}
-if($gtfinput)
-{
- print "TXDBGEN: converting gtf file into bed format\n";
- system ("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed");
- $annofilename = "$cachefolder/$annofilebase.bed";
-}
-
-
-print "TXDBGEN: scan $annofilename for AS events...\n";
-system("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp");
-print "TXDBGEN: fetch sequences from $genomedir...\n";
-system("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort");
-#get fasta file list
-system("ls $genomedir/*.fa >$cachefolder/chr.list");
-
-system("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta");
-
-print "TXDBGEN: generate files for parallel computing...\n";
-if (! -e "$cachefolder/parallel")
-{
- mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n";
-}
-system("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed");
-system("rm $cachefolder/out.bed");
-system("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi");
-system("rm $cachefolder/TXdb.tmp.evi");
-system("bash $SrcFolder/splitdb.sh $cachefolder/parallel");
-print "TXDBGEN: build Bowtie index...\n";
-
-if (! -e "$cachefolder/btw")
-{
- mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n";
-}
-system("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb");
-system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list");
-print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";
-
-system("mv $cachefolder $SrcFolder/../db/$txdbname");
-print "TXDBGEN: Done!\n";
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/apply_cutoff.sh
--- a/SpliceTrap/bin/apply_cutoff.sh Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,36 +0,0 @@
-#SrcFolder="/data/zhang/wuj/scripts/SpliceTrap.0.8";
-
-outputname=$1;
-CutoffLevel=$2;
-Outputfolder=$3
-SrcFolder=$5
-JunctionCut=$4
-noIRM=$8
-
-echo "CUTOFF: Entering cutoff step...";
-echo "CUTOFF: Cache folder: $outputname.filter"
-mkdir $Outputfolder/$outputname.filter
-cd $Outputfolder/$outputname.filter
-ln -s ../$outputname.ratio
-ln -s ../$outputname.nums
-echo "CUTOFF: spliting file....and generating shell scripts..."
-split -11000 $outputname.ratio
-
-for ratiofiles in  x*
-do
-        echo "perl $SrcFolder/ApplyCutoff.jie.pl $ratiofiles $CutoffLevel $JunctionCut $noIRM > $ratiofiles.out" >>filter.sh
-done
-
-echo "CUTOFF: submit scripts..."
-perl $SrcFolder/batchqsub.pl filter.sh
-echo "CUTOFF: merging file...."
-cat *.out >../$outputname.raw
-cd ../
-#perl /data/zhang/wuj/tools/SpliceTrap.0.8/ApplyCutoff.jie.pl $outputname.ratio $outputname.nums 8 >$outputname.txt
-if [ "$noIRM" ];then
- awk '{printf $22"\t"$2"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$12"\t"$13"\t"$14"\t"$15"\n"}' $outputname.raw >$outputname.txt
-else
- awk '{printf $22"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$12"\t"$13"\t"$14"\t"$15"\n"}' $outputname.raw >$outputname.txt
-fi
-rm $outputname.filter -rf
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/batch_para_cov10p_fit.sh
--- a/SpliceTrap/bin/batch_para_cov10p_fit.sh Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,101 +0,0 @@
-#!/bin/bash
-ReadFile1Name=`basename $1`
-ReadFile2Name=`basename $2`
-outputname=$3
-readsize=$4
-DatabasePrefix=$5
-Outputfolder=$6
-SrcFolder=$7
-noIRM=$8
-
-cd $Outputfolder
-if [ $ReadFile1Name != $ReadFile2Name ];then
- echo "ESTIMATE: Getting fragment size information from data..."
- perl $SrcFolder/get.frag.size.pl $ReadFile1Name.nomt $ReadFile2Name.nomt $readsize
- perl $SrcFolder/get.hist.pl $ReadFile1Name.nomt.fragsize -w=1 -c=1
-else
- echo "ESTIMATE: Generating the other half of reads..."
- readnum=`wc -l $ReadFile1Name.nomt |cut -f1 -d" "`
- for (( i=0; i<$readnum; i++ ))
- do
- echo "NM" >>$ReadFile1Name.f.nomt
- done
- echo "#Width:1" >$ReadFile1Name.nomt.fragsize.hist
-fi
-echo "ESTIMATE: Creating cache folder.."
-if [ $ReadFile1Name != $ReadFile2Name ];then
- ReadFile2FinalName=$ReadFile2Name.nomt
-else
- ReadFile2FinalName=$ReadFile1Name.f.nomt
-fi
-
-mkdir $ReadFile1Name.result
-cd $ReadFile1Name.result
-ln -s ../$ReadFile1Name.nomt ./
-ln -s ../$ReadFile2FinalName ./
-ln -s ../$ReadFile1Name.nomt.fragsize.hist ./
-echo "ESTIMATE: Split mapping results via chromosomes..."
-perl $SrcFolder/scan_nomt.pl $ReadFile1Name.nomt $ReadFile2FinalName
-loopi=0
-echo "ESTIMATE: Generating shell scripts for Loop $loopi..."
-while read chrlist
-do
- chr=`echo $chrlist |tr -d "\n"`
- for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.*
- do
- base=`basename $dbfile`
- echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -s $readsize" >>r$loopi.sh
- done
-done <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list
-
-echo "ESTIMATE: Submit shell scripts for Loop $loopi..."
-perl $SrcFolder/batchqsub.pl r$loopi.sh
-echo "ESTIMATE: Loop $loopi done..."
-
-cat $ReadFile1Name.$loopi.*.ratio >$outputname.$loopi.ratio
-cat $ReadFile1Name.$loopi.*.log >$outputname.$loopi.log
-cat $ReadFile1Name.$loopi.*.nums >$outputname.$loopi.nums
-rm  $ReadFile1Name.$loopi.*.ratio
-rm $ReadFile1Name.$loopi.*.log
-rm $ReadFile1Name.$loopi.*.nums
-
-
-if [ "$noIRM" ];then
- echo "ESTIMATE: No IRM correction, skipped..."
- mv $outputname.$loopi.ratio $outputname.ratio
- mv $outputname.$loopi.log $outputname.log
- mv $outputname.$loopi.nums $outputname.nums
-else
-
- echo "ESTIMATE: derive IRMs from data..."
- awk '{if ($15>=10) printf $0"\n"}' $outputname.$loopi.ratio >$outputname.mle
- perl $SrcFolder/get_event_dist_fit.pl $outputname.mle -c=2 -w=0.001
-
- loopi=1
- echo "ESTIMATE: Generating shell scripts for Loop $loopi..."
- while read chrlist
- do
- chr=`echo $chrlist |tr -d "\n"`
- for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.*
- do
- base=`basename $dbfile`
- echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -b $outputname.mle.fit.hist -s $readsize" >>r$loopi.sh
- done
- done  <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list
- echo "ESTIMATE: Submit shell scripts for Loop $loopi..."
-
-#perl $SrcFolder/qsub/batchqsub.pl r$loopi.sh $taskname
- perl $SrcFolder/batchqsub.pl r$loopi.sh
- echo "ESTIMATE: Loop $loopi done..."
- cat $ReadFile1Name.$loopi.*.ratio >$outputname.ratio
- cat $ReadFile1Name.$loopi.*.log >$outputname.log
- cat $ReadFile1Name.$loopi.*.nums >$outputname.nums
- rm  $ReadFile1Name.$loopi.*.ratio
- rm $ReadFile1Name.$loopi.*.log
- rm $ReadFile1Name.$loopi.*.nums
-fi
-
-mv $outputname.ratio $outputname.log $outputname.nums ../
-cd ../
-rm $ReadFile1Name.result -rf
-rm $ReadFile1Name.nomt $ReadFile2FinalName 
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/batchqsub.pl
--- a/SpliceTrap/bin/batchqsub.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,158 +0,0 @@
-
-# modified from Chenghai Xue's script
-#
-# Usage: perl batchqsub.pl <command_list_file> [task_name]
-#
-# Runs every line of <command_list_file> as an independent job.
-#   * If `qsub` is on the PATH and a trivial test job really executes, each
-#     line is written to its own script under ./qsub_cache, submitted with
-#     qsub, and `qstat` is polled until the jobs are no longer listed.
-#   * Otherwise the whole command list is run serially with `sh`.
-
-use strict;
-use warnings;
-use File::Basename qw(basename);
-
-$| = 1;    # keep progress messages in order with the output of child processes
-
-my $performListFile = $ARGV[0];
-die "Usage: $0 <command_list_file> [task_name]\n"
-    unless defined $performListFile and length $performListFile;
-
-#test if qsub works
-
-my $qsub_checker = 0;
-if (system("hash qsub >/dev/null") == 0)
-{
-    # Submit a job whose only effect is to create a marker directory, wait
-    # for it to leave the queue, then check that the directory exists.
-    my $test_randname = random_sessid();
-    system("mkdir $test_randname;");
-    system("echo 'mkdir $test_randname/$test_randname' >$test_randname/$test_randname.sh");
-    system("mkdir $test_randname/qsub_cache");
-    system("qsub -cwd -v TMPDIR=$test_randname/qsub_cache -V -e $test_randname/qsub_cache -o $test_randname/qsub_cache -N $test_randname  $test_randname/$test_randname.sh");
-
-    # The old message interpolated $taskname here, which is not set yet at
-    # this point; report the name of the test job instead.
-    wait_for_jobs($test_randname, 5, "testing tasks", $test_randname);
-    print "QSUB: testing done\n";
-    print "$test_randname/$test_randname\n";
-    if (-d "$test_randname/$test_randname")
-    {
-        $qsub_checker = 1;
-        print "QSUB: working well!\n";
-    }
-    system("rm $test_randname -rf");
-}
-
-if ($qsub_checker == 0)
-{
-    print "QSUB: No GRID qsub found\n";
-    print "QSUB: if you are using PBS qsub, please wait for the next version! Thanks.\n";
-    print "QSUB: Running in serial mode...\n";
-    system("sh", $performListFile) == 0
-        or warn "QSUB: serial run of $performListFile ended with status $?\n";
-    exit;
-}
-
-my $taskname = $ARGV[1];
-$taskname = random_sessid() if not $taskname;
-
-my $outfullDir = "qsub_cache";
-
-# correct path
-if (!-d $outfullDir)
-{
-    mkdir $outfullDir or -d $outfullDir
-        or die "can not create directory $outfullDir: $!\n";
-}
-
-# create a temp cache (used as TMPDIR and for the stdout/stderr of the jobs)
-my $prog  = basename($0);
-my $cache = $outfullDir."/".$prog."_".$taskname;
-if (!-d $cache)
-{
-    mkdir $cache or -d $cache
-        or die "can not create directory $cache: $!\n";
-}
-
-open(my $list_in, '<', $performListFile) or die "can not open file $performListFile to read\n";
-my @performList = <$list_in>;
-chomp @performList;
-close $list_in or die "can't close the input file : $!";
-
-# One script per command line, plus a list of the generated scripts.
-my $scriptListFile = $outfullDir."/".$taskname."_scripts.list";
-my @scriptFiles;
-open(my $list_out, '>', $scriptListFile) or die "can not open file $scriptListFile to write: $!\n";
-for my $i (0 .. $#performList)
-{
-    my $scriptFile = $outfullDir."/".$taskname."_script$i.sh";
-    print {$list_out} "$scriptFile\n";
-    open(my $script_out, '>', $scriptFile) or die "can not open file $scriptFile to write: $!\n";
-    print {$script_out} "$performList[$i]\n";
-    close($script_out) or die "can't close $scriptFile : $!";
-    push @scriptFiles, $scriptFile;
-}
-close($list_out) or die "can't close $scriptListFile : $!";
-
-# Job names are <task>_<command list basename>.<script basename>
-$taskname = $taskname."_".basename($performListFile);
-for my $f (@scriptFiles)
-{
-    my $base = basename($f);
-
-    #use default queues
-    system("qsub", "-cwd", "-v", "TMPDIR=$cache", "-V", "-e", $cache, "-o", $cache, "-N", "$taskname.$base", $f) == 0
-        or warn "QSUB: submitting $f failed with status $?\n";
-}
-
-#start to check stats of qsub tasks
-#######################################
-# Only the first 10 characters of the job name are searched for in the
-# qstat listing.
-wait_for_jobs(substr($taskname, 0, 10), 60, "tasks", $taskname);
-print "QSUB: done: $taskname \n";
-
-# Poll `qstat` until no line containing $query has been seen on three
-# consecutive polls.  Sleeps $busy_interval seconds while jobs are listed
-# and 10 seconds between the confirming idle polls.
-#   $query         - text to look for in the qstat output
-#   $busy_interval - seconds to sleep while jobs are still listed
-#   $what, $label  - words used in the progress message
-sub wait_for_jobs
-{
-    my ($query, $busy_interval, $what, $label) = @_;
-    my $idle_polls = 0;
-    while (1)
-    {
-        my $running = grep { index($_, $query) >= 0 } `qstat`;
-        my $sec;
-        if ($running == 0)
-        {
-            $idle_polls++;
-            last if ($idle_polls == 3);
-            $sec = 10;
-        }
-        else
-        {
-            $idle_polls = 0;
-            $sec = $busy_interval;
-        }
-        print "QSTAT: $running $what running.....$label\n";
-        sleep($sec);
-    }
-    return;
-}
-######################################
-
-#
-sub random_sessid
-{
-    # Returns a random 10-character identifier made of ASCII letters only
-    # (digits are deliberately not part of the alphabet).
-    my @alphabet = ('a'..'z', 'A'..'Z');
-    my $id = '';
-    for my $n (1..10)
-    {
-        $id .= $alphabet[rand(@alphabet)];
-    }
-    return $id;
-}
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/batchqsub.pl_orig
--- a/SpliceTrap/bin/batchqsub.pl_orig Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,158 +0,0 @@
-
-# modified from Chenghai Xue's script
-#
-# Usage: perl <this script> <command_list_file> [task_name]
-#
-# Runs every line of <command_list_file> as an independent job.
-#   * If `qsub` is on the PATH and a trivial test job really executes, each
-#     line is written to its own script under ./qsub_cache, submitted with
-#     qsub, and `qstat` is polled until the jobs are no longer listed.
-#   * Otherwise the whole command list is run serially with `sh`.
-
-use strict;
-use warnings;
-use File::Basename qw(basename);
-
-$| = 1;    # keep progress messages in order with the output of child processes
-
-my $performListFile = $ARGV[0];
-die "Usage: $0 <command_list_file> [task_name]\n"
-    unless defined $performListFile and length $performListFile;
-
-#test if qsub works
-
-my $qsub_checker = 0;
-if (system("hash qsub >/dev/null") == 0)
-{
-    # Submit a job whose only effect is to create a marker directory, wait
-    # for it to leave the queue, then check that the directory exists.
-    my $test_randname = random_sessid();
-    system("mkdir $test_randname;");
-    system("echo 'mkdir $test_randname/$test_randname' >$test_randname/$test_randname.sh");
-    system("mkdir $test_randname/qsub_cache");
-    system("qsub -cwd -v TMPDIR=$test_randname/qsub_cache -V -e $test_randname/qsub_cache -o $test_randname/qsub_cache -N $test_randname  $test_randname/$test_randname.sh");
-
-    # The old message interpolated $taskname here, which is not set yet at
-    # this point; report the name of the test job instead.
-    wait_for_jobs($test_randname, 5, "testing tasks", $test_randname);
-    print "QSUB: testing done\n";
-    print "$test_randname/$test_randname\n";
-    if (-d "$test_randname/$test_randname")
-    {
-        $qsub_checker = 1;
-        print "QSUB: working well!\n";
-    }
-    system("rm $test_randname -rf");
-}
-
-if ($qsub_checker == 0)
-{
-    print "QSUB: No GRID qsub found\n";
-    print "QSUB: if you are using PBS qsub, please wait for the next version! Thanks.\n";
-    print "QSUB: Running in serial mode...\n";
-    system("sh", $performListFile) == 0
-        or warn "QSUB: serial run of $performListFile ended with status $?\n";
-    exit;
-}
-
-my $taskname = $ARGV[1];
-$taskname = random_sessid() if not $taskname;
-
-my $outfullDir = "qsub_cache";
-
-# correct path
-if (!-d $outfullDir)
-{
-    mkdir $outfullDir or -d $outfullDir
-        or die "can not create directory $outfullDir: $!\n";
-}
-
-# create a temp cache (used as TMPDIR and for the stdout/stderr of the jobs)
-my $prog  = basename($0);
-my $cache = $outfullDir."/".$prog."_".$taskname;
-if (!-d $cache)
-{
-    mkdir $cache or -d $cache
-        or die "can not create directory $cache: $!\n";
-}
-
-open(my $list_in, '<', $performListFile) or die "can not open file $performListFile to read\n";
-my @performList = <$list_in>;
-chomp @performList;
-close $list_in or die "can't close the input file : $!";
-
-# One script per command line, plus a list of the generated scripts.
-my $scriptListFile = $outfullDir."/".$taskname."_scripts.list";
-my @scriptFiles;
-open(my $list_out, '>', $scriptListFile) or die "can not open file $scriptListFile to write: $!\n";
-for my $i (0 .. $#performList)
-{
-    my $scriptFile = $outfullDir."/".$taskname."_script$i.sh";
-    print {$list_out} "$scriptFile\n";
-    open(my $script_out, '>', $scriptFile) or die "can not open file $scriptFile to write: $!\n";
-    print {$script_out} "$performList[$i]\n";
-    close($script_out) or die "can't close $scriptFile : $!";
-    push @scriptFiles, $scriptFile;
-}
-close($list_out) or die "can't close $scriptListFile : $!";
-
-# Job names are <task>_<command list basename>.<script basename>
-$taskname = $taskname."_".basename($performListFile);
-for my $f (@scriptFiles)
-{
-    my $base = basename($f);
-
-    #use default queues
-    system("qsub", "-cwd", "-v", "TMPDIR=$cache", "-V", "-e", $cache, "-o", $cache, "-N", "$taskname.$base", $f) == 0
-        or warn "QSUB: submitting $f failed with status $?\n";
-}
-
-#start to check stats of qsub tasks
-#######################################
-# Only the first 10 characters of the job name are searched for in the
-# qstat listing.
-wait_for_jobs(substr($taskname, 0, 10), 60, "tasks", $taskname);
-print "QSUB: done: $taskname \n";
-
-# Poll `qstat` until no line containing $query has been seen on three
-# consecutive polls.  Sleeps $busy_interval seconds while jobs are listed
-# and 10 seconds between the confirming idle polls.
-#   $query         - text to look for in the qstat output
-#   $busy_interval - seconds to sleep while jobs are still listed
-#   $what, $label  - words used in the progress message
-sub wait_for_jobs
-{
-    my ($query, $busy_interval, $what, $label) = @_;
-    my $idle_polls = 0;
-    while (1)
-    {
-        my $running = grep { index($_, $query) >= 0 } `qstat`;
-        my $sec;
-        if ($running == 0)
-        {
-            $idle_polls++;
-            last if ($idle_polls == 3);
-            $sec = 10;
-        }
-        else
-        {
-            $idle_polls = 0;
-            $sec = $busy_interval;
-        }
-        print "QSTAT: $running $what running.....$label\n";
-        sleep($sec);
-    }
-    return;
-}
-######################################
-
-#
-sub random_sessid
-{
-    # Draw 10 letters (a-z, A-Z; no digits) at random and glue them together.
-    my @letters = ('a'..'z', 'A'..'Z');
-    my @picked;
-    push @picked, $letters[rand(@letters)] while @picked < 10;
-    return join('', @picked);
-}
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/beta_fit.R
--- a/SpliceTrap/bin/beta_fit.R Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,39 +0,0 @@
-# beta_fit.R
-# Usage: R --slave --args <input_file> < beta_fit.R
-#
-# Reads column 1 of <input_file>, fits a beta distribution to it and writes
-# <input_file>.fit: 1000 lines, one value per line.  When the file is
-# missing or empty, or holds 10 or fewer values, a flat histogram
-# (0.001 on every line) is written instead.
-args = commandArgs(trailingOnly=TRUE);
-input_file=args[1];
-#input_file="control_a.0.1.flt.ratio.tmpca";
-#print (input_file);
-
-
-library(MASS);
-
-nbins=1000;
-# Flat fallback.  R vectors are 1-based, so fill all 1000 bins directly:
-# the former `for (i in 0:999) p[i]=0.001` skipped index 0 silently and
-# never reached bin 1000, which stayed at 0.
-p=rep(0.001,nbins);
-
-# isTRUE() also covers a missing file, for which file.info() reports NA
-if ( isTRUE(file.info(input_file)$size>0) )
-{
-
-data=read.table(input_file);
-col=1;
-x=data[,col];
-x1=x;
-if (length(x)>10)
-{
- # move exact 0/1 observations just inside the open interval before fitting
- x1[x==0] <- .Machine$double.eps;
- x1[x==1] <- (1-.Machine$double.eps);
- # method-of-moments estimates, used as starting values for fitdistr()
- xbar=mean(x1)
- xvar=var(x1)
- a <- (xbar*(1-xbar)/xvar - 1)*xbar
- b <- (1-xbar)*a/xbar
- (f=fitdistr(x1,"beta",list(shape1=a,shape2=b)))
- # Lines 1..999 carry the fitted density at 0.001 .. 0.999 and line 1000 is 0,
- # exactly what the previous element-wise loop produced for a fitted file.
- # NOTE(review): if line k was meant to hold the density at (k-1)/1000 the
- # mapping is shifted by one bin -- confirm against the reader of *.fit.
- idx=1:(nbins-1);
- p[idx]=dbeta(idx/nbins,f[["estimate"]][["shape1"]],f[["estimate"]][["shape2"]]);
- p[nbins]=0;
-}
-
-}
-write(p,file=paste(input_file,"fit",sep="."),ncolumns=1);
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/bowtie2eland.pl
--- a/SpliceTrap/bin/bowtie2eland.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,175 +0,0 @@
-# bowtie2eland.pl <bowtie_output> <reads_file> <eland_output>
-#
-# Rewrites a tab-separated bowtie alignment file as one line per read of
-# <reads_file>, in read-file order:
-#   read without alignment:  <name>\tNA\tNM
-#   read with n alignments:  <name>\tNA\tn:0:0\t/<target>:<pos+1>F0,/<target>:<pos+1>R0,...
-# (F0 for strand "+", R0 otherwise).  All alignments of one read must sit on
-# consecutive lines of the bowtie file.
-use strict;
-use warnings;
-
-my ($bowtiefilename, $readsfilename, $elandfilename) = @ARGV;
-die "Usage: $0 <bowtie_output> <reads_file> <eland_output>\n"
-    unless defined $elandfilename;
-
-open(my $readsfile,  '<', $readsfilename)  or die "can not open file $readsfilename to read: $!\n";
-open(my $bowtiefile, '<', $bowtiefilename) or die "can not open file $bowtiefilename to read: $!\n";
-open(my $elandfile,  '>', $elandfilename)  or die "can not open file $elandfilename to write: $!\n";
-
-# The very first character of the reads file is taken as the marker that
-# starts every header line (read here instead of shelling out to `head`).
-my $detectformat = '';
-read($readsfile, $detectformat, 1);
-seek($readsfile, 0, 0);
-my $header_re = qr/^\Q$detectformat\E/;
-
-# hash the positions of the alignments for each read id
-# (byte offset of the first bowtie line of every read)
-my %readposhash;
-my $bowtiepos = tell($bowtiefile);
-while (my $bowtieline = <$bowtiefile>)
-{
-    my ($bowtiereadname) = split("\t", $bowtieline);
-    if (not exists $readposhash{$bowtiereadname})
-    {
-        $readposhash{$bowtiereadname} = $bowtiepos;
-    }
-    $bowtiepos = tell($bowtiefile);
-}
-
-my $readfilelinenum = 0;
-while (my $readline = <$readsfile>)
-{
-    $readfilelinenum++;
-    # Only odd-numbered lines that start with the marker are headers; the
-    # parity test skips even-numbered lines that happen to start with it.
-    # presumably 2-line fasta / 4-line fastq records -- verify against caller
-    next unless ($readline =~ $header_re) && ($readfilelinenum % 2 == 1);
-
-    chomp($readline);
-    my $readname = substr($readline, 1);
-    if (not exists $readposhash{$readname})
-    {
-        print {$elandfile} $readname, "\tNA\tNM\n";
-        next;
-    }
-
-    # Jump to the first alignment of this read and collect the whole run.
-    seek($bowtiefile, $readposhash{$readname}, 0);
-    my @hits;
-    while (my $bowtieline = <$bowtiefile>)
-    {
-        my ($bowtiereadname, $strand, $mapped_id, $pos) = split("\t", $bowtieline);
-        last if ($readname ne $bowtiereadname);
-        # reported position is the bowtie offset plus one
-        push @hits, "/".$mapped_id.":".($pos + 1).($strand eq "+" ? "F0," : "R0,");
-    }
-    print {$elandfile} $readname, "\t", "NA\t", scalar(@hits), ":0:0\t", @hits, "\n";
-}
-
-close($elandfile) or die "can not close file $elandfilename: $!\n";
-close($bowtiefile);
-close($readsfile);
-
-# The earlier merge-style implementation that used to follow an
-# unconditional `exit;` here could never run and has been removed.
-exit;
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/calc_pval.R
--- a/SpliceTrap/bin/calc_pval.R Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,29 +0,0 @@
-# calc_pval.R
-# Positional arguments as seen by commandArgs():
-#   [4] input file   [5] mean   [6] standard deviation   ([7] accepted, not used)
-# Writes <input_file>.p with one value per row of the input: the lower-tail
-# normal probability of column 4 when that value is negative, the upper-tail
-# probability otherwise.
-args = commandArgs();
-input_file=args[4];
-av=as.numeric(args[5]);
-sd=as.numeric(args[6]);
-
-
-data=read.table(input_file);
-
-col=4;
-x=data[,col];
-pup=pnorm(x, mean=av, sd=sd, lower.tail = FALSE);
-pdn=pnorm(x, mean=av, sd=sd, lower.tail = TRUE);
-
-# Vectorised choice of tail.  Replaces the element-wise loop (which aborted
-# on an NA value) and drops the FDR-adjusted vectors that were computed but
-# never written anywhere.
-p=ifelse(x<0, pdn, pup);
-write(p, file=paste(input_file,"p",sep="."),ncolumns=1);
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/downloaddb.pl
--- a/SpliceTrap/bin/downloaddb.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,54 +0,0 @@
-# downloaddb.pl
-# Interactive helper: asks which databases (hg18 / mm9 / rn4 / ALL) to fetch,
-# then for each selected one downloads <name>.tar.gz with wget into the
-# current directory, unpacks it, deletes the archive, creates
-# <name>/parallel and runs splitdb.sh (from this script's directory) on it.
-use strict;
-use warnings;
-use Cwd;
-use File::Basename qw(dirname);
-
-my %flags=(
-    "hg18"=>0,
-    "mm9"=>0,
-    "rn4"=>0,
-);
-my $PROG = $0;
-my $PROG_ABS_PATH = dirname(Cwd::abs_path($PROG));
-print "\n\tPrepare to download databases from CSHL...\n";
-
-print "\tWhich database(s) do you want to download?\n";
-print "\tChoose from\t ";
-foreach my $key (sort keys %flags)
-{
-    print $key,"/";
-}
-print "ALL \n\n\tseparated by blank...don't enter anything if you don't want to download!\n\nPlease enter:[NONE]";
-
-# A closed input stream (EOF) is treated like an empty answer.
-my $answer = <>;
-$answer = '' unless defined $answer;
-foreach my $dbname (split(' ', $answer))
-{
-    if (uc($dbname) eq "ALL")
-    {
-        $flags{$_} = 1 for keys %flags;
-        last;
-    }
-    # names that are not in the list are ignored
-    $flags{$dbname} = 1 if exists $flags{$dbname};
-}
-
-foreach my $key (sort keys %flags)
-{
-    next unless $flags{$key} == 1;
-
-    my $tarball = "$key.tar.gz";
-    # Stop working on this database as soon as a step fails instead of
-    # running the later steps on a missing or partial download.
-    if (system("wget", "http://rulai.cshl.edu/splicetrap/db/".$tarball) != 0)
-    {
-        warn "download of $tarball failed (status $?), skipping $key\n";
-        next;
-    }
-    print "untar the database file for $key...please wait...";
-    my $untar_status = system("tar", "-ixzf", $tarball);
-    unlink($tarball) or warn "can not remove $tarball: $!\n";
-    if ($untar_status != 0)
-    {
-        warn "unpacking $tarball failed (status $untar_status), skipping $key\n";
-        next;
-    }
-    if (!-d "$key/parallel")
-    {
-        mkdir "$key/parallel" or warn "can not create $key/parallel: $!\n";
-    }
-    print "creating files for parallel computing...\n";
-    system("bash", "$PROG_ABS_PATH/splitdb.sh", "$key/parallel") == 0
-        or warn "splitdb.sh failed for $key (status $?)\n";
-}
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/get.frag.size.pl
--- a/SpliceTrap/bin/get.frag.size.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,55 +0,0 @@
-#for the results from paired end
-#The two inputs are the results from the two ends
-#
-# Usage: get.frag.size.pl <end1_file> <end2_file> <read_size>
-# Reads the two files line by line in parallel and writes
-# <end1_file>.fragsize: for every pair whose candidate placements all give
-# the same size, one line with abs(pos2 - pos1) + <read_size>.
-use strict;
-use warnings;
-
-my ($file1, $file2, $read_size) = @ARGV;
-die "Usage: $0 <end1_file> <end2_file> <read_size>\n" unless defined $read_size;
-
-open(my $input1, '<', $file1) or die "can not open file $file1 to read: $!\n";
-open(my $input2, '<', $file2) or die "can not open file $file2 to read: $!\n";
-open(my $output, '>', "$file1.fragsize") or die "can not open file $file1.fragsize to write: $!\n";
-
-while (my $line1 = <$input1>)
-{
-    my $line2 = <$input2>;
-    last unless defined $line2;    # second file ran out: no more pairs
-    chomp($line1);
-    chomp($line2);
-    # These tests used to be written /$\NM/ and /$\MT/.  Perl interpolated
-    # the (unset) output record separator $\ there, so they were plain
-    # substring matches; spelled out so that no longer depends on $\.
-    # NOTE(review): this matches NM / MT anywhere in the line, not only in
-    # the status column -- confirm that is intended.
-    next if ($line1 =~ /NM/ or $line2 =~ /NM/ or $line1 =~ /MT/ or $line2 =~ /MT/);
-
-    my $match1 = (split("\t", $line1))[3];
-    my $match2 = (split("\t", $line2))[3];
-    next unless defined $match1 and defined $match2;
-
-    # For every hit of end 1 (name ends in a bracketed tag, e.g. "[L]"),
-    # look for the same name among the hits of end 2.
-    my %saw;
-    while ($match1 =~ /\/(\S[^,]*\[\w+\])\S[^,]*:(\d*)[RF]/g)
-    {
-        my $name = $1;
-        my $posa = $2;
-        if ($match2 =~ /\Q$name\E\S[^,]*:(\d*)[RF]/)
-        {
-            my $posb = $1;
-            $saw{ abs($posb - $posa) + $read_size } = ();
-        }
-    }
-
-    # keep the pair only when exactly one distinct size was found
-    my @keya = keys %saw;
-    if (scalar(@keya) == 1)
-    {
-        print {$output} $keya[0], "\n";
-    }
-}
-close($output) or die "can not close file $file1.fragsize: $!\n";
-close($input2);
-close($input1);
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/get.hist.pl
--- a/SpliceTrap/bin/get.hist.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,49 +0,0 @@
-# get.hist.pl <input_file> [-w=<bin width>] [-c=<column>] [-start=<f>] [-end=<f>] [-v]
-#
-# Writes <input_file>.hist: a "#Width:<w>" line followed by one line per bin
-# with the fraction of data rows whose value in column <c> (1-based,
-# default 2) falls into that bin.  Bin k covers [k*w, (k+1)*w); bins are
-# written up to the highest one that received a value.
-# -start, -end and -v are accepted but have no effect.
-use Getopt::Long;
-use strict;
-use warnings;
-
-my $InputFileName=$ARGV[0];
-die "Usage: $0 <input_file> [-w=width] [-c=column]\n" unless defined $InputFileName;
-my $OutputFileName = $InputFileName.".hist";
-my $width=0.01;
-my $verbose=1;
-my $col=2;
-my $start=0;
-my $end=1.000;
-
-
-GetOptions (
-        'w:f'=>\$width,
-        'c:i'=>\$col,
-        'start:f'=>\$start,
-        'end:f'=>\$end,
-        'v'=>\$verbose
-);
-
-$width=$width*1;
-die "$0: bin width must be positive\n" unless $width > 0;
-
-$col=$col-1;
-
-my @hist;
-my $totalnum=0;
-
-open(my $in, '<', $InputFileName) or die "can not open file $InputFileName to read: $!\n";
-while (my $line = <$in>)
-{
-    next if ($line =~ /^#/);
-    chomp($line);
-    my @array = split(/\s/, $line);
-    my $value = defined $array[$col] ? $array[$col] : 0;    # a missing column counts as 0, as before
-    my $bin = int($value / $width);
-    if ($bin < 0)
-    {
-        # a negative index would count from the end of @hist (or die on an empty one)
-        warn "$InputFileName line $.: negative value $value ignored\n";
-        next;
-    }
-    $hist[$bin]++;
-    $totalnum++;
-}
-close($in);
-
-open(my $out, '>', $OutputFileName) or die "can not open file $OutputFileName to write: $!\n";
-print {$out} "#Width:$width\n";
-foreach my $count (@hist)
-{
-    # bins that never received a value are printed as 0
-    print {$out} ($count || 0) / $totalnum, "\n";
-}
-close($out) or die "can not close file $OutputFileName: $!\n";
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/get_bed_fa_j.pl
--- a/SpliceTrap/bin/get_bed_fa_j.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,330 +0,0 @@\n-# Adapted from Chenghai Xue\'s script\n-\n-$starttime=time();\n-\n-$input_file_1 = $ARGV[0];\t# exon junction file\n-$input_file_2 = $ARGV[1];\t# genome file list\n-$output_file_1 = $ARGV[2];\t# exon junction bed (might be less than input_file_1\n-$output_file_2 = $ARGV[3];\t# exon junction fa\n-#$leftLen = $ARGV[4];\n-#$rightLen = $ARGV[5];\n-\r\n-open(IN_1, "$input_file_1") or die "can\'t open the input file : $!";\n-open(IN_2, "$input_file_2") or die "can\'t open the input file : $!";\n-open OUT_1, ">$output_file_1" or die "Can not open output_file : $!";\r\n-open OUT_2, ">$output_file_2" or die "Can not open output_file : $!";\r\n-\n-@chromList = (<IN_2>);\n-chomp(@chromList);\n-$len_chromList = @chromList;\n-print "BED2FA: in $input_file_2, found $len_chromList chromosomes\\n";\n-foreach $one (@chromList){\n-\tif($one =~ /\\/(chr.[^\\/]*?)\\.*fa$/i){\n-\t\t$chr_hash{$1} = $one;\n-\t\t#print $1,"\\n";\t\n-\t}\n-}\n-@key_chr_hash = keys(%chr_hash);\r\n-$len_key_chr_hash = @key_chr_hash;\r\n-@sort_key_chr_hash = sort_chromNo(@key_chr_hash);\n-$len_sort_key_chr_hash = @sort_key_chr_hash;\n-#for($i=0; $i<$len_sort_key_chr_hash; $i++){\r\n-#\tprint "$sort_key_chr_hash[$i]\t$chr_hash{$sort_key_chr_hash[$i]}\\n";\r\n-#}\n-\n-$num_1=0;\n-$num_2=0;\n-$num_count_chrom=0;\n-my ($chrom, $chromStart, $chromEnd, $name, $score, $strand, $thickStart, $thickEnd, $itemRgb, $blockCount, $blockSizes, $blockStarts);\r\n-$current_chrom = "";\n-while(<IN_1>){\n-\t$num_1++;\r\n-\t$line = $_;\n-\tchomp $line;\n-\t#print $line,"\\n";\n-\t@cols = split ("\\t", $line);\n-        if(scalar(@cols)==12)\n-\t{\n-\t($chrom, $chromStart, $chromEnd, $name, $score, $strand, $thickStart, $thickEnd, $itemRgb, $blockCount, $blockSizes, $blockStarts) = @cols;\n-\t}\n-\tif(scalar(@cols)!=12)\n-\t{\n-\t\t($chrom, $chromStart, $chromEnd, $name, $score, $strand)=@cols;\n-\t\t$thickStart=$chromStart;\n-\t#\tprint $thickStart,"\\n";\n-\t\t$thickEnd = 
$chromEnd;\n-\t\t$blockCount=1;\n-\t\t$blockSizes=$chromEnd-$chromStart;\n-\t\t$blockStarts = 0;\t\n-\t}\n-\t$strand="+" if !$strand;\n-\t@a_blockSizes = split (/\\,/, $blockSizes);\n-\t@a_blockStarts = split (/\\,/, $blockStarts);\n-\tif($chrom ne $current_chrom){\n-\t\tif($num_1 != 1){\n-\t\t\tprint "$num_chr_1\t$num_chr_2\t$len_contigSeqStr\\n";\t\t\t\n-\t\t}\n-\t\tprint "BED2FA: $chrom:\t";\n-\t\t\n-\t\t$num_chr_1=0;\n-\t\t$num_chr_2=0;\t\t\n-\n-\t\tif(exists $chr_hash{$chrom}){\n-\t\t\t$num_count_chrom++;\n-\t\t\t$current_chrom = $chrom;\n-\t\t\t#print $current_chrom,"\\n";\n-#=pod\t\t\t\n-\t\t\t$chromFastaFile = $chr_hash{$chrom};\n-\t\t\t#print $chromFastaFile,"\\n";\n-\t\t\topen($fin, "<$chromFastaFile") or die "can\'t open the chrom file : $!";\n-\t\t\tlocal ($/) = undef;\n-\t\t\t$contigSeqStr = <$fin>;\n-\t\t\tclose ($fin);\n-\t\t\t#print $contigSeqStr,"mark\\t";\n-\t\t\t$contigSeqStr =~s/^\\>.*?\\n//g;\n-                        #print $contigSeqStr,"mark2\\t";\n-\n-\t\t\t$contigSeqStr =~s/\\s|\\n//g;\n-                        #print $contigSeqStr,"mark3\\n";\n-\n-\t\t\t$len_contigSeqStr = length $contigSeqStr;\n-#=cut\n-\t\t}\n-\t\telse{\n-\t\t\t$num_chr_1++;\n-\t\t\tnext;\n-\t\t}\n-\t}\n-\t$num_chr_1++;\n-\t\r\n-# modify from here................................\n-\tmy @Starts;\n-\tmy @Ends;\n-\tmy @JuncSeq;\n-\tmy $ssStrTag=1;\n-\tfor($i_wuj=0;$i_wuj<$blockCount;$i_wuj++)\n-\t{\n-\t\t$Starts[$i_wuj] = $chromStart + $a_blockStarts[$i_wuj];\n-\t\t$Ends[$i_wuj] = $Starts[$i_wuj] + $a_blockSizes[$i_wuj];\n-\t\t$JuncSeq[$i_wuj] = uc substr ($contigSeqStr,$Starts[$i_wuj], $a_blockSizes[$i_wuj]);\n-\t\tif($strand eq "-"){\n-\t\t  $JuncSeq[$i_wuj] = uc string_reverse_complement(lc $JuncSeq[$i_wuj]);\n-\t\t}\n-\t}\t \n-       # for($i_wuj=0;$i_wuj<$blockCount-1;$i_wuj++)\n-#\t{\n-#\t        $ssStr = uc substr ($contigSeqStr, $Ends[$i_wuj], 2) . 
substr ($contigSeqStr, $Starts[$i_wuj+1]  - 2, 2);\n-#\t        if($strand eq "-"){\n- #               $ssStr = uc string_reverse_complement(lc $ssStr);\n-                #$ssStr = $rc_ssStr;\n-#\t        }\n-#\t\t$ssStrTag = 0 if ($ssStr ne "GTAG");\n-\t\t\n- #       }\n-#\tif($ssStrTag ==1){\n-        if(1){\n-\t\t$num_2++;\n-\t\t$num_chr_2++;\n-\t\tprint OUT_1 "$line\\n";\n-\t\t#print OUT_2 ">$name\\|$chrom\\|$chromStart\\|$chromEnd\\|$strand\\|$ssStr\\|$num_2\\n$junctionSeqStrLeft'..b'ret;\r\n-}\n-\r\n-sub sort_chromNo{\n-\tlocal(@chrom) = @_;\n-\tlocal($len_key_chr_hash, $i, @sort_chr_hash);\n-\tlocal(@digit_random, @words_random, @digit_other_1, @digit_other_2, @words_other_1, @words_other_2, @digit, @words);\n-\tlocal(@sort_digit, @sort_words, @sort_digit_random, @sort_words_random, @sort_digit_other, @sort_words_other);\n-\tlocal($len_digit, $len_words, $len_digit_random, $len_words_random, $len_digit_other, $len_words_other, $term);\n-\t\n-\t$len_key_chr_hash = @chrom;\n-\t# sort via chr number for printing result\r\n-\tfor($i=0; $i<$len_key_chr_hash; $i++){\n-\t\tif($key_chr_hash[$i] =~ /chr(\\d+)\\_random/){\r\n-\t\t\tpush(@digit_random, $1);\r\n-\t\t}\r\n-\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)\\_random/){\r\n-\t\t\tpush(@words_random, $1);\r\n-\t\t}\r\n-\t\telsif($key_chr_hash[$i] =~ /chr(\\d+)\\_([\\w\\d\\_]+)/){\r\n-\t\t\tpush(@digit_other_1, $1);\n-\t\t\tpush(@digit_other_2, $2);\r\n-\t\t}\r\n-\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)\\_([\\w\\d\\_]+)/){\r\n-\t\t\tpush(@words_other_1, $1);\n-\t\t\tpush(@words_other_2, $2);\r\n-\t\t}\r\n-\t\telsif($key_chr_hash[$i] =~ /chr(\\d+)/){\r\n-\t\t\tpush(@digit, $1);\r\n-\t\t}\r\n-\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)/){\r\n-\t\t\tpush(@words, $1);\r\n-\t\t}\r\n-\t\telse{\r\n-\t\t\tprint "BED2FA: There is unknown type of chromosomes: $key_chr_hash[$i]\\n";\r\n-\t\t}\r\n-\t}\r\n-\t@sort_digit = sort by_mostly_numeric @digit;\r\n-\t@sort_words = sort by_mostly_string @words;\n-\t@sort_digit_random = sort 
by_mostly_numeric @digit_random;\r\n-\t@sort_words_random = sort by_mostly_string @words_random;\n-\t@sort_digit_other = sort_2_array_number_string(\\@digit_other_1, \\@digit_other_2);\r\n-\t@sort_words_other = sort_2_array_string_string(\\@words_other_1, \\@words_other_2);\r\n-\t\r\n-\t$len_digit = @sort_digit;\r\n-\tfor($i=0; $i<$len_digit; $i++){\r\n-\t\t$term = "chr".$sort_digit[$i];\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\r\n-\t$len_words = @sort_words;\r\n-\tfor($i=0; $i<$len_words; $i++){\r\n-\t\t$term = "chr".$sort_words[$i];\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\n-\t$len_digit_random = @sort_digit_random;\r\n-\tfor($i=0; $i<$len_digit_random; $i++){\r\n-\t\t$term = "chr".$sort_digit_random[$i]."_random";\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\r\n-\t$len_words_random = @sort_words_random;\r\n-\tfor($i=0; $i<$len_words_random; $i++){\r\n-\t\t$term = "chr".$sort_words_random[$i]."_random";\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\t\r\n-\t$len_digit_other = @sort_digit_other;\r\n-\tfor($i=0; $i<$len_digit_other; $i=$i+2){\r\n-\t\t$term = "chr".$sort_digit_other[$i]."_".$sort_digit_other[$i+1];\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\r\n-\t$len_words_other = @sort_words_other;\r\n-\tfor($i=0; $i<$len_words_other; $i=$i+2){\r\n-\t\t$term = "chr".$sort_words_other[$i]."_".$sort_words_other[$i+1];\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\t\n-\t\n-\treturn @sort_chr_hash;\n-}\r\n-\n-sub sort_2_array_number_string{\r\n-\tlocal($a, $b) = @_;\r\n-\tlocal($len_a, $len_b, $i, %family, $one, $two);\r\n-\tlocal(@ret);\r\n-\t\r\n-\t$len_a = @$a;\r\n-\t$len_b = @$b;\r\n-\tif($len_a == $len_b){\r\n-\t\tfor($i=0; $i<$len_a; $i++){\r\n-\t\t\t$family{$$a[$i]}{$$b[$i]} = 0;\t\t\t\t\t\t\t\t\t\r\n-\t\t}\r\n-\t\tfor $one (sort by_mostly_numeric keys %family) {\r\n-\t\t\tfor $two (sort by_mostly_string keys %{ $family{$one} }) {\r\n-\t\t\t\t\tpush(@ret, $one);\r\n-\t\t\t\t\tpush(@ret, $two);\r\n-\t\t\t}\t\r\n-\t\t}\r\n-\t}\r\n-\telse{\t\t\r\n-\t\tprint 
"ERROR: Sort array is not same size\\n";\r\n-\t\tprint "a $len_a, b $len_b\\n";\r\n-\t}\t\r\n-\t\r\n-\treturn @ret;\r\n-}\n-\n-sub sort_2_array_string_string{\r\n-\tlocal($a, $b) = @_;\r\n-\tlocal($len_a, $len_b, $i, %family, $one, $two);\r\n-\tlocal(@ret);\r\n-\t\r\n-\t$len_a = @$a;\r\n-\t$len_b = @$b;\r\n-\tif($len_a == $len_b){\r\n-\t\tfor($i=0; $i<$len_a; $i++){\r\n-\t\t\t$family{$$a[$i]}{$$b[$i]} = 0;\t\t\t\t\t\t\t\t\t\r\n-\t\t}\r\n-\t\tfor $one (sort by_mostly_string keys %family) {\r\n-\t\t\tfor $two (sort by_mostly_string keys %{ $family{$one} }) {\r\n-\t\t\t\t\tpush(@ret, $one);\r\n-\t\t\t\t\tpush(@ret, $two);\r\n-\t\t\t}\t\r\n-\t\t}\r\n-\t}\r\n-\telse{\t\t\r\n-\t\tprint "ERROR: Sort array is not same size\\n";\r\n-\t\tprint "a $len_a, b $len_b\\n";\r\n-\t}\t\r\n-\t\r\n-\treturn @ret;\r\n-}\n-\n-sub by_mostly_numeric{\r\n-#\t( $a <=> $b ) || ( $a cmp $b );\r\n-\t( $a <=> $b );\r\n-}\r\n-\r\n-sub by_mostly_string{\r\n-#\t( $a <=> $b ) || ( $a cmp $b );\r\n-\t( $a cmp $b );\r\n-}\n-\r\n'
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/get_event_dist_fit.pl
--- a/SpliceTrap/bin/get_event_dist_fit.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,108 +0,0 @@
-# get_event_dist_fit.pl <input_file> [-c=<column>] [-w=<bin width>] [-v]
-#
-# Splits the values of column <c> (1-based, default 2) by the event type
-# given by the first two letters of column 1, writes them to
-# <input_file>.tmp{ca,cs,ad,aa,ai,ir}, runs beta_fit.R (from this script's
-# directory) on the ca/ad/aa/ir/cs files and pastes the resulting *.fit
-# columns, in that order, under a "#Width:" header into <input_file>.fit.hist.
-use Getopt::Long;
-use strict;
-use warnings;
-
-use Cwd;
-use File::Basename qw(dirname);
-
-my $SrcFolder = dirname(Cwd::abs_path($0));
-
-my $InputFileName=$ARGV[0];
-die "Usage: $0 <input_file> [-c=column] [-w=width]\n" unless defined $InputFileName;
-my $width=0.001;
-my $verbose=1;
-my $col=2;
-
-GetOptions (
-        'w:f'=>\$width,
-        'c:i'=>\$col,
-        'v'=>\$verbose
-);
-
-$width=$width*1;
-$col=$col-1;
-
-# two-letter event prefix -> suffix of the temp file that collects its values
-# (ME rows are pooled with CA)
-my %suffix_of = (
-    CA => 'ca', ME => 'ca',
-    CS => 'cs',
-    AD => 'ad',
-    AA => 'aa',
-    AI => 'ai',
-    IR => 'ir',
-);
-my @suffixes = qw(ca cs ad aa ai ir);
-my %ratios_of = map { $_ => [] } @suffixes;
-
-open(my $in, '<', $InputFileName) or die "can not open file $InputFileName to read: $!\n";
-while (my $line = <$in>)
-{
-    next if ($line =~ /^#/);
-    chomp($line);
-    my @array = split(/\s/, $line);
-    next unless defined $array[$col];
-    # values at or outside 0.001 / 0.999 are not used for the fit
-    next if ($array[$col] <= 0.001 or $array[$col] >= 0.999);
-    my $suffix = $suffix_of{ substr($array[0], 0, 2) };
-    push @{ $ratios_of{$suffix} }, $array[$col] if defined $suffix;
-}
-close($in);
-
-foreach my $suffix (@suffixes)
-{
-    my $tmpname = "$InputFileName.tmp$suffix";
-    open(my $tmp, '>', $tmpname) or die "can not open file $tmpname to write: $!\n";
-    print {$tmp} $_, "\n" for @{ $ratios_of{$suffix} };
-    close($tmp) or die "can not close file $tmpname: $!\n";
-}
-
-# The ai file is written above but is neither fitted nor pasted.
-foreach my $suffix (qw(ca ad aa ir cs))
-{
-    system("R --slave --args $InputFileName.tmp$suffix <$SrcFolder/beta_fit.R") == 0
-        or warn "beta_fit.R failed for $InputFileName.tmp$suffix (status $?)\n";
-}
-
-open(my $hist, '>', "$InputFileName.fit.hist") or die "can not open file $InputFileName.fit.hist to write: $!\n";
-print {$hist} "#Width:$width\n";
-close($hist) or die "can not close file $InputFileName.fit.hist: $!\n";
-
-system("paste $InputFileName.tmpca.fit $InputFileName.tmpad.fit $InputFileName.tmpaa.fit $InputFileName.tmpir.fit $InputFileName.tmpcs.fit >>$InputFileName.fit.hist") == 0
-    or warn "paste into $InputFileName.fit.hist failed (status $?)\n";
-#system("rm $InputFileName.tmpca* $InputFileName.tmpad* $InputFileName.tmpaa* $InputFileName.tmpai* $InputFileName.tmpir* ");
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/gtf2bed.pl
--- a/SpliceTrap/bin/gtf2bed.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,81 +0,0 @@
-# rewrite on Sep 7th,2022
-# NOTE(review): the year above postdates the 2017 changeset this file appears in -- confirm.
-
-#part of package SpliceTrap
-
-#Jie Wu
-#
-# Usage: gtf2bed.pl <file.gtf>  > <file.bed>
-# Prints one 12-column BED line per transcript_id, built from the "exon"
-# lines of the GTF file (1-based inclusive GTF coordinates become a 0-based
-# BED start).
-use strict;
-use warnings;
-
-my $inputfilename = $ARGV[0];
-die "Usage: $0 <file.gtf>\n" unless defined $inputfilename;
-
-# input file is a gtf file, 
-# "transcript_id" is required for each line and should not be ambiguous.
-# only the "exon" lines are used
-
-my %chr_hash; 
-my %strand_hash;
-my %tx_exons; #tx_exons{$tx_id}{$start} = $size;
-
-my $linenum = 0;
-
-open(my $input, '<', $inputfilename) or die "can not open file $inputfilename to read: $!\n";
-
-while (my $line = <$input>)
-{
-    $linenum++; 
-    my @a = split("\t", $line);
-    next unless defined $a[2] and $a[2] eq "exon";
-
-    my $txid;
-    if (defined $a[8] and $a[8] =~ /transcript_id "(\S*?)"/)
-    {
-        $txid = $1;
-    }
-    else
-    {
-        die ("$inputfilename format error! No transcript_id in line $linenum \n");
-    }
-
-    # An exon whose chromosome or strand disagrees with what was already
-    # recorded for its transcript is dropped.
-    if (exists $chr_hash{$txid} and $chr_hash{$txid} ne $a[0])
-    {
-        warn ("$inputfilename: ambiguous transcript_id in line $linenum: $txid Skipped \n");
-        next;
-    }
-    if (exists $strand_hash{$txid} and $strand_hash{$txid} ne $a[6])
-    {
-        warn ("$inputfilename: ambiguous transcript_id in line $linenum: $txid Skipped\n");
-        # this `next` was missing: the line was announced as skipped but
-        # still overwrote the strand and added its exon
-        next;
-    }
-    $chr_hash{$txid} = $a[0];
-    $strand_hash{$txid} = $a[6];
-    $tx_exons{$txid}{$a[3]} = $a[4] - $a[3] +1;
-}
-
-close($input);
-
-foreach my $txid (keys %chr_hash)
-{
-    # exons ordered by start coordinate
-    my @starts = sort {$a<=>$b} (keys %{$tx_exons{$txid}});
-    my @sizes  = map { $tx_exons{$txid}{$_} } @starts;
-    my $exon_num   = scalar(@sizes);
-    my $starts_str = "";
-    for (my $i = 0; $i < $exon_num; $i++)
-    {
-        # block starts are relative to the first exon; the string keeps its trailing comma
-        $starts_str = $starts_str.($starts[$i] - $starts[0]).",";
-        if ($i>0)
-        {
-            warn "$txid, intron size..".($starts[$i]-$starts[$i-1])."\n" if ($starts[$i]-$starts[$i-1]>1000000);
-        }
-    }
-    my $sizes_str  = join(",",@sizes);
-    # end of the exon with the highest start coordinate
-    my $end = $starts[$exon_num-1] + $sizes[$exon_num-1] -1;
-    print join("\t",$chr_hash{$txid}, $starts[0]-1, $end, $txid,"0",$strand_hash{$txid},$starts[0]-1, $end, "255,0,0",$exon_num,$sizes_str, $starts_str);
-    print "\n";
-}
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/mapping_bowtie.sh
--- a/SpliceTrap/bin/mapping_bowtie.sh Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,89 +0,0 @@
-#!/bin/bash
-#SrcFolder='/data/zhang/wuj/scripts/SpliceTrap.0.8'
-InputFileName=$1
-faorfq=$2
-DatabasePrefix=$3
-Outputfolder=$4
-SrcFolder=$5
-Threads=$6
-DatabaseFolder=$SrcFolder'/../db/'$DatabasePrefix'/btw/TXdb'
-TmpFolderName=`basename $1`
-#fasta or fastq
-
-cd $Outputfolder;
-#prepare the folder
-if [ -d $TmpFolderName.result ];then
-        echo "MAPPING: !!!Error, there is already a folder named "$TmpFolderName".result !"
-        echo "MAPPING: !!!change the name of that folder first in case I erase them..."
-        exit
-fi
-echo "MAPPING: Start mapping $InputFileName...Creating cache folder $TmpFolderName.result"
-mkdir  $TmpFolderName".result"
-mkdir $TmpFolderName".result"/cache
-
-cd $TmpFolderName".result"
-cd cache
-echo "MAPPING: Split to pieces ..."
-split -l 1000000 $InputFileName
-for name in x*
-do
-
- if [ $faorfq == "fasta" ];then
- add="-f"
- fi
-# if [ $name != $InputFileName ];then
- echo "bowtie -p $Threads -a -v 2 $DatabaseFolder $name $add >$name.btw; perl $SrcFolder/bowtie2eland.pl $name.btw $name $name.eland;rm $name.btw ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>map.sh
- echo $name >>checklist
-# fi
-done
-
-echo "MAPPING: submit scripts..."
-perl $SrcFolder/batchqsub.pl map.sh
-
-tasknum=`wc -l map.sh |tr -d "\n"`
-#checking..
-
-echo "MAPPING: mapping $InputFileName to TXdb done...start to check.."
-while [ 1 ]
-do
- if [ -f mapcheck.sh ];then
- rm mapcheck.sh
- fi
- while read checklist
- do
-
- name=`echo $checklist |tr -d "\n"`
- echo "MAPPING: checking $name...."
- readnum=`wc -l $name | cut -f1 -d" "`
- if [ $faorfq == "fasta" ];then 
- readnum=`echo "$readnum/2"|bc`
- else
- readnum=`echo "$readnum/4"|bc`
- fi
- if [ -f $name.nomt ];then
- bowtienum=`wc -l $name.nomt | cut -f1 -d" "`
- else
- bowtienum=0
- fi
- if [ $bowtienum != $readnum ];then
- echo "bowtie -p $Threads -a -v 2 $DatabaseFolder $name $add >$name.btw; perl $SrcFolder/bowtie2eland.pl $name.btw $name $name.eland;rm $name.btw ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>mapcheck.sh
-
- fi
- done <checklist
- if [ -f mapcheck.sh ];then
- checktasknum=`wc -l mapcheck.sh |tr -d "\n"`
- if [ $checktasknum == $tasknum ];then
- echo "MAPPING: warning! none of the mapping tasks properly finished!"
- fi
- echo "MAPPING: resubmit TASKS...."
- perl $SrcFolder/batchqsub.pl mapcheck.sh
- else
- break
- fi
-done
-echo "MAPPING: Done.....merging files..."
-cat *.nomt >$Outputfolder/$TmpFolderName.nomt
-cd ../../
-rm $TmpFolderName.result -rf
-#/data/zhang/wuj/tools/bowtie-0.12.3/bowtie -a $DatabaseFolderTXdb -f
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/mapping_rmap.sh
--- a/SpliceTrap/bin/mapping_rmap.sh Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,80 +0,0 @@
-#/data/zhang/wuj/tools/bowtie-0.12.3/bowtie -a --best /data/zhang/wuj/database/hg18/AS/TXdb.2/btw/TXdb -f -t s_1_sequence.txtparta >s_1.map &
-#SrcFolder='/data/zhang/wuj/scripts/SpliceTrap.0.8'
-InputFileName=$1
-faorfq=$2
-DatabasePrefix=$3
-Outputfolder=$4
-SrcFolder=$5
-DatabaseFolder=$SrcFolder'/../db/'$DatabasePrefix'/TXdb.fasta'
-TmpFolderName=`basename $1`
-
-cd $Outputfolder;
-#prepare the folder
-if [ -d $TmpFolderName.result ];then
-        echo "MAPPING: !!!Error, there is already a folder named "$TmpFolderName".result !"
-        echo "MAPPING: !!!change the name of that folder first in case I erase them..."
-        exit
-fi
-echo "MAPPING: Start to map $InputFileName....Creating cache folder $TmpFolderName.result"
-mkdir  $TmpFolderName".result"
-cd $TmpFolderName".result"
-mkdir cache stat
-cd cache
-echo "MAPPING: Split file..."
-split -l 1000000 $InputFileName
-echo "MAPPING: generating shell scirpts...."
-for name in x*
-do
-
- echo "rmap -M 100 -m 2 -c $DatabaseFolder -o $name.rmap $name; perl $SrcFolder/rmap2eland.pl $name.rmap $name $name.eland;rm $name.rmap ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>map.sh
- echo $name >>checklist
-done
-tasknum=`wc -l map.sh |tr -d "\n"`
-perl $SrcFolder/batchqsub.pl map.sh
-
-echo "MAPPING: map $InputFileName to TXdb done...start to check.."
-while [ 1 ]
-do
-        if [ -f mapcheck.sh ];then
-                rm mapcheck.sh
-        fi
-        while read checklist
-        do
-
-                name=`echo $checklist |tr -d "\n"`
-                echo "MAPPING: checking $name...."
-                readnum=`wc -l $name | cut -f1 -d" "`
-                if [ $faorfq == "fasta" ];then
-                        readnum=`echo "$readnum/2"|bc`
-                else
-                        readnum=`echo "$readnum/4"|bc`
-                fi
-                if [ -f $name.nomt ];then
-                        rmapnum=`wc -l $name.nomt | cut -f1 -d" "`
-                else
-                        rmapnum=0
-                fi
-                if [ $rmapnum != $readnum ];then
- echo "rmap -M 100 -m 2 -c $DatabaseFolder -o $name.rmap $name; perl $SrcFolder/rmap2eland.pl $name.rmap $name $name.eland;rm $name.rmap ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>mapcheck.sh
-
-                fi
-        done <checklist
-        if [ -f mapcheck.sh ];then
-                checktasknum=`wc -l mapcheck.sh |tr -d "\n"`
-                if [ $checktasknum == $tasknum ];then
-                        echo "MAPPING: warning! none of the mapping tasks properly finished!"
-                fi
-                echo "MAPPING: resubmiting TASKS...."
-                perl $SrcFolder/batchqsub.pl mapcheck.sh
-        else
-                break
-        fi
-done
-echo "MAPPING: Mapping is really done....merging files..."
-
-
-cat *.nomt >$Outputfolder/$TmpFolderName.nomt
-cd ../../
-
-rm $TmpFolderName.result -rf
-
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/mark.mt.4eland.pl
--- a/SpliceTrap/bin/mark.mt.4eland.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,56 +0,0 @@
-# Flag multi-mapped reads in an eland-style mapping file.
-#
-# Usage: perl <this script> reads.eland > reads.nomt
-#
-# Every input line is echoed to STDOUT.  A read gets an extra trailing "MT"
-# column when its hit list (4th column) names more than one hit and those
-# hits either lie on different chromosomes or have start positions more than
-# 100000 apart.  Reads whose 3rd column is "NM", reads without a hit list,
-# and reads with at most one parsable hit are passed through unchanged.
-# This keeps the later inclusion-ratio computation simple: it can treat a
-# flagged read as not usable.
-
-use strict;
-use warnings;
-
-my $inputfilename = $ARGV[0];
-die "Usage: $0 <eland file>\n" unless defined $inputfilename;
-
-open(my $input, '<', $inputfilename)
-    or die "Cannot open $inputfilename: $!\n";
-
-while (my $line = <$input>)
-{
-    chomp($line);
-    my @array  = split("\t", $line);
-    my $status = defined $array[2] ? $array[2] : "";
-    my $match  = defined $array[3] ? $array[3] : "";
-
-    if ($status eq "NM" or $match eq "")
-    {
-        print $line,"\n";
-        next;
-    }
-
-    # Chromosome and start position of every hit listed for this read.
-    my @chr;
-    my @start;
-    while ($match =~ /(chr\S[^\|]*)\|(\d*)\|(\d*)\|/g)
-    {
-        push @chr, $1;
-        push @start, ($2 eq "" ? 0 : $2);
-    }
-
-    if (scalar(@chr) <= 1)
-    {
-        print $line,"\n";
-        next;
-    }
-
-    @chr = sort(@chr);
-    # The positions must be sorted numerically: a plain sort() compares them
-    # as strings ("100000000" lt "9"), so last-minus-first was not the real
-    # span of the hits and distant hits could escape the flag.
-    @start = sort { $a <=> $b } @start;
-
-    if ($chr[0] ne $chr[-1] or $start[-1] - $start[0] > 100000)
-    {
-        print $line, "\tMT\n";
-    }
-    else
-    {
-        print $line,"\n";
-    }
-}
-close($input);
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/rmap2eland.pl
--- a/SpliceTrap/bin/rmap2eland.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,103 +0,0 @@
-# Convert rmap output into an eland-style table with one line per read.
-#
-# Usage: perl <this script> mapped.rmap reads.file output.eland
-#
-# Both inputs are sorted by read name first (into <name>.sort files next to
-# the originals) and then merged in one pass:
-#   * a read without any rmap hit is written as "<name>\tNA\tNM"
-#   * a read with hits is written as "<name>\tNA\t<n>:0:0\t" followed by one
-#     "/<target>:<start+1>F0," or "/<target>:<start+1>R0," entry per hit
-# The first byte of the reads file is taken as the header marker of a read.
-
-use strict;
-use warnings;
-
-my $rmapfilename=$ARGV[0];
-my $readsfilename=$ARGV[1];
-my $elandfilename=$ARGV[2];
-
-die "Usage: $0 <rmap file> <reads file> <eland output>\n"
-    unless defined $rmapfilename and defined $readsfilename and defined $elandfilename;
-
-my $detectformat=`head -c 1 $readsfilename`;
-
-# A failed sort used to go unnoticed and silently produced an empty result.
-system("awk 'NR%2==1' $readsfilename |sort >$readsfilename.sort") == 0
-    or die "Cannot sort $readsfilename\n";
-system("sort -k4,4 $rmapfilename >$rmapfilename.sort") == 0
-    or die "Cannot sort $rmapfilename\n";
-
-open(my $readsfile, '<', $readsfilename.".sort")
-    or die "Cannot open $readsfilename.sort: $!\n";
-open(my $rmapfile, '<', $rmapfilename.".sort")
-    or die "Cannot open $rmapfilename.sort: $!\n";
-open(my $elandfile, '>', $elandfilename)
-    or die "Cannot write $elandfilename: $!\n";
-
-RMAP_LINE:
-while (my $rmapline = <$rmapfile>)
-{
-    chomp($rmapline);
-    my ($mapped_id, $start, $end, $rmapreadname, $mismatch, $strand) = split("\t", $rmapline);
-
-    # A blank or truncated line names no read; matching against it would
-    # consume every remaining read as unmapped.
-    next RMAP_LINE unless defined $rmapreadname;
-
-    READ:
-    while (my $readline = <$readsfile>)
-    {
-        next READ unless $readline =~ /^\Q$detectformat\E/;
-
-        chomp($readline);
-        my $readname = substr($readline, 1, length($readline)-1);
-
-        # Reads sorted before the current rmap read have no hit at all.
-        if ($readname ne $rmapreadname)
-        {
-            print $elandfile $readname,"\tNA\tNM\n";
-            next READ;
-        }
-
-        my @mapped_ids    = ($mapped_id);
-        my @mapped_pos    = ($start);
-        my @mapped_strand = ($strand);
-
-        # Collect the following rmap lines that belong to the same read.
-        HIT:
-        while (1)
-        {
-            my $nextline = <$rmapfile>;
-            my $same_read = 0;
-            if (defined $nextline)
-            {
-                chomp($nextline);
-                ($mapped_id, $start, $end, $rmapreadname, $mismatch, $strand) = split("\t", $nextline);
-                $same_read = (defined $rmapreadname and $rmapreadname eq $readname) ? 1 : 0;
-            }
-
-            if ($same_read)
-            {
-                push(@mapped_ids, $mapped_id);
-                push(@mapped_pos, $start);
-                push(@mapped_strand, $strand);
-                next HIT;
-            }
-
-            # The line belongs to another read: step back over it (plus its
-            # newline) so the outer loop reads it again.  At end of file
-            # there is nothing to give back.
-            seek($rmapfile, -1*length($nextline)-1, 1) if defined $nextline;
-            last HIT;
-        }
-
-        print $elandfile $readname,"\t";
-        print $elandfile "NA\t";
-        print $elandfile scalar(@mapped_ids),":0:0\t";
-        for (my $i=0; $i<@mapped_ids; $i++)
-        {
-            print $elandfile "/",$mapped_ids[$i];
-            print $elandfile ":",$mapped_pos[$i]+1;
-            print $elandfile ($mapped_strand[$i] eq "+" ? "F0," : "R0,");
-        }
-        print $elandfile "\n";
-        last READ;
-    }
-}
-
-# Reads left over after the last rmap line are unmapped.
-while (my $readline = <$readsfile>)
-{
-    if ($readline =~ /^\Q$detectformat\E/)
-    {
-        chomp($readline);
-        my $readname = substr($readline, 1, length($readline)-1);
-        print $elandfile $readname,"\tNA\tNM\n";
-    }
-}
-
-close($elandfile) or die "Cannot finish writing $elandfilename: $!\n";
-close($rmapfile);
-close($readsfile);
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/scan_nomt.pl
--- a/SpliceTrap/bin/scan_nomt.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,98 +0,0 @@
-# Split two parallel mapping files (one line per read, mate 1 and mate 2 on
-# the same line number) into per-chromosome files.
-#
-# Usage: perl <this script> mate1.nomt mate2.nomt
-#
-# For every chromosome <chr> seen, the pair is written to "<file1>.<chr>" and
-# "<file2>.<chr>".  The chromosome is the first "chr...|" name in the 4th
-# column of a line that has exactly four columns.  A pair is dropped when
-# neither mate yields a chromosome or when the two mates disagree.  A line
-# ending in "MT" is reduced to its first two columns plus "MT".
-
-use strict;
-use warnings;
-use FileHandle;
-
-my $File1Name=$ARGV[0];
-my $File2Name=$ARGV[1];
-
-die "Usage: $0 <mate1 file> <mate2 file>\n"
-    unless defined $File1Name and defined $File2Name;
-
-my %FileHandle1;    # chromosome => output handle for mate 1
-my %FileHandle2;    # chromosome => output handle for mate 2
-
-open(my $File1, '<', $File1Name) or die "Cannot open $File1Name: $!\n";
-open(my $File2, '<', $File2Name) or die "Cannot open $File2Name: $!\n";
-
-# Last two characters of a line ("NM", "MT", ...), or "" for shorter lines.
-sub read_status
-{
-    my ($line) = @_;
-    return length($line) >= 2 ? substr($line, -2) : "";
-}
-
-# Chromosome named by a four-column line, or "" when there is none.
-sub line_chr
-{
-    my ($fields) = @_;
-    return "" unless scalar(@$fields) == 4;
-    return $fields->[3] =~ /(chr\S*?)\|/ ? $1 : "";
-}
-
-# Write one mate, collapsing multi-mapped ("MT") lines to id, sequence, MT.
-sub print_mate
-{
-    my ($fout, $line, $status, $fields) = @_;
-    if ($status eq "MT")
-    {
-        print {$fout} $fields->[0],"\t",$fields->[1],"\tMT\n";
-    }
-    else
-    {
-        print {$fout} $line,"\n";
-    }
-    return;
-}
-
-while (my $line1 = <$File1>)
-{
-    my $line2 = <$File2>;
-    $line2 = "" unless defined $line2;    # second file is shorter
-    chomp($line1);
-    chomp($line2);
-
-    my $read1status = read_status($line1);
-    my $read2status = read_status($line2);
-    my @array1 = split("\t", $line1);
-    my @array2 = split("\t", $line2);
-    my $chr1 = line_chr(\@array1);
-    my $chr2 = line_chr(\@array2);
-
-    my $chr = $chr1 ne "" ? $chr1 : $chr2;
-    next if $chr eq "";
-    next if ($chr1 ne "" and $chr2 ne "" and $chr2 ne $chr1);
-
-    # Open the per-chromosome outputs on first use.  The pair that triggers
-    # the open is written as well; it used to be dropped.
-    unless (exists $FileHandle1{$chr})
-    {
-        open(my $fout1, '>', $File1Name.".".$chr)
-            or die "Cannot write $File1Name.$chr: $!\n";
-        open(my $fout2, '>', $File2Name.".".$chr)
-            or die "Cannot write $File2Name.$chr: $!\n";
-        $FileHandle1{$chr} = $fout1;
-        $FileHandle2{$chr} = $fout2;
-    }
-
-    print_mate($FileHandle1{$chr}, $line1, $read1status, \@array1);
-    print_mate($FileHandle2{$chr}, $line2, $read2status, \@array2);
-}
-
-# Close the handles themselves (the hash values), not the chromosome names.
-foreach my $fout (values %FileHandle1, values %FileHandle2)
-{
-    close($fout) or die "Cannot finish writing a per-chromosome file: $!\n";
-}
-
-close($File1);
-close($File2);
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/scanbed2txdb.pl
--- a/SpliceTrap/bin/scanbed2txdb.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,490 +0,0 @@\n-#argv0: input transcript bed file\n-#argv1: output filename, will be in AS format\n-\n-use strict;\n-my $AnnoFileName = $ARGV[0];\n-my $outputFileName = $ARGV[1];\n-\n-if($AnnoFileName eq "" or $outputFileName eq "")\n-{\n-\tprint "TXDBGEN: Please specify your input files\\n";\n-\texit;\n-}\n-\n-my $cachefolder = `basename $outputFileName`;\n-chomp($cachefolder);\n-my $cachefolder = $cachefolder.".cache";\n-my $AnnoFileBase = `basename $AnnoFileName`; \n-chomp($AnnoFileBase);\n-\n-if(! -e $cachefolder)\n-{\n-\tmkdir $cachefolder or die "CHECK: cannot mkdir $cachefolder\\n";\n-\tprint "TXDBGEN: mkdir $cachefolder\\n";\n-}\n-\n-my $CacheAnnoFileName = $cachefolder."/".$AnnoFileBase.".sort";\n-#sort the annotation file\n-\n-print "TXDBGEN: sort $AnnoFileName \\n";\n-\n-system("sort -k6,6 -k1,1 -k2,2n -k3,3n $AnnoFileName >$CacheAnnoFileName");\n-\n-$AnnoFileName = $CacheAnnoFileName;\n-\n-#read the annotations into hashes\n-\n-open(AnnoFile, $CacheAnnoFileName) or die "can not open",$CacheAnnoFileName;\n-#split the genes into contigs\n-\n-my $contigid = 0;\n-my $end_tmp = 0;\n-my $chr_tmp = "chr";\n-my $strand_tmp="NA";\n-\n-my $TXnumtmp=0;\n-\n-my %eventlist =();\n-#my %eventlist_af = ();\n-#my %eventlist_al = ();\n-my %evidences = ();\n-\n-open(my $output,">$outputFileName");\n-\n-open(my $output2, ">$outputFileName.evi");\n-while(my $line =<AnnoFile>)\n-{\n-\tchomp($line);\n-\tmy @a = split("\\t",$line);\n-\tmy $chr = $a[0];\n-\tmy $start = $a[1];\n-\tmy $end = $a[2];\n-\tmy $name = $a[3];\n-\tmy @sizes = split(",",$a[10]);\n-\tmy $strand = $a[5];\n-\tmy @start_shifts = split(",",$a[11]);\n-\t#my $chrstr = $chr.$strand;\n-\tmy @starts;\n-\tmy @ends;\n-\tfor(my $i=0;$i<@start_shifts;$i++)\n-        {\n-                $starts[$i]=$start_shifts[$i]+$start;\n-                $ends[$i] = $starts[$i]+$sizes[$i];\n-        }\n-\tif($start >$end_tmp or $chr ne $chr_tmp or $strand_tmp ne $strand)\n-        {\n-                $contigid++;\n-           
     #$ctgmultisonum++ if $TXnumtmp>1;\n-                $TXnumtmp=0;\n-\t\tmy $annos = scanevents(\\%eventlist, "inner") ;\n-\t\t#my $annos_af = scanevents(\\%eventlist_af, "af");\n-\t\t#my $annos_al = scanevents(\\%eventlist_al, "al");\n-\t\t\n-\t\tmy $stdout = select ($output);\n-\t\tmy $eventids = printanno($annos,$chr_tmp,$strand_tmp) if(scalar(%$annos)>0);\n-\t\t#printanno($annos_af,$chr_tmp,$strand_tmp) if(scalar(%$annos_af)>0);\n-\t\t#printanno($annos_al,$chr_tmp,$strand_tmp) if(scalar(%$annos_al)>0);\n-\t\tselect($stdout);\n-\t\t$stdout = select ($output2);\n-\t\t#print cross information\n-\t\tforeach my $connect_str  (keys %$eventids) \n-\t\t{\n-\t\t\tprint $eventids->{$connect_str},"\\t";\n-\t\t\tforeach my $transcriptid  (keys %{$evidences{$connect_str}}) \n-\t\t\t{\n-\t\t\t\tprint $transcriptid,",";\n-\t\t\t}\n-\t\t\tprint "\\n";\n-\t\t}\n-\t\tselect($stdout);\n-\t\tprint "TXDBGEN: Contig ID $contigid at $chr $strand...\\n" if ( $contigid%1000 == 0);\n-\t\t%eventlist = ();\n-\t\t#%eventlist_al = ();\n-\t\t#%eventlist_af = ();\n-                #print CacheContigFile "#ctg$contigid\\n";\n-\n-        }\n-\t$TXnumtmp++;\n-        $end_tmp = $end if ($end > $end_tmp or $chr ne $chr_tmp or $strand_tmp ne $strand);\n-        $chr_tmp = $chr;\n-\t\n-\t$strand_tmp = $strand;\n-\t\n-\t#scan connections, 2 and 3 exons for CA/CS/AF\n-#\tif(scalar(@starts)>2)\n-#\t{\n-#\t\tmy $connectionstr_af =$starts[0]."-".\n-#\t\t\t\t$ends[0].",".\n-#\t\t\t\t$starts[1]."-".\n-#\t\t\t\t$ends[1]."," ;\n-#\t\n-#\t\t$eventlist_af{$connectionstr_af} = $starts[0];\n-#\t\tmy $connectionstr_al = $starts[scalar(@starts)-2]."-".\n-#\t\t\t\t$ends[scalar(@starts)-2].",".\n-#\t\t\t\t$starts[scalar(@starts)-1]."-".\n-#\t\t\t\t$ends[scalar(@starts)-1]."," ;\n-#\t\t$eventlist_al{$connectionstr_al} = $starts[scalar(@starts)-2];\n-#\t}\n-\t\t#didn\'t consider direction yet\n-        #add 1 exon for IR\n-\tfor(my $n=1;$n<4;$n++)\n-        {\n-\t\tfor(my $i=0;$i<scalar(@starts)-$n+1;$i++)\n-      
          {\n-\t\t\tmy $connectionstr = "";\n-\t\t\tfor(my $j=$i;$j<$i+$n;$j++)\n-                        {\n-                                $connectionstr = $connectionstr .\n-                                        $starts[$j]."-".\n-                                        $en'..b'\t\tprint "0,";\n-\t\t\tprint $a[2]-$a[0],",";\n-\t\t\tprint $a[4]-$a[0],"\\n";\n-\t\t\t\n-\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n-                        print $id.".".$num,"[S]\\t";\n-\t\t\tmy $connectstr_ca= $a[0]."-".$a[1].",".$a[4]."-".$a[5].",";\n-\t\t\t$eventids{$connectstr_ca} = $id.".".$num."[S]";\n-                        print "0\\t";\n-                        print $strand,"\\t";\n-                        print $a[0],"\\t",$a[5],"\\t";\n-                        print "255,0,0\\t";\n-                        print "2\\t";    \n-                        print $a[1]-$a[0],",";\n-                        #print $a[3]-$a[2],",";\n-                        print $a[5]-$a[4],"\\t";\n-                        print "0,";\n-                        #print $a[2]-$a[0],",";\n-                        print $a[4]-$a[0],"\\n";\n-\t\t\t\n-\t\t}\n-\t\tif($annos->{$key} eq "ir")\n-\t\t{\n-\t\t\tmy @a=split(/[,-]/,$key);\n-\n-\t\t\tmy $chrid=substr($chr,3,length($chr)-3);\n-\t\t\tmy $id="IR-IR-$chrid"."-".$a[1]."-".$a[2];\n-\t\t\tmy $num=0;\n-\t\t\t$num = $nums_per_isoform{$id} if exists $nums_per_isoform{$id};\n-\t\t\t$nums_per_isoform{$id}++;\n-\n-\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[3],"\\t";\n-\t\t\tprint $id.".".$num,"[L]\\t";\n-\t\t\tprint "0\\t";\n-\t\t\tprint $strand,"\\t";\n-\t\t\tprint $a[0],"\\t",$a[3],"\\t";\n-\t\t\t$eventids{$a[0]."-".$a[3].","} = $id.".".$num."[L]";\n-\t\t\tprint "255,0,0\\t";\n-\t\t\tprint "3\\t";\n-\t\t\tprint $a[1]-$a[0],",";\n-\t\t\tprint $a[2]-$a[1],",";\n-\t\t\tprint $a[3]-$a[2],"\\t";\n-\t\t\tprint "0,";\n-\t\t\tprint $a[1]-$a[0],",";\n-\t\t\tprint $a[2]-$a[0],"\\n";\n-\t\t\t\n-\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[3],"\\t";\n-                       
 print $id.".".$num,"[S]\\t";\n-\t\t\t$eventids{$key} = $id.".".$num."[S]";\n-                        print "0\\t";\n-                        print $strand,"\\t";\n-                        print $a[0],"\\t",$a[3],"\\t";\n-                        print "255,0,0\\t";\n-                        print "2\\t";    \n-                        print $a[1]-$a[0],",";\n-                        #print $a[3]-$a[2],",";\n-                        print $a[3]-$a[2],"\\t";\n-                        print "0,";\n-                        #print $a[2]-$a[0],",";\n-                        print $a[2]-$a[0],"\\n";\n-\t\t\n-\t\t}\n-\t\tif($annos->{$key} eq "ss")\n-\t\t{\n-\t\t\tmy @a=split(/[,-]/,$key);\n-\t\t\tmy $chrid=substr($chr,3,length($chr)-3);\n-\t\t\tmy $type="AA";\n-\t\t\tif( ($strand eq "+" && $a[1]==$a[2]) or ($strand eq "-" && $a[3]==$a[4]))\n-\t\t\t{\n-\t\t\t\t$type="AD";\n-\t\t\t}\n-\t\t\tmy $id="$type-$type-$chrid"."-".$a[2]."-".$a[3];\n-\t\t\tmy $connect_str_L = "";\n-\t\t\tmy $connect_str_S = "";\n-\t\t\tif($a[1]==$a[2])\n-\t\t\t{\n-\t\t\t\t$connect_str_L = $a[0]."-".$a[3].",".$a[4]."-".$a[5].",";\n-\t\t\t\t$connect_str_S = $a[0]."-".$a[1].",".$a[4]."-".$a[5].",";\n-\t\t\t}\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\t$connect_str_L = $a[0]."-".$a[1].",".$a[2]."-".$a[5].",";\n-\t\t\t\t$connect_str_S = $a[0]."-".$a[1].",".$a[3]."-".$a[5].",";\n-\t\t\t\t\n-\t\t\t}\n-\t\t\tmy $num=0;\n-\t\t\t$num = $nums_per_isoform{$id} if exists $nums_per_isoform{$id};\n-\t\t\t$nums_per_isoform{$id}++;\n-\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n-\t\t\tprint $id.".".$num,"[L]\\t";\n-\t\t\t$eventids {$connect_str_L} = $id.".".$num."[L]";\n-\t\t\tprint "0\\t";\n-\t\t\tprint $strand,"\\t";\n-\t\t\tprint $a[0],"\\t",$a[5],"\\t";\n-\t\t\tprint "255,0,0\\t";\n-\t\t\tprint "3\\t";\n-\t\t\tprint $a[1]-$a[0],",";\n-\t\t\tprint $a[3]-$a[2],",";\n-\t\t\tprint $a[5]-$a[4],"\\t";\n-\t\t\tprint "0,";\n-\t\t\tprint $a[2]-$a[0],",";\n-\t\t\tprint $a[4]-$a[0],"\\n";\n-\t\t\t\n-\t\t\tprint 
$chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n-                        print $id.".".$num,"[S]\\t";\n-\t\t\t$eventids {$connect_str_S} = $id.".".$num."[S]";\n-                        print "0\\t";\n-                        print $strand,"\\t";\n-                        print $a[0],"\\t",$a[5],"\\t";\n-                        print "255,0,0\\t";\n-                        print "2\\t";    \n-                        print $a[1]-$a[0],",";\n-                        #print $a[3]-$a[2],",";\n-                        print $a[5]-$a[4],"\\t";\n-                        print "0,";\n-                        #print $a[2]-$a[0],",";\n-                        print $a[4]-$a[0],"\\n";\n-\t\n-\t\n-\t\t}\n-\n-\t}\n-\treturn(\\%eventids);\n-\t\n-}\n-\n'
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/splitdb.sh
--- a/SpliceTrap/bin/splitdb.sh Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,11 +0,0 @@
-workingfolder=$1;
-cd $workingfolder
-cut -f1 ../TXdb.bed |uniq >chr.list;
-while read line ;
-do 
- chr=`echo $line |tr -d "\n"`;
- grep -w $chr ../TXdb.bed >$chr.bed;
- split -3000 $chr.bed $chr.;
- echo $chr....;
- rm $chr.bed;
-done <chr.list;
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/vslz.pl
--- a/SpliceTrap/bin/vslz.pl Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,46 +0,0 @@
-use strict;
-# The script stops right here: nothing below this exit is ever executed.
-exit;
-
-# Inactive remainder: paint inclusion ratios onto the TXdb BED file.
-#   $ARGV[0]  ratio table; rows whose 15th column is "passed" are used
-#             (1st column = event id, 3rd column = ratio)
-#   $ARGV[1]  output name; "<name>.bed" is written with a track line
-# [L] isoforms are scored ratio*1000 and [S] isoforms (1-ratio)*1000.
-
-my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini";
-my $SrcFolder=$config{SrcFolder};
-
-my $BedFileName=$SrcFolder."/db/TXdb.1101.bed";
-my $RatioFileName=$ARGV[0];
-my $BedFileOutName=$ARGV[1];
-my %Ratios;
-
-# Load the ratios of all events that passed the cutoff.
-open(RatioFile, $RatioFileName);
-while(defined(my $ratio_row=<RatioFile>))
-{
-    chomp($ratio_row);
-    my @cols=split("\t",$ratio_row);
-    next unless $cols[14] eq "passed";
-    $Ratios{$cols[0]}=$cols[2];
-}
-close(RatioFile);
-
-open(BedFile, $BedFileName);
-open(BedFileOut,">".$BedFileOutName.".bed");
-print BedFileOut "track name=$BedFileOutName discription=$BedFileOutName useScore=1\n";
-while(defined(my $bed_row=<BedFile>))
-{
-    # The row is not chomped, so the last column carries the line ending.
-    my @cols=split("\t",$bed_row);
-    $cols[3]=~/^(\S*)\[([LS])\]/;
-    my ($id, $LS)=($1, $2);
-    next if not exists $Ratios{$id};
-
-    my $score=($LS eq 'S') ? 1-$Ratios{$id} : $Ratios{$id};
-    $cols[4]=sprintf("%.0f",$score*1000);
-
-    print BedFileOut join("\t",@cols);
-}
-close(BedFileOut);
-close(BedFile);
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/cutoffs/cutoff.pair.06.txt
--- a/SpliceTrap/cutoffs/cutoff.pair.06.txt Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,111 +0,0 @@
-9 2.7 0.60603248295953 0.171888000000001
-18 2 0.60524889781465 0.183333
-27 2 0.614125387586727 0.176226000000001
-36 1.9 0.607174152514097 0.174107
-45 1.8 0.600015884070526 0.175151
-54 1.5 0.606211216622966 0.181161
-63 1.5 0.630096537351149 0.17829
-72 1.4 0.600041179609881 0.181678
-81 1.4 0.604419014379505 0.180618
-90 1.4 0.605503082593424 0.179083
-99 1.3 0.615709740786329 0.177374
-108 1.3 0.60153811996046 0.178839
-117 1.3 0.624394647716218 0.172525
-126 1.3 0.607071122577313 0.179287
-135 1.2 0.604935986506864 0.180048
-144 1.2 0.606377388150568 0.179824
-153 1.1 0.609872307866415 0.172894
-162 1.1 0.605221663116349 0.176559
-171 1.1 0.60084962792178 0.177017
-180 1.1 0.606042806460085 0.179842
-189 1.1 0.639661884576477 0.165058000000001
-198 1 0.63233306074533 0.172839
-207 1 0.620992090280475 0.17554
-216 1 0.617967295839559 0.177413000000001
-225 1 0.643160025381914 0.165310000000001
-234 1 0.664093921751576 0.161004
-243 1 0.617444847253655 0.179608
-252 0.9 0.600141496641319 0.18301
-261 0.9 0.61458063483465 0.175533
-270 0.9 0.631646650541943 0.175805
-279 0.9 0.633124568165035 0.169468
-288 0.9 0.611511089439885 0.172566
-297 0.9 0.620072975883583 0.176992
-306 0.8 0.632771460485054 0.177489
-315 0.8 0.616480687227451 0.177778
-324 0.8 0.627081395066885 0.173799
-333 0.8 0.637882935696159 0.166883
-342 0.8 0.601440520784007 0.181724
-351 0.8 0.6308436890493 0.169283000000001
-360 0.8 0.639014375873075 0.164053
-369 0.8 0.608995842436592 0.182319
-378 0.7 0.61599095431047 0.179065
-387 0.7 0.632637490417973 0.173393000000001
-396 0.7 0.611914615604433 0.174309000000001
-405 0.7 0.636212590337212 0.168285
-414 0.7 0.601768900444485 0.17904
-423 0.7 0.652306181407458 0.165425
-432 0.7 0.61284769661545 0.17678
-441 0.7 0.620417630501693 0.179685000000001
-450 0.7 0.630731905615349 0.172766000000001
-459 0.7 0.610688367962893 0.174217
-468 0.7 0.614116817389574 0.169074
-477 0.7 0.627626017337352 0.170694000000001
-486 0.7 0.625196910435048 0.173867
-495 0.7 0.624173913851294 0.170272
-504 0.7 0.600827986205662 0.176072
-513 0.7 0.60691053900984 0.173314
-522 0.7 0.637574555072294 0.164769
-531 0.7 0.623224687213309 0.173131
-540 0.7 0.606854298464756 0.17057
-549 0.7 0.602771105882753 0.176564
-558 0.7 0.635304573393458 0.165681
-567 0.7 0.614272507442932 0.178017
-576 0.7 0.647638749954493 0.161989
-585 0.7 0.635927720935647 0.166479000000001
-594 0.7 0.654872976893549 0.165962
-603 0.6 0.623281215201779 0.169602
-612 0.6 0.620036715664558 0.174000000000001
-621 0.6 0.6207915344063 0.173405000000001
-630 0.6 0.605666404221127 0.172404000000001
-639 0.6 0.605282217899342 0.171117
-648 0.6 0.605873079749684 0.173455
-657 0.6 0.622135854785987 0.171086
-666 0.6 0.609650459959578 0.180212
-675 0.6 0.615727466486383 0.175395
-684 0.6 0.650390040594351 0.157856
-693 0.6 0.600004956285639 0.17354
-702 0.6 0.619480037234101 0.169091000000001
-711 0.6 0.620537197134915 0.170687
-720 0.6 0.63097256862101 0.168571
-729 0.6 0.616862968480703 0.176372
-738 0.6 0.619853798956006 0.169892
-747 0.6 0.668527865891001 0.157351
-756 0.6 0.609299339958315 0.173282
-765 0.6 0.606997082083795 0.176418
-774 0.6 0.60385659737608 0.178646
-783 0.6 0.618166146904953 0.170539000000001
-792 0.6 0.655214353317497 0.162477
-801 0.6 0.667723279232349 0.161535
-810 0.6 0.641501812796978 0.165034000000001
-819 0.6 0.627258070106693 0.167323
-828 0.6 0.611937213679756 0.173702
-837 0.6 0.637164577326402 0.165999
-846 0.6 0.609589936966381 0.16902
-855 0.6 0.605378844036658 0.177104
-864 0.6 0.603894805290932 0.174897
-873 0.6 0.61196823740213 0.177185000000001
-882 0.6 0.645164364920278 0.169894
-891 0.6 0.616946196689952 0.164449
-900 0.6 0.620119902403488 0.173101
-909 0.6 0.612352323569787 0.175357000000001
-918 0.6 0.604523310198474 0.181906
-927 0.6 0.639238406673874 0.161621
-936 0.6 0.626807582940753 0.177092000000001
-945 0.6 0.649699083584984 0.166011
-954 0.6 0.630080161768358 0.175451
-963 0.6 0.634570582339133 0.174267
-972 0.6 0.613974438568147 0.171149000000001
-981 0.6 0.646832147519767 0.16271
-990 0.6 0.611137442619996 0.170677
-999 0.6 0.609074843250805 0.177417
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/cutoffs/cutoff.pair.07.txt
--- a/SpliceTrap/cutoffs/cutoff.pair.07.txt Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,111 +0,0 @@
-9 3.8 0.723195172634673 0.136373
-18 3.1 0.704962064416613 0.145611
-27 2.7 0.707557510009516 0.148332
-36 2.7 0.727407566746925 0.139424000000001
-45 2.6 0.700849445263879 0.141565
-54 2.5 0.722912449885175 0.135534
-63 2.2 0.700978069072888 0.148155
-72 2.1 0.701953684982546 0.145638
-81 2.1 0.714519324839634 0.144058
-90 2 0.706725312179025 0.143797
-99 1.8 0.711963141532287 0.146397
-108 1.8 0.735577668185521 0.136275
-117 1.7 0.702810428805111 0.149303
-126 1.7 0.720543078508623 0.142071
-135 1.7 0.70149933958699 0.145076
-144 1.7 0.71795624649094 0.139588
-153 1.7 0.704835091689678 0.148916
-162 1.5 0.701545731183728 0.149502
-171 1.5 0.724190117580076 0.139706
-180 1.5 0.713150501847426 0.146353
-189 1.4 0.701931396757465 0.150412
-198 1.4 0.701219389501589 0.150832
-207 1.4 0.709880139643964 0.146954
-216 1.4 0.745895944323654 0.133521
-225 1.4 0.708094624059256 0.144051000000001
-234 1.4 0.714653417236785 0.141035
-243 1.4 0.702278770668715 0.146489
-252 1.4 0.714249389046789 0.140401
-261 1.4 0.720916794007688 0.140816
-270 1.4 0.704112631476039 0.140986
-279 1.4 0.725725706846333 0.140364
-288 1.4 0.713439724527154 0.142329
-297 1.3 0.703874456990055 0.145868
-306 1.3 0.714083967774953 0.138665
-315 1.3 0.717635749171277 0.139297
-324 1.3 0.70800987989496 0.14435
-333 1.3 0.703608941539679 0.143911
-342 1.3 0.712538846663198 0.142044
-351 1.3 0.736517626110649 0.137995
-360 1.3 0.752400150484099 0.133463
-369 1.3 0.700408117809905 0.149221
-378 1.2 0.710876753570226 0.145758
-387 1.2 0.719606098956021 0.141686
-396 1.1 0.701573537304199 0.149306
-405 1.1 0.711449711776699 0.146484
-414 1.1 0.7219174559929 0.140383
-423 1.1 0.711374621895577 0.147498
-432 1.1 0.706470648418195 0.146516
-441 1.1 0.702119897887331 0.144641
-450 1.1 0.710888241224795 0.14668
-459 1.1 0.700236191312255 0.145443
-468 1.1 0.709289110346942 0.145098000000001
-477 1.1 0.723682944143134 0.137715000000001
-486 1.1 0.711909258694556 0.148433000000001
-495 1.1 0.706050645629693 0.146412
-504 1.1 0.71026622444805 0.142836
-513 1.1 0.721840069030837 0.140123
-522 1.1 0.721616423306577 0.139145
-531 1.1 0.706599106603714 0.144744
-540 1.1 0.732906503371939 0.138953
-549 1.1 0.710747551039788 0.146028
-558 1.1 0.72403698505923 0.141907
-567 1.1 0.703858944181491 0.142942
-576 1.1 0.732227703846385 0.137018
-585 1.1 0.706081331511759 0.145895
-594 1.1 0.72851699215126 0.140481
-603 1.1 0.735776902337646 0.13646
-612 1 0.703269143581204 0.150933
-621 1 0.705868277579243 0.149988
-630 1 0.735304036557552 0.133891
-639 1 0.701919162079755 0.140775
-648 1 0.714774599695102 0.142638
-657 1 0.702484973766457 0.145888
-666 1 0.714275579611685 0.144289
-675 1 0.704751859660089 0.148070000000001
-684 1 0.725303040977116 0.139712
-693 1 0.7387858141582 0.134658
-702 1 0.737133543332787 0.135015
-711 1 0.71646444290315 0.140297
-720 1 0.735722464691124 0.136141
-729 1 0.709409780786399 0.144924000000001
-738 1 0.732557952972944 0.137912
-747 1 0.70280408850191 0.148749
-756 1 0.736014898114798 0.13601
-765 1 0.719075344267219 0.141568000000001
-774 1 0.705605602403474 0.142286
-783 1 0.714188898670159 0.143287
-792 1 0.708351298492815 0.14399
-801 1 0.701618595782959 0.140845
-810 1 0.70444245319677 0.142743000000001
-819 1 0.701265538602256 0.146322
-828 1 0.731290352078885 0.136012
-837 1 0.717812366249433 0.137717
-846 1 0.724767043040464 0.142316
-855 1 0.742408530369704 0.132985
-864 1 0.715927851943484 0.141312
-873 1 0.70674171896429 0.145039
-882 0.9 0.705311244948852 0.148678
-891 0.9 0.725745115884173 0.136097
-900 0.9 0.717545411117844 0.145251
-909 0.9 0.71139239340832 0.147716
-918 0.9 0.724110435852362 0.14723
-927 0.9 0.730304270252922 0.142741000000001
-936 0.9 0.702790994935148 0.14686
-945 0.9 0.714312901014806 0.142984
-954 0.9 0.718970705842182 0.14072
-963 0.9 0.702918434736422 0.146811
-972 0.9 0.763309367972586 0.135241
-981 0.9 0.70933162192829 0.14406
-990 0.9 0.714425781696018 0.142589
-999 0.9 0.707307109948989 0.146265
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/cutoffs/cutoff.pair.08.txt
--- a/SpliceTrap/cutoffs/cutoff.pair.08.txt Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,111 +0,0 @@
-9 5.7 0.807699596880158 0.11075
-18 4.8 0.812102488456895 0.109521
-27 4.8 0.807059148683326 0.109122
-36 4.5 0.810359129334262 0.106039
-45 4.1 0.800899562591127 0.109428
-54 4 0.819734419515034 0.104148
-63 3.6 0.801137818633087 0.112097
-72 3.5 0.805611266779768 0.107837
-81 3.2 0.804721543883597 0.110678
-90 3.2 0.81977543427128 0.105428
-99 3.2 0.80949445276013 0.109698
-108 3.1 0.800113619307504 0.115339
-117 2.9 0.80163357132874 0.111922
-126 2.9 0.807430653105459 0.110639
-135 2.8 0.809101610526483 0.109881
-144 2.8 0.808015366939668 0.109303
-153 2.8 0.815481435245632 0.106845
-162 2.7 0.813972734460064 0.108256
-171 2.7 0.803878583908181 0.111767
-180 2.7 0.814908823498375 0.110845
-189 2.5 0.803934003900257 0.112334
-198 2.5 0.819507987133127 0.106736
-207 2.5 0.816975135145848 0.109487
-216 2.5 0.801969702289987 0.11426
-225 2.5 0.814854242652841 0.106264
-234 2.5 0.809009743289239 0.110027000000001
-243 2.5 0.816859180637723 0.10951
-252 2.5 0.825923196681225 0.104742
-261 2.3 0.805863672217281 0.108068
-270 2.3 0.812114384187481 0.110547
-279 2.3 0.811109163497932 0.109461
-288 2.3 0.80680260883464 0.104169
-297 2.3 0.800054845624205 0.110627
-306 2 0.80196269021579 0.117711
-315 2 0.809945512786952 0.109737
-324 2 0.819364610024312 0.110094
-333 2 0.812712294607599 0.111396
-342 2 0.801568817408624 0.113051
-351 2 0.804296495631151 0.112833
-360 2 0.809648638295373 0.106319
-369 2 0.810216908330331 0.110967
-378 2 0.80081744573067 0.112898
-387 2 0.809614612996421 0.114576
-396 2 0.800940767696918 0.109701
-405 1.8 0.800397828290365 0.115299
-414 1.8 0.80375463158553 0.116013
-423 1.8 0.80204726507529 0.110069
-432 1.8 0.829673313493101 0.109584
-441 1.8 0.801512586472861 0.110591
-450 1.8 0.801317568038407 0.11341
-459 1.8 0.806266605408301 0.11489
-468 1.8 0.804368073911341 0.112083
-477 1.8 0.80034140400705 0.113126
-486 1.8 0.803037870942011 0.112527
-495 1.8 0.816147055378533 0.113451
-504 1.8 0.804009255355535 0.113167
-513 1.8 0.809536527236661 0.111121
-522 1.8 0.810250462485448 0.113045
-531 1.8 0.813388275601197 0.117925
-540 1.8 0.810529197259932 0.111664
-549 1.8 0.80286846045568 0.114872
-558 1.8 0.805231767462103 0.112439
-567 1.8 0.803715104541846 0.112076
-576 1.8 0.800856311237744 0.113416
-585 1.8 0.815485369320364 0.113187
-594 1.8 0.810404807750137 0.111576
-603 1.8 0.80944688874663 0.114048
-612 1.8 0.813671662890222 0.107955
-621 1.8 0.805581198188585 0.111274
-630 1.5 0.801229670148542 0.121577
-639 1.5 0.812201198793304 0.109719
-648 1.5 0.80667869232878 0.111841
-657 1.5 0.817613487850624 0.110405
-666 1.5 0.816866335035283 0.107114
-675 1.5 0.8101804833249 0.117018
-684 1.5 0.818509280416726 0.115303
-693 1.5 0.80107589244594 0.116714
-702 1.5 0.829601652947073 0.107173
-711 1.5 0.820953668506561 0.108043
-720 1.5 0.811426464962464 0.114567
-729 1.5 0.800211543269215 0.1163
-738 1.5 0.814971683053337 0.111887
-747 1.5 0.810647742818646 0.111346
-756 1.5 0.823589356371534 0.1075
-765 1.5 0.808183381146845 0.115074
-774 1.5 0.804024248189691 0.11739
-783 1.5 0.809214446523965 0.114048
-792 1.5 0.816922307151774 0.116292
-801 1.5 0.817083370511117 0.107909
-810 1.5 0.803218112416446 0.122658
-819 1.5 0.801792365687339 0.115227
-828 1.5 0.824077216658734 0.106899
-837 1.5 0.808010406353855 0.115016
-846 1.5 0.805503181301868 0.111932
-855 1.5 0.810848938809874 0.114185
-864 1.5 0.80077383728855 0.11448
-873 1.5 0.804510616030174 0.115034
-882 1.5 0.800413194808912 0.119586
-891 1.5 0.820672192506787 0.110138
-900 1.5 0.810326379209945 0.112103
-909 1.5 0.811096513155687 0.117781
-918 1.5 0.804732736125644 0.117636
-927 1.5 0.807950118492599 0.113333
-936 1.5 0.807915776288532 0.113945
-945 1.5 0.832853748697962 0.107864
-954 1.5 0.819637006281129 0.112646
-963 1.5 0.807519443743277 0.110865
-972 1.5 0.816853380970702 0.113831
-981 1.5 0.802684401572777 0.116302
-990 1.5 0.809446019377896 0.112676
-999 1.5 0.804655052306582 0.112474
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/refGenes.bed
--- a/SpliceTrap/refGenes.bed Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,66259 +0,0 @@\n-chr1\t67092175\t67134971\tNM_001276351\t0\t-\t67093004\t67127240\t0\t8\t1429,187,70,113,158,92,86,42,\t0,3059,4076,23176,33576,34990,38966,42754,\n-chr1\t67092175\t67134971\tNM_001276352\t0\t-\t67093579\t67127240\t0\t9\t1429,70,145,68,113,158,92,86,42,\t0,4076,11062,19401,23176,33576,34990,38966,42754,\n-chr1\t67092175\t67134971\tNR_075077\t0\t-\t67134971\t67134971\t0\t10\t1429,70,145,68,143,113,158,92,86,42,\t0,4076,11062,19401,21438,23176,33576,34990,38966,42754,\n-chr1\t201283451\t201332993\tNM_000299\t0\t+\t201283702\t201328836\t0\t15\t453,104,395,145,208,178,63,115,156,177,154,187,85,107,2920,\t0,10490,29714,33101,34120,35166,36364,36815,38526,39561,40976,41489,42302,45310,46622,\n-chr1\t201283451\t201332993\tNM_001005337\t0\t+\t201283702\t201328836\t0\t14\t453,104,395,145,208,178,115,156,177,154,187,85,107,2920,\t0,10490,29714,33101,34120,35166,36815,38526,39561,40976,41489,42302,45310,46622,\n-chr1\t8352403\t8423687\tNM_001042682\t0\t-\t8355086\t8364133\t0\t13\t2717,181,147,721,223,1379,114,162,200,93,163,81,127,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,71157,\n-chr1\t8352403\t8817640\tNM_001042681\t0\t-\t8355086\t8656297\t0\t23\t2717,181,147,721,223,1379,114,162,200,93,163,81,99,100,125,49,105,97,106,126,71,469,481,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,113521,142659,145001,156223,188810,204071,205014,262157,271906,303569,464756,\n-chr1\t8352403\t8817640\tNM_012102\t0\t-\t8355086\t8656297\t0\t24\t2717,181,147,721,223,1379,114,162,200,93,163,81,99,100,125,49,105,97,106,126,71,469,185,481,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,113521,142659,145001,156223,188810,204071,205014,262157,271906,303569,440000,464756,\n-chr1\t33513998\t34165274\tNM_001281956\t0\t-\t33519517\t34165097\t0\t71\t2572,213,139,88,113,162,63,180,112,74,174,174,180,177,183,174,189,174,249,174,195,186,189,147,189,114,81,146,178,189,210,117,70,119,105,97,125,204,96,114,117,195,188,139,192,203,127,192
,157,170,189,216,117,189,188,139,195,327,183,113,104,122,125,88,78,113,208,195,113,217,364,\t0,5466,5813,7464,9308,10883,13197,19051,19797,23023,23437,26526,27131,28721,32038,36178,43735,45301,53594,55375,57533,58507,63297,66754,69643,72505,73089,86866,88370,91283,97042,100505,102907,103500,108168,109371,110520,111052,112487,119423,121215,122361,132649,138324,143947,148891,178931,184754,186518,195090,200588,202287,210198,210517,211350,212548,225141,229281,258570,274601,278424,296744,305714,306470,311698,332885,404095,421761,518595,574978,650912,\n-chr1\t33513998\t34165842\tNM_052896\t0\t-\t33519517\t34165813\t0\t70\t2572,213,139,88,113,162,63,180,112,74,174,174,180,177,183,174,174,174,195,186,189,147,189,114,81,146,178,189,126,210,117,70,119,105,97,125,204,96,114,117,195,188,139,192,203,127,192,157,170,189,216,117,189,188,139,195,327,183,113,104,122,125,88,78,113,208,195,113,217,96,\t0,5466,5813,7464,9308,10883,13197,19051,19797,23023,23437,26526,27131,28721,32038,36178,53594,55375,57533,58507,63297,66754,69643,72505,73089,86866,88370,91283,91864,97042,100505,102907,103500,108168,109371,110520,111052,112487,119423,121215,122361,132649,138324,143947,148891,178931,184754,186518,195090,200588,202287,210198,210517,211350,212548,225141,229281,258570,274601,278424,296744,305714,306470,311698,332885,404095,421761,518595,574978,651748,\n-chr1\t41847188\t42035925\tNR_038261\t0\t-\t42035925\t42035925\t0\t4\t1257,219,112,119,\t0,1682,71224,188618,\n-chr1\t75202130\t75611114\tNM_001130058\t0\t-\t75203726\t75541447\t0\t24\t1703,85,89,71,74,104,95,263,88,93,100,132,113,84,124,61,126,85,85,74,49,39,82,75,\t0,9337,11574,11788,12474,13623,15735,16359,17126,17669,20230,25595,31855,34856,36382,39870,40755,49079,72827,98481,137451,194452,339304,408909,\n-chr1\t75202132\t75611116\tNM_001320285\t0\t-\t75203726\t75300660\t0\t26\t1701,85,89,71,74,104,95,263,88,93,100,132,113,84,124,61,126,85,85,74,91,49,39,67,82,77,\t0,9335,11572,11786,12472,13621,15733,16357,17124,17667,20228,25593,31853,
34854,36380,39868,40753,49077,72825,98479,109482,137449,194450,196222,339302,408907,\n-chr1\t75202132\t75611116\tNM_001320287\t0\t-\t75203726\t75300660\t0\t25\t1701,85,'..b'0123\t230123\t0\t8\t106,146,111,89,40,166,208,100,\t0,3710,5076,6459,7823,9904,12166,12494,\n-chr15_KI270727v1_random\t241092\t241174\tNR_049886\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n-chr15_KI270727v1_random\t241092\t241174\tNR_049895\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n-chr15_KI270727v1_random\t241092\t241174\tNR_128721\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n-chr15_KI270727v1_random\t372321\t373405\tNM_001004719\t0\t+\t372419\t373361\t0\t1\t1084,\t0,\n-chr16_KI270728v1_random\t17231\t19833\tNM_001099687\t0\t+\t17263\t18809\t0\t2\t394,1037,\t0,1565,\n-chr16_KI270728v1_random\t17231\t19833\tNM_016212\t0\t+\t17263\t18809\t0\t2\t394,1037,\t0,1565,\n-chr16_KI270728v1_random\t17231\t19833\tNR_110886\t0\t+\t19833\t19833\t0\t4\t394,167,289,1037,\t0,499,1064,1565,\n-chr16_KI270728v1_random\t17231\t19833\tNR_110897\t0\t+\t19833\t19833\t0\t4\t394,167,289,1037,\t0,499,1064,1565,\n-chr16_KI270728v1_random\t17231\t19833\tNR_110910\t0\t+\t19833\t19833\t0\t3\t394,289,1037,\t0,1064,1565,\n-chr16_KI270728v1_random\t17231\t19833\tNR_110911\t0\t+\t19833\t19833\t0\t3\t394,289,1037,\t0,1064,1565,\n-chr16_KI270728v1_random\t17233\t19273\tNM_001205259\t0\t+\t17263\t18809\t0\t2\t392,477,\t0,1563,\n-chr16_KI270728v1_random\t17233\t19273\tNR_110914\t0\t+\t19273\t19273\t0\t3\t392,289,477,\t0,1062,1563,\n-chr16_KI270728v1_random\t17250\t19837\tNM_001330061\t0\t+\t17263\t18809\t0\t2\t375,1041,\t0,1546,\n-chr16_KI270728v1_random\t17251\t19833\tNM_001330066\t0\t+\t17263\t18809\t0\t2\t374,1037,\t0,1545,\n-chr16_KI270728v1_random\t933855\t936466\tNR_110898\t0\t-\t936466\t936466\t0\t3\t1039,290,268,\t0,1251,2343,\n-chr16_KI270728v1_random\t1001629\t1037618\tNR_135178\t0\t+\t1037618\t1037618\t0\t25\t105,103,322,130,110,172,175,173,104,151,211,133,128,196,134,396,152,192,188,193,183,182,148,197,233,\t0,1009,5706,6177,6753,7361,856
1,11932,12489,13586,16444,16928,17168,17388,20254,20921,21434,22284,28770,30290,31190,34103,35109,35455,35756,\n-chr16_KI270728v1_random\t1331814\t1346848\tNR_130771\t0\t-\t1346848\t1346848\t0\t3\t5057,93,295,\t0,6640,14739,\n-chr16_KI270728v1_random\t1331814\t1346848\tNR_130772\t0\t-\t1346848\t1346848\t0\t2\t5057,295,\t0,14739,\n-chr17_GL000205v2_random\t54856\t57966\tNR_003682\t0\t-\t57966\t57966\t0\t1\t3110,\t0,\n-chr22_KI270731v1_random\t69141\t86923\tNR_003267\t0\t+\t86923\t86923\t0\t13\t189,171,135,81,193,158,150,137,188,128,113,114,280,\t0,732,4094,4362,9648,10233,11930,12603,16240,16660,16890,17084,17502,\n-chr22_KI270733v1_random\t122272\t135645\tNR_046235\t0\t+\t135645\t135645\t0\t1\t13373,\t0,\n-chr22_KI270733v1_random\t130203\t135280\tNR_003287\t0\t+\t135280\t135280\t0\t1\t5077,\t0,\n-chr22_KI270733v1_random\t121580\t121672\tNR_106782\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n-chr22_KI270733v1_random\t121580\t121672\tNR_128715\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n-chr22_KI270733v1_random\t121580\t121672\tNR_128716\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n-chr22_KI270733v1_random\t121580\t121672\tNR_128717\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n-chr22_KI270733v1_random\t125128\t125189\tNR_037458\t0\t+\t125189\t125189\t0\t1\t61,\t0,\n-chr22_KI270733v1_random\t125128\t125189\tNR_128714\t0\t+\t125189\t125189\t0\t1\t61,\t0,\n-chr22_KI270733v1_random\t125930\t127799\tNR_003286\t0\t+\t127799\t127799\t0\t1\t1869,\t0,\n-chr22_KI270733v1_random\t128876\t129032\tNR_003285\t0\t+\t129032\t129032\t0\t1\t156,\t0,\n-chr22_KI270733v1_random\t170214\t170275\tNR_037458\t0\t+\t170275\t170275\t0\t1\t61,\t0,\n-chr22_KI270733v1_random\t170214\t170275\tNR_128714\t0\t+\t170275\t170275\t0\t1\t61,\t0,\n-chr22_KI270733v1_random\t171011\t172880\tNR_003286\t0\t+\t172880\t172880\t0\t1\t1869,\t0,\n-chr22_KI270733v1_random\t173955\t174111\tNR_003285\t0\t+\t174111\t174111\t0\t1\t156,\t0,\n-chr22_KI270734v1_random\t72453\t74335\tNR_136575\t0\t+\t74335\t74335\t0\t1\t1882,\t0,\n-chr22_KI270734v1_r
andom\t90958\t98408\tNR_136574\t0\t-\t98408\t98408\t0\t4\t429,155,73,884,\t0,5076,5960,6566,\n-chr22_KI270734v1_random\t131493\t137393\tNM_005675\t0\t+\t131645\t136994\t0\t5\t262,161,101,141,549,\t0,342,3949,4665,5351,\n-chr22_KI270734v1_random\t138078\t161750\tNM_001195226\t0\t-\t138479\t156446\t0\t14\t589,89,99,176,147,93,82,80,117,65,150,35,209,62,\t0,664,4115,5535,6670,6925,8561,9545,10037,10335,12271,12908,18210,23610,\n-chr22_KI270734v1_random\t138078\t161852\tNM_016335\t0\t-\t138479\t161586\t0\t15\t589,89,99,176,147,93,82,80,117,65,150,35,209,313,164,\t0,664,4115,5535,6670,6925,8561,9545,10037,10335,12271,12908,18210,23235,23610,\n'
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/splice_trap.xml
--- a/SpliceTrap/splice_trap.xml Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,35 +0,0 @@
-<tool id="splice_trap" name="SpliceTrap" version="1.0.0">
-    <description>A statistic tool for quantifying exon inclusion ratios in paired-end RNA-seq data, with broad applications for the study of alternative splicing.
-    </description>
-    <requirements>
-        <requirement type="package" version="1.2.1.1">bowtie</requirement>
-    </requirements>
-    <command detect_errors="exit_code"><![CDATA[
-        perl $__tool_directory__/SpliceTrap.pl -p 8 -l $__tool_directory__ -d hg38v3 -1 "$input1" -2 "$input2" -s "$read_size" "$output1" "$output2"
-    ]]></command>
-    <inputs>
-        <param type="data" name="input1" format="fastq" />
-        <param type="data" name="input2" format="fastq" />
-        <param name='read_size' type='integer' value='50' label="Read size"  />
-    </inputs>
-    <outputs>
-        <data name="output1" format="txt" />
-        <data name="output2" format="txt" />
-    </outputs>
-    <tests>
-        <test>
-            <param name="input1" value="input1.fastq"/>
-            <param name="input2" value="input2.fastq"/>
-            <output name="output1" file="output1.txt"/>
-            <output name="output2" file="output2.txt"/>
-        </test>
-    </tests>
-    <help> 
-        **SpliceTrap**
-    </help>
-    <citations>
-        <citation type="bibtex">
-            http://rulai.cshl.edu/splicetrap
-        </citation>
-    </citations>
-</tool>
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/src/Makefile
--- a/SpliceTrap/src/Makefile Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-all:
- g++ -O2 splicetrap.estimate.cpp -o Pair_estimate_c
- mv Pair_estimate_c ../bin
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/src/splicetrap.estimate.cpp
--- a/SpliceTrap/src/splicetrap.estimate.cpp Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,854 +0,0 @@\n-//Author: Jie Wu@CSHL\n-//TO replace the original Perl code\n-\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <iostream>\n-#include <fstream>\n-#include <string.h>\n-#include <unistd.h>\n-#include <sstream>\n-#include <map>\n-#include <vector>\n-#include <math.h>\n-#include <time.h>\n-#include <limits>\n-#include <iomanip>\n-\n-using namespace std;\n-\n-int MAX_LINE_LEN = 1024*1024;\n-\n-void printusage()\n-{\n-    cout<< "\\tUsage:"<<endl;\n-    cout<<"\\t-s\\tread_size"<<endl;\n-    cout<<"\\t-b\\tIRM file"<<endl;\n-    cout<<"\\t-f\\tFZM file"<<endl;\n-    cout<<"\\t-1\\tmapping result 1"<<endl;\n-    cout<<"\\t-2\\tmapping result 2"<<endl;\n-    cout<<"\\t-d\\tTXdb database file"<<endl;\n-    cout<<"\\t-o\\toutput prefix"<<endl;\n-    \n-}\n-\n-struct ReadPairInfo\n-{\n-\tbool isPaired;\n-\tvector<int> IsoFlag; //flag as 1 if mapped to the isoform\n-\tvector<int> Size; //fragment size in the isoform\n-\tvector<int> Pos;\n-};\n-\n-class TXdb_entry{\n-\tpublic:\n-\t string chrid;\n-\t //int start;\n-\t //int end;\n-\t string id;\n-\t bool iscomplete;\n-\t int start;\n-\t int end;\n-\t int exonstarts[3];\n-\t char strand;\n-\t int exonsize[3];\n-\t int len1;\n-\t int len2;\n-\t vector< ReadPairInfo > pairs;\n-\t int snum1; //single-end reads\n-\t int snum2;\n-\t int snum12;\n-\t float ir, bir;\n-\t int enum1; //exon body reads\n-\t int enum2;\n-\t int enum3;\n-\t \n-\t int jnum12;\n-\t int jnum23;\n-\t int jnum13;\n-\n-\t int totalreadnum;\n-\t void estimate(vector<double> &FZM, map<string, vector<double> > &IRM, int &read_size)\n-\t {\n-\t\t float Maxe=0;\n-\t\t float Max = -numeric_limits<float>::max();\n-\t\t float BMaxe=0;\n-\t\t float BMax=-numeric_limits<float>::max();\n-\t\t for(float e1=0.001; e1<1;e1=e1+0.001)\n-\t\t {\n-\t\t\t float efix1 = e1*len1/\n-\t\t\t               (e1*exonsize[1]+len2);\n-\t\t\t float efix2 = 1-efix1;\n-\t\t\t float LL = snum12*log( efix1/(len1-read_size+1)\n-\t\t\t          + efix2/(len2-read_size+1) 
)\n-\t\t\t          + snum1*log(efix1/(len1-read_size+1)) \n-\t\t\t\t  + snum2*log(efix2/(len2-read_size+1));\n-\t\t\t\t  //cout<<LL<<endl;\n-\t\t\t\t  //cout<<efix1<<"\\t"  <<pairs.size()<<"\\t"  <<len2<<"\\t"\t<<endl;\n-\t\t         //int num1=0, num2=0,num12=0;\n-\t\t         for(int i=0;i<pairs.size();i++)\n-\t\t\t  {\n-                            if(pairs[i].IsoFlag[0] == 1)\n-\t\t\t    {\n-\t\t\t\t    if(pairs[i].IsoFlag[1] == 0)\n-\t\t\t\t    {\n-\t\t\t\t\t   // num1++;\n-\t\t\t\t\t    float tmp=FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1);\n-\t\t\t\t\t    if (tmp == 0)\n-\t\t\t\t\t    {\n-\t\t\t\t\t    \t LL = LL - 308;\n-\t\t\t\t\t    }\n-\t\t\t\t\t    else\n-\t\t\t\t\t    {\n-\t\t\t\t\t\t LL = LL + log(tmp);\n-\t\t\t\t\t    }\n-\t\t\t\t    }\n-\t\t\t\t    else\n-\t\t\t\t    {\n-\t\t\t\t\t   // num12++;\n-\t\t\t\t\t    float tmp=(FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1)) \n-\t\t\t\t\t          + (FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1));\n-\t\t\t\t  \t    if(tmp ==0)\n-\t\t\t\t\t    {\n-\t\t\t\t\t    LL = LL -308;\n-\t\t\t\t\t    }\n-\t\t\t\t\t    else\n-\t\t\t\t\t    {\n-\t\t\t\t\t\t    LL = LL + log(tmp);\n-\t\t\t\t\t    }\n-\t\t\t\t    }\n-\t\t\t    }\n-\t\t\t    else\n-\t\t\t    {\n-\t\t\t\t    //num2++;\n-\t\t\t\t    float tmp = FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1);\n-\t\t\t\t    if(tmp ==0)\n-\t\t\t\t    {\n-\t\t\t\t    LL= LL - 308; \n-\t\t\t\t    }\n-\t\t\t\t    else\n-\t\t\t\t    {\n-\t\t\t\t\t    LL = LL +log(tmp);\n-\t\t\t\t    }\n-\t\t\t    }\n-\t\t\t  }\n-\t\t\t  //cout<<num1<<"\\t"<<num2<<"\\t"<<num12<<"\\n";\n-\t\t\t  //cout<<LL<<endl;\n-\t\t\t  \n-\t\t\t  if(IRM["CA"].size() > 0)\n-\t\t\t  {\n-\t\t\t\t  float BLL;\n-\t\t\t\t  string eventtype =id.substr(0,2);\n-\t\t\t\t  if(!eventtype.compare("ME"))\n-\t\t\t\t  {\n-\t\t\t\t\t  eventtype = "CA";\n-\t\t\t\t  }\n-\t\t\t\t  BLL = LL + log(IRM[eventtype][int(e1/0.001)]);\n-\t\t\t\t  if(BLL > BMax)\n-\t\t\t\t  { \n-\t\t\t\t\t  BMax = 
BLL;\n-\t\t\t\t\t  BMaxe = e1;\n-\t\t\t\t  }\n-\t\t\t  }\n-\t\t\t\t  //cout<<LL<<endl;\n-\t\t\t  if(LL > Max)\n-\t\t\t  {\n-\t\t\t\t  Max= LL;\n-\t\t\t\t  Maxe = e1;\n-\n-\t\t\t  }\n-\t\t\t\t  \n-\t\t }\n-\t\t ir= Maxe;\n-\t\t bir= BMaxe;\n-\t }\n-\t\t\n-\t TXdb_entry(char *line)\n-\t {\n-\t\t size_t found;\n-\t\t string linestr(line);\n-\t\t found = linestr.find("[L]");\n-\t\t if(found != string::npos)\n-\t\t {\n-\t\t\t iscomplete=1;\n-\t\t\n-\t\t \tchar *token;\n-\n-\t\t \ttoken = strtok(line,"\\t");\n-\t\t \tif(token==NULL)\n-\t\t \t{\n-\t\t\t\t cout<<"error1"<<endl;\n-\t\t\t\t exit(0);\n-\t\t\t }\n-\t\t\t else\n-\t\t\t {\n-\t\t\t\t c'..b'gle_read_num<<endl;\n-      cout<<"Paired reads used:\\t"<<total_pair_read_num<<endl;\n-\n-      log_file<<"#total_read_num:\\t"<<total_read_num*2<<endl;\n-      log_file<<"#total_single_read_num:\\t"<<total_single_read_num<<endl;\n-      log_file<<"#total_pair_read_num:\\t"<<total_pair_read_num<<endl;\n-      \n-      //Calculate ratios and output\n-      cout<<"Now doing estimations and output to "<<ratio_file_name<<"\\t"<<num_file_name<<endl;\n-      ofstream ratio_file(ratio_file_name.c_str());\n-      ofstream num_file(num_file_name.c_str());\n-      map<string, TXdb_entry>::iterator txdb_it = TXdb_entries.begin();\n-      for(;txdb_it!=TXdb_entries.end();txdb_it++)\n-      {\n-\t      string eventid = txdb_it->first;\n-\t      //cout<< eventid <<endl; \n-\t      txdb_it->second.estimate(FZM,IRM,read_size);\n-/*\t      ratio_file<< eventid<<"\\t"\n-\t      \t\t<<setprecision(4)<<fixed\n-\t                <<txdb_it->second.ir<<"\\t"\n-\t\t\t<<txdb_it->second.bir<<"\\t"\n-\t\t\t<<"NA\\tNA\\tNA\\t"\n-\t\t\t<<txdb_it->second.jnum12<<"\\t"\n-\t\t\t<<txdb_it->second.jnum23<<"\\t"\n-\t\t\t<<txdb_it->second.jnum13<<"\\t"\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\\t"\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 
+txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\\t"\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\\t"\n-\t\t\t<<txdb_it->second.exonsize[0]<<"\\t"\n-\t\t\t<<txdb_it->second.exonsize[1]<<"\\t"\n-\t\t\t<<txdb_it->second.exonsize[2]<<"\\t"\n-\t\t<<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<endl;\n-\t\t*/\n-\t      ratio_file<<eventid<<"\\t"\n-\t      \t\t<<setprecision(4)<<fixed\n-\t\t\t<<txdb_it->second.ir<<"\\t"  //1\n-\t\t\t<<txdb_it->second.bir<<"\\t" //2\n-\t\t\t<<txdb_it->second.chrid<<"\\t" //3\n-\t\t\t<<txdb_it->second.start<<"," //\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\\t" //4\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\\t"//5\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\\t"//6\n-\t\t\t<<txdb_it->second.strand<<"\\t"//7\n-\t\t\t<<txdb_it->second.jnum12<<"\\t"//8\n-\t\t\t<<txdb_it->second.jnum23<<"\\t"//9\n-\t\t\t<<txdb_it->second.jnum13<<"\\t"//10\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\\t"//11\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\\t"//12\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\\t"//13`\n-\t\t\t<<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<"\\t"//14\n-\t\t\t<<txdb_it->second.exonsize[0]<<"\\t"//15\n-\t\t\t<<txdb_it->second.exonsize[1]<<"\\t"//16\n-\t\t\t<<txdb_it->second.exonsize[2]<<endl;//17\n-\n-\t      num_file<< 
eventid<<"\\t"\n-\t\t\t<<txdb_it->second.chrid<<"\\t"\n-\t\t\t<<txdb_it->second.start<<","\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\\t"\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\\t"\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\\t"\n-\t\t\t<<txdb_it->second.strand<<"\\t"\n-                <<txdb_it->second.enum1<<"\\t"\n-\t\t\t<<txdb_it->second.enum2<<"\\t"\n-\t\t\t<<txdb_it->second.enum3<<"\\t"\n-\t\t\t<<txdb_it->second.jnum12<<"\\t"\n-\t\t\t<<txdb_it->second.jnum23<<"\\t"\n-\t\t\t<<txdb_it->second.jnum13<<endl;\n-      }\n-      log_file.close();\n-      ratio_file.close();\n-      num_file.close();\n-      clock_finish = clock();\n-      cout<<"Done! time used:"<<(double)(clock_finish-clock_start)/CLOCKS_PER_SEC<<" seconds"<<endl;\n-\n-      \n-}\n'
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/test-data/input1.fastq
--- a/SpliceTrap/test-data/input1.fastq Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b"@@ -1,200484 +0,0 @@\n-@ERR030881.107 HWI-BRUNOP16X_0001:2:1:13663:1096#0/1\n-ATCTTTTGTGGCTACAGTAAGTTCAATCTGAAGTCAAAACCAACCAATTT\n-+\n-5.544,444344555CC?CAEF@EEFFFFFFFFFFFFFFFFFEFFFEFFF\n-@ERR030881.311 HWI-BRUNOP16X_0001:2:1:18330:1130#0/1\n-TCCATACATAGGCCTCGGGGTGGGGGAGTCAGAAGCCCCCAGACCCTGTG\n-+\n-GFFFGFFBFCHHHHHHHHHHIHEEE@@@=GHGHHHHHHHHHHHHHHHHHH\n-@ERR030881.1487 HWI-BRUNOP16X_0001:2:1:4144:1420#0/1\n-GTATAACGCTAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCATAGTA\n-+\n-55*'+&&5'55('''888:8FFFFFFFFFF4/1;/4./++FFFFF=5:E#\n-@ERR030881.9549 HWI-BRUNOP16X_0001:2:1:1453:3458#0/1\n-AACGGATCCATTGTTTCGAGAACGTGATCGCCCTCATCTACCTAGCCTCA\n-+\n-D<@DDA@A:AHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHBHHHHHHH\n-@ERR030881.13497 HWI-BRUNOP16X_0001:2:1:16344:4145#0/1\n-GCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCACCATCGTGC\n-+\n-F@FFFGGFGFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHH\n-@ERR030881.14070 HWI-BRUNOP16X_0001:2:1:4377:4232#0/1\n-TGGACAGTTGCTCCTGGCTCCAGAACCTGTCTTGCAAGGGACAGTGGGGT\n-+\n-A:AA@HHHHHHHHHHHHHHHHHHHIHHHHHHHHHHGF=GFHHHH@@?AA*\n-@ERR030881.16375 HWI-BRUNOP16X_0001:2:1:2265:4573#0/1\n-ATTAGGAAACATGGAATTTTTTTAAAGGTTTTTCTTGTATCTTTTTTTTT\n-+\n-@<><CHHHHHHHHHHHHHHHHHGGHHHHHHHHHHGGGHHHHHHHHGGGGG\n-@ERR030881.18437 HWI-BRUNOP16X_0001:2:1:13904:4828#0/1\n-CAATAGCCAGATGGTTGGTGGGGCAGCCAGGCAGGGAGGACCCAGGGCTG\n-+\n-555544555544555;AAAAFFBBEEEE;=FCB9F===<<FFFFEFFEEE\n-@ERR030881.18768 HWI-BRUNOP16X_0001:2:1:15563:4868#0/1\n-GTGCCAAATTGTCACATTCGAGCTTGAGGCTGTGGTACTGAGCTTGCAGT\n-+\n-D>BFD@@?>>54454?FFGFGGGGGGGGGGGGGEGGGGGGGGGEGGGGGG\n-@ERR030881.20718 HWI-BRUNOP16X_0001:2:1:12184:5115#0/1\n-CCCGGCCTAACTTTCATTTAATTTCAATGAATTTTCTTTTTTTTTTTTTT\n-+\n-56455==@=>HHHHHHHHHGHHHHHHHHGH=HHHHHHEEEECEEEEEEEE\n-@ERR030881.22833 HWI-BRUNOP16X_0001:2:1:13089:5358#0/1\n-GGAGAAGGGGCGAGGGAAGAAGACCTTTGCTATCCCAGATACCAGGACTG\n-+\n-55544145444/444GFDFG9A@@@DD>.F@><<=FDD@AGG>GGEGGEG\n-@ERR030881.23643 
HWI-BRUNOP16X_0001:2:1:7921:5452#0/1\n-CGGCCCCCTGCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCA\n-+\n-FBDFFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHH\n-@ERR030881.28299 HWI-BRUNOP16X_0001:2:1:6428:5960#0/1\n-ATGAGAAGGAGCCATCAGGACCTTATGAAAGCGACGAAGACAAGAGTGAT\n-+\n-55554DDFFFBBFFFHHGHHHHHHHHHHHHHHHHHHDHH8HHHHHHHHFH\n-@ERR030881.28475 HWI-BRUNOP16X_0001:2:1:14780:5977#0/1\n-CGAAAACCAACTCTTTACCTAACTTTGCATGGTGCTTAGTCAAGGACTCC\n-+\n-555,4&4551FFFFFBF3BDFFFFFFEFFFFBEFFFFFFDFFFFFFFFF=\n-@ERR030881.29253 HWI-BRUNOP16X_0001:2:1:1570:6070#0/1\n-GGAATGTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCA\n-+\n-HGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.30545 HWI-BRUNOP16X_0001:2:1:4103:6216#0/1\n-CAACTCTTTACCTAACTTTGCATGGTGCTTAGTCAAGGACTCCTGCGACC\n-+\n-54-55A@A@@HHHHHFFGGE555558<=;=55555AAAA?HHHHH>8@@>\n-@ERR030881.32582 HWI-BRUNOP16X_0001:2:1:12474:6471#0/1\n-CTTGCCTCACATGTCAGGGCAGGTATCCACCTAACCAGGCTGCAGGGGAG\n-+\n-555555544444544HHHHGHHHHHHHHHHHHHHHHHHHHHH5@HFFF*F\n-@ERR030881.33730 HWI-BRUNOP16X_0001:2:1:14154:6628#0/1\n-CCAGCCTTGATACAGCATTTTCCACTTCTCTCTGTAGAGATCAGACGATT\n-+\n-55555555(5@>@=:@=8.@04554CCCCC.441445444-555445555\n-@ERR030881.35226 HWI-BRUNOP16X_0001:2:1:3903:6867#0/1\n-CAGCATCCTGCTTAGGGCCCTGGAAACTGGGGAAATAGGTAGCCAGGTGG\n-+\n-55555A@AAAGGEGGGGGGGGGGGGGGGGGGGCGGGFEGFGGGGFGGCGG\n-@ERR030881.38182 HWI-BRUNOP16X_0001:2:1:17495:7451#0/1\n-CACCATCGTGCCCGTTCTTGTCTTCCTTGGAGAGGTGGGCCTGGGAACCC\n-+\n-5544455,0545445FFFEEFFFFFFFFFEEBC;D6<5-?FFFFFFFFFF\n-@ERR030881.41234 HWI-BRUNOP16X_0001:2:1:14816:8065#0/1\n-CTCTCCTCTAACCCTCCAGGCCTTAGCTTGCCTCACATGTCAGGGCAGGT\n-+\n-55,34)4-53HHEHHGGGGG7DC?@GG;BGGEGGGGGGGGGGGGGGGGGA\n-@ERR030881.55301 HWI-BRUNOP16X_0001:2:1:7892:11256#0/1\n-CAAAAATGTAGCTGCCCTGACCTGGTCTCCCCTGACCCTTCCACGGGGCT\n-+\n-56624545442525554455FFECECGEDGFF8DF###############\n-@ERR030881.57346 HWI-BRUNOP16X_0001:2:1:20039:11573#0/1\n-GACAGATGATGTCCAAGCCCCTACATGCCCCAGACCCCAGGGCACGGCTG\n-+\n-##################################################\n-@ERR030881.57608 
HWI-BRUNOP16X_0001:2:1:16788:11614#0/1\n-ATCTCGTAGTACATCACATAGTGACGCTGCATCTCTGACTTCTCACTGGC\n-+\n-5653445555HHHHHHHHHH9;@=@HHHHHHDHHHHHHHHHHHHHHHHDH\n-@ERR030881.58998 HWI-BRUNOP16X_0001:2:1:14252:11816#0/1\n-CACCATTTGACCCTGAGCCAG"..b':6601:197274#0/1\n-CGGCCCCCTGCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCA\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHH\n-@ERR030881.74446016 HWI-BRUNOP16X_0001:2:68:6384:197508#0/1\n-TGTGTCTTGTGCTAAACATTCCTTTCTCTCCGTGCCTCTGTCTCCCCTCT\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74446277 HWI-BRUNOP16X_0001:2:68:20062:197534#0/1\n-CAGCCCTCTCACCCTGGTACTGCATGCACGCAATGCTAGCTGCCCCTTTC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHEHHHGHIIHHHHHAHHHHHHHHHGH\n-@ERR030881.74446743 HWI-BRUNOP16X_0001:2:68:3752:197585#0/1\n-CTGGGACCCAGGCAGCTGCCACCTTGTCACCATGAGAGAATTTGGGGAGT\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHG\n-@ERR030881.74446915 HWI-BRUNOP16X_0001:2:68:8353:197599#0/1\n-GGACTGTCCACCAGGTCCCGACGGGCAGGAATGCAGATGGGTACCTTTCC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHFHHHHHHHE\n-@ERR030881.74447547 HWI-BRUNOP16X_0001:2:68:9591:197654#0/1\n-GCCAGTGGTGGGCATGCGGCTGCGGAGCACGTCCTGAGCTGTGGGGACGT\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHBDDBB@9@AAHHGHHHHHGHHDHHH\n-@ERR030881.74449534 HWI-BRUNOP16X_0001:2:68:1488:197840#0/1\n-CTACTCCTTCCGCAGCAGGGAGGTGTGCAGAGCCGTGCTCAGCTTCCTCT\n-+\n-HHHHHHHHHHHHHHHHHHHHHHH8HAGFGGFHHHFGGHHHHHGHHHIHGH\n-@ERR030881.74453424 HWI-BRUNOP16X_0001:2:68:5325:198191#0/1\n-GTCCTGCCCTACCTCTCCCAAGAGCACCAGCAGCAGGTCTTGGGAGCCAT\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74454854 HWI-BRUNOP16X_0001:2:68:18716:198301#0/1\n-GCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTTGGGACCTGGTCTC\n-+\n-55555?>?>>5555444555444442=5<=55444C=6C>2555551544\n-@ERR030881.74455894 HWI-BRUNOP16X_0001:2:68:18831:198398#0/1\n-CTGGGACCTGCGGGAGGGCCGCCAGCTGCAGCAGCATGACTTCAGCTCCC\n-+\n-HHHHHHHHHHHHHHHHHGEHHHHHHHCHHHFHHHHHEFGDFHHHEHBFHH\n-@ERR030881.74457151 
HWI-BRUNOP16X_0001:2:68:9093:198528#0/1\n-AAACAAAACATTTTCCTTTGGGTTTTTTTTTTTCTTTCTTTTTTCTCCGC\n-+\n-HHHHHHGGGHHHHHHHHHHHHHHHHHHHGGGGGBGGGBHHGGGGGGGHHH\n-@ERR030881.74458067 HWI-BRUNOP16X_0001:2:68:15716:198600#0/1\n-GTTCCAACCACCGCCGGGGAGGGAGAGGGCCCCTGTCCCTGCAGGGCCGC\n-+\n-ADAD?DEFBEHHHHHCCDGDHCEEHCGBGAHHHHHCDCGD5555424554\n-@ERR030881.74460390 HWI-BRUNOP16X_0001:2:68:15056:198815#0/1\n-CCTGGAACTGCCTGACCATAGTCTGATTCTGCAGGTCCCAGACCACAATG\n-+\n-?ACDC?DDGG=DDD>55554GGFFADDDA==<==>D=DAD5445544445\n-@ERR030881.74460430 HWI-BRUNOP16X_0001:2:68:19789:198814#0/1\n-CACAAATCCCGTTCAGCCTTTTGACGATCTCAGCCTGTTTGTGCATCTCG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74460883 HWI-BRUNOP16X_0001:2:68:19795:198864#0/1\n-CTGCCTGGCACGCACCCGGTGGCTGCACCATCCACACGCAAGACTGCAAC\n-+\n-HHHHHHHHHHHHHHHHHDHHHHHGHHFHHHHHHHHHHHFHHHFHGHFHHH\n-@ERR030881.74463349 HWI-BRUNOP16X_0001:2:68:7211:199081#0/1\n-CGGGGAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGAGAAA\n-+\n-HHHHHHHGEHHHHHHHGGGGEHGHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74463429 HWI-BRUNOP16X_0001:2:68:16435:199090#0/1\n-CGGGCTCCTCGCACCTACCCCAGCAACTCAAATTCACCACCTCGGACTCC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74466171 HWI-BRUNOP16X_0001:2:68:1844:199339#0/1\n-ATTTTTTTAAAGGTTTTTCTTGTATCTTTTTTTTTTTTTTTTTTTTTTTT\n-+\n-HHHHHHHGGHHHHGHHHGHC83=;><=@=<CCCCCCCCCCCCCCCCCCCC\n-@ERR030881.74466232 HWI-BRUNOP16X_0001:2:68:10444:199339#0/1\n-CCTGGGTCGCCCACCCTCACCCTGCTCCTCCCAGCTCAGCTAAGCTCGTC\n-+\n-HHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74466444 HWI-BRUNOP16X_0001:2:68:18815:199349#0/1\n-GTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCATAGTA\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIH\n-@ERR030881.74468879 HWI-BRUNOP16X_0001:2:68:9428:199583#0/1\n-CACCAACCAGCCGCGGGCCGCGCAGCTGGTGGACAAGGACAGCACCTTCC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHGHH\n-@ERR030881.74470889 
HWI-BRUNOP16X_0001:2:68:4971:199775#0/1\n-CAGAGCTTAGCGGGGGGCTGAGCTGGTGTCTTTGAACCTCTAGTCCCAGG\n-+\n-HHHHHHHHHHHHHHHCGGGHEHHFHHEHHHHHHHHHHHEHHHHHFHHHHH\n-@ERR030881.74471439 HWI-BRUNOP16X_0001:2:68:16981:199816#0/1\n-TGTGTGCCCCATTTCTCCATATAGTCTTCCTCAGGCAGGTCCTAGGTCCC\n-+\n-??DDDEDECC<=@><CCC@?<<<=@EGGGGG?GGGGCGCE>@@6=55554\n-@ERR030881.74471978 HWI-BRUNOP16X_0001:2:68:9605:199866#0/1\n-CCCAGGTCCTGCCCTACCTCTCCCAAGAGCACCAGCAGCAGGTCTTGGGA\n-+\n-HHHHHHHHHGHHHHHHHHGHHHHHHHHHHHHFHHHHHHHHHHGHHHHAHE\n'
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/test-data/input2.fastq
--- a/SpliceTrap/test-data/input2.fastq Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b"@@ -1,200484 +0,0 @@\n-@ERR030881.107 HWI-BRUNOP16X_0001:2:1:13663:1096#0/2\n-CGGATTTCAGCTACTGCAAGCTCAGTACCACAGCCTCAAGCTCGAATGTG\n-+\n-HH;HHHHHGHHHHHHHHHHGHDHEHHHHHEHHHHBHHFHHHHHHHHHD0F\n-@ERR030881.311 HWI-BRUNOP16X_0001:2:1:18330:1130#0/2\n-GAGTGCGAGGGAAGTCAGGGGAGGATCGCGAGGGAAGCCAGGGGAGGATC\n-+\n-HHHHHBF8G>&4555GGGGGHHGGEHHHHHHHHH=HHHHHHHHHHHGB9H\n-@ERR030881.1487 HWI-BRUNOP16X_0001:2:1:4144:1420#0/2\n-AACCGGGGGACGGGCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTT\n-+\n-@FEEH>==9=05544FGFGFHHHBHHHFHF>AAAAHHHHHHHEHHHHHHH\n-@ERR030881.9549 HWI-BRUNOP16X_0001:2:1:1453:3458#0/2\n-TCAGCATGCTTCTTAGGGCCCTGGAAACTGGGGAAATAGGTAGCCAGGTG\n-+\n-5515555/5515444FFHHHHHHHHHHHHHHHHHHHHHHHEHHHHGHH@H\n-@ERR030881.13497 HWI-BRUNOP16X_0001:2:1:16344:4145#0/2\n-GGCCAAGCAGGTCACCGCTCCCGAGCTGAACTCTATCATCCGACAGCAGC\n-+\n-HHHHFGHHHGFAFFFHHFHHHHH/HHHHGHHEHHEHGFHHDGF=AA=@@8\n-@ERR030881.14070 HWI-BRUNOP16X_0001:2:1:4377:4232#0/2\n-TGGAGTCCTTCATGCCCAGGTCTGGAACCCAGGTTCTGACCCCAGGGCCC\n-+\n-FDFFFEGGGGHHHHGHHHHH>AAA8GGGGGHHHGHHHHHHHHHHHGFHHH\n-@ERR030881.16375 HWI-BRUNOP16X_0001:2:1:2265:4573#0/2\n-GGCCAGCCGGGCTCCAGAGGGGTCAGGGCGCGACGAGAACCAACTCTTTA\n-+\n-FDFFBDFDDBAAADDGHGHHHHBHHHHHGHGHHHHHHHHHHHHHHHHHFH\n-@ERR030881.18437 HWI-BRUNOP16X_0001:2:1:13904:4828#0/2\n-GGGCTCTCCCTCTGTATCGCCTGGGGAGGCTGCTGAGGTGACTTTTTGGA\n-+\n-A?DDABFBFFHGHEHHHHHHHHHIHHDHCC55555BFFCD;:9=;=@=><\n-@ERR030881.18768 HWI-BRUNOP16X_0001:2:1:15563:4868#0/2\n-CACAGTAGGCGTTCTATAAATGTGTCACAAGAATGGCTTCCCTCAGGAAG\n-+\n-55444;@=@>HHHDHHHHHFFGHHHHHHHHHIHHHFH=HHBB?<D#####\n-@ERR030881.20718 HWI-BRUNOP16X_0001:2:1:12184:5115#0/2\n-GCCTGGGCAACATAGCGAAACCACATCTCTACAAAAAAATCCTCCAAAAT\n-+\n-HGIEHHHHGHF=@FF8A>>@HFHH=HHHHHHHIHHHGGGGH@@HHGGGEG\n-@ERR030881.22833 HWI-BRUNOP16X_0001:2:1:13089:5358#0/2\n-AGCCACTGCCTTTCTGCTCAGATGCTGGCACCTCCGCCCCCGGGGCTGCC\n-+\n-EHHHFF?GFDGFFB???DDAD<FC<55555FFGGG<?>>61/5444-555\n-@ERR030881.23643 
HWI-BRUNOP16X_0001:2:1:7921:5452#0/2\n-CGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAGCTGTCCC\n-+\n-HHHHHHHHHHHHHHHGHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHDHHH\n-@ERR030881.28299 HWI-BRUNOP16X_0001:2:1:6428:5960#0/2\n-GGAGTCACAGGATTTGGAGGCAGGAGTGCTGGCGGGAAGGGCATTCAGGA\n-+\n-HHHHHHHFEH?=DDDHIFHHEHEDE?DAADH@FHHC'@CE##########\n-@ERR030881.28475 HWI-BRUNOP16X_0001:2:1:14780:5977#0/2\n-CTCGGAAGGCAAGGCACATCTTGTGGTAGAAAATTTCGTGCAAATTAGGA\n-+\n-HHHHHGGH=IADDADHHGHH444-4A?A?AGHGHHFGFG@/5544HDHEE\n-@ERR030881.29253 HWI-BRUNOP16X_0001:2:1:1570:6070#0/2\n-CTTCGCGAGGTCTTGCTCCCTTGGGACCTGGTCTCCCATCTGACCCTCCA\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.30545 HWI-BRUNOP16X_0001:2:1:4103:6216#0/2\n-GTTTAAAGGTGATACTTATTCTCGGAAGGCAAGGCACATCTTGTGGGAGA\n-+\n-EF;GG4445544544FF@FFEHFHFFHGHH####################\n-@ERR030881.32582 HWI-BRUNOP16X_0001:2:1:12474:6471#0/2\n-GGGACAGGGAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGA\n-+\n-FF8FFBFFFFFDF@FCD>CFF@@F:HEHEHHHHBHHHHHF==<>5?DDA;\n-@ERR030881.33730 HWI-BRUNOP16X_0001:2:1:14154:6628#0/2\n-GTGAGGGTGGGCGACCCAGGATTCCCCCTCCCCTTCCCAAATAAAGATGA\n-+\n-BEFDB44(4411445DA?ADHHHHIFDDC>:::5@DDDC?HHHDEBFFB>\n-@ERR030881.35226 HWI-BRUNOP16X_0001:2:1:3903:6867#0/2\n-CAGAGCGTAAGAAATGGATCCATTGTTCCGAGAACGTGATCGCCCTCATC\n-+\n-HH@HHHFDHHHHHHHFHHHGHGHHHHHHHGGHHHHHFHHAHHHHHGHHGH\n-@ERR030881.38182 HWI-BRUNOP16X_0001:2:1:17495:7451#0/2\n-CCTCTCCCGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAG\n-+\n-GG/GGHHHHHHHHHHHHHHHHHHHDHHHHHFDHHHHHH@HHEHHHHHHHH\n-@ERR030881.41234 HWI-BRUNOP16X_0001:2:1:14816:8065#0/2\n-GGCAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGAGAAAGG\n-+\n-FFGHH55,5514441>><<BHHEHFF?9F4FFFBFHHHHHHHHGHHFF4H\n-@ERR030881.55301 HWI-BRUNOP16X_0001:2:1:7892:11256#0/2\n-CTTCGCAAATTTGTCCCAGGGATGGATCGCCTGTGCTGCCTTCGCCCGCC\n-+\n-D@5AA4453451444GGGFDHH@GEA;DDD=:=+:D@DFDEDHHB#####\n-@ERR030881.57346 HWI-BRUNOP16X_0001:2:1:20039:11573#0/2\n-CCTGTCCAGAGTCTGAGGGGGGAGGCCAGGCCCTGCCTTGGGGTCTGAGG\n-+\n-##################################################\n-@ERR030881.57608 
HWI-BRUNOP16X_0001:2:1:16788:11614#0/2\n-GGGGGGCGCCGCAGCTGCGCGGCCGCTCCCTCCTAGCCGGCCCTTGAGGG\n-+\n-HHHHHHHEGHIHHHHHHDHF@@<A?FFE@FGGGAG4====HHHHHHHEHB\n-@ERR030881.58998 HWI-BRUNOP16X_0001:2:1:14252:11816#0/2\n-CTGAATCCCTTGCCCAGAGGA"..b':6601:197274#0/2\n-CCGCTCCCGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGH\n-@ERR030881.74446016 HWI-BRUNOP16X_0001:2:68:6384:197508#0/2\n-GGCCCTGCCCTTGACCCCACTACCCGTGGGGCTGCAGCCGCCTTCGCTGC\n-+\n-HHHHHHHHHHHHHHHHHHIHHGHHH>A??@FHHHFHHFHDHH=HHB>4FF\n-@ERR030881.74446277 HWI-BRUNOP16X_0001:2:68:20062:197534#0/2\n-CTTTATTTGGGAAGGGGAGGGGGAATCCTGGGTCGCCCACCCTCACCCTG\n-+\n-HHHHHHHGGHHHHHHHHFHGGGGGHHHHHHHGHHHHFHHEH9BHEDD###\n-@ERR030881.74446743 HWI-BRUNOP16X_0001:2:68:3752:197585#0/2\n-CGGCCGGCTGCATCCCACACCAGCCTGAGCCCCAGACGGTCAGTCAGTGC\n-+\n-HHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHH\n-@ERR030881.74446915 HWI-BRUNOP16X_0001:2:68:8353:197599#0/2\n-CGAGGGGTCCAGAGTGGAGAGAGCCCCGAGCAGGAGTGCATCTCCCTCGC\n-+\n-HHHHHHHFHHHHHHHFHHHHHHHHHHHHHHHGHHHHHHHHHHIHFHHHGH\n-@ERR030881.74447547 HWI-BRUNOP16X_0001:2:68:9591:197654#0/2\n-GGCTGCAGATTCCATTCAGCAGGCCCGAGAGCAAGCACCACGCTAGCCTG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHG\n-@ERR030881.74449534 HWI-BRUNOP16X_0001:2:68:1488:197840#0/2\n-CAAGACTGCAACTTCAGATGCTCCGCACGCTGGAGATGCTGGACAGGGGC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFFHHHHHHHEHEHE\n-@ERR030881.74453424 HWI-BRUNOP16X_0001:2:68:5325:198191#0/2\n-CTTCCTTGGAGAGGTGGGCCTGGGAACCCAGCGCGGACAGCGAGAGGAGG\n-+\n-HHGHHHHHHHHHHHHHHHHHHGHHGHHHHHHHHHHHHHHHHHHHEHHHHG\n-@ERR030881.74454854 HWI-BRUNOP16X_0001:2:68:18716:198301#0/2\n-GGAATGTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCA\n-+\n-HF@GHD?>DA=<>;=444444245444445>>@>;BECBF@?A<>@AAA8\n-@ERR030881.74455894 HWI-BRUNOP16X_0001:2:68:18831:198398#0/2\n-GGACTGAGGACGACTCCTTGGACTGGAAAATGCTGGCCCCGTACGGCGTC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHHHHIH\n-@ERR030881.74457151 
HWI-BRUNOP16X_0001:2:68:9093:198528#0/2\n-GGAACCTTCTCCGGATTGGGTTCATGAGCATTTTTGTGGGTGTGTATGTG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHFH\n-@ERR030881.74458067 HWI-BRUNOP16X_0001:2:68:15716:198600#0/2\n-GGCCGTCTTTGACCTGCTCCTGGCTGTTGGCATTGCTGCCTACCCTGGCA\n-+\n-55555<@@@@===<655244A??DAC:C?#####################\n-@ERR030881.74460390 HWI-BRUNOP16X_0001:2:68:15056:198815#0/2\n-GGTGAGGCCAGCACCTTGTCCATTTGGGACCTGGCGGCGCCCACCCCCCG\n-+\n-5-5449=;==BFFBFDBFDDC>?>>D?DDDHHHHHBFFC@44244<<<<<\n-@ERR030881.74460430 HWI-BRUNOP16X_0001:2:68:19789:198814#0/2\n-ATGATGTTTCCACAAAGCAGGCATTCGGGCTCCTCGCACCTACCCCAGCA\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74460883 HWI-BRUNOP16X_0001:2:68:19795:198864#0/2\n-TGCTGCGGGTGTCTCCGGCTGGGCATGCGGGGGCCCGGGGACTGCCTGGC\n-+\n-HHHHHHHHHHHHGHHHHFDEBDDBB5552*DDBBFHHHHH@FDF######\n-@ERR030881.74463349 HWI-BRUNOP16X_0001:2:68:7211:199081#0/2\n-CTGGTCTCCCATCTGACCCTCCAGGCCTTAGCTTGCCTCACATGTCAGGG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFH\n-@ERR030881.74463429 HWI-BRUNOP16X_0001:2:68:16435:199090#0/2\n-GGACCTGGGCACAAATCCCGTTCAGCCTTTTGACGATCTCAGCCTGTTTG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74466171 HWI-BRUNOP16X_0001:2:68:1844:199339#0/2\n-GGTGGGGGTCGTGGAGTGGGGGAGGGAGGCCAGCCGGGCTCCAGAGGGGT\n-+\n-HHHHHHGGGHGBGEFHHHFHHG9GGC;HHEHHHCHFG@FFAA;=9DD;C7\n-@ERR030881.74466232 HWI-BRUNOP16X_0001:2:68:10444:199339#0/2\n-CCGTTTTGAACATGTGTAACCGACAGTCTGCCTGGGCCACAGCCCTCTCA\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHIHHHHHGHDHGHHHBHCFFFFHHHHH\n-@ERR030881.74466444 HWI-BRUNOP16X_0001:2:68:18815:199349#0/2\n-GGAAGGGCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTTGGGACCT\n-+\n-HHHHIHIHHHHHGHHHHHHHFHDHFGHHHHHEHEHHHHHHHHHHHHGHHH\n-@ERR030881.74468879 HWI-BRUNOP16X_0001:2:68:9428:199583#0/2\n-GGAGGCTGAAGTGCTGGACAGCCACGTAGGCCATGCCGAGGTAGGCAGCA\n-+\n-HFHHHHHHHHHHIHGHHHHHHHHHHHHHHHHHEHHHHGGHH?FHHHHHGH\n-@ERR030881.74470889 
HWI-BRUNOP16X_0001:2:68:4971:199775#0/2\n-GACATATTTGAGAGACACTGGGGAGACAGAATCGACCTGACCTTGCTGAC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHH@HHHEHHHFHHHHHGHAHFBEHHGFBG\n-@ERR030881.74471439 HWI-BRUNOP16X_0001:2:68:16981:199816#0/2\n-GTGACACTGCATTGCTGCTGCCAGCACCCCTTGTTAGGGTTTGTAATTGC\n-+\n-F8HHHFGGG8DC>A>ADD1?##############################\n-@ERR030881.74471978 HWI-BRUNOP16X_0001:2:68:9605:199866#0/2\n-CTTGTCTTCCTTGGAGAGGTGGGCCTGGGAACCCAGCGCGGACAGCGAGA\n-+\n-HHHHHHIHHHHHDHHGHHHGHHHHHHHHHHHHFGHHHHFHHDHHHCHHHH\n'
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/test-data/output1.txt
--- a/SpliceTrap/test-data/output1.txt Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,9901 +0,0 @@\n-na\t0.0010\tchr10\t100260965,100261028\t100261028,100261044\t100261978,100262063\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100273277,100275490\t100275490,100275493\t100280123,100280228\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100286096,100286213\t100286213,100286332\t100286604,100286712\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100329210,100329324\t100329324,100329346\t100329868,100330486\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100523728,100523929\t100523929,100524139\t100526398,100526554\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100827008,100827095\t100827542,100827561\t100827561,100829941\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100981167,100981229\t100983304,100984489\t100984489,100985616\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101062541,101063103\t101064205,101064260\t101064260,101064421\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101134174,101134376\t101136667,101136690\t101136690,101137789\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101600737,101600862\t101601169,101601202\t101601202,101601336\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101605139,101605212\t101608834,101608866\t101608866,101608937\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101601202,101601336\t101608834,101608870\t101608870,101608937\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775129,101775216\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775739,101775776\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101828626,101828696\t101828696,101828717\t101829074,101829199\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101829843,101829897\t101829897,101829942\t101831071,101831167\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102137849,102138038\t102138618,102138719\t102138719,102138765\t+\t0.00
00\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102148371,102148521\t102148627,102148633\t102148633,102148694\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102157018,102157074\t102157185,102157188\t102157188,102157328\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102358505,102358729\t102359266,102359269\t102359269,102359435\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102402047,102402159\t102402251,102402254\t102402254,102402529\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102416384,102416631\t102416631,102417121\t102418700,102418759\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102656105,102656137\t102656244,102656269\t102656269,102656385\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t103451082,103451182\t103451182,103451285\t103452155,103452405\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t106573662,106577285\t106577285,106577555\t106579368,106579474\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t106573662,106577285\t106577285,106577555\t106579090,106579251\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t110875965,110876070\t110881232,110881274\t110881274,110881535\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t112182793,112182887\t112182887,112182891\t112183692,112183779\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t112443514,112443594\t112443594,112443606\t112445169,112445650\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t112446987,112447467\t112448377,112448381\t112448381,112448489\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t113146010,113146097\t113150982,113150997\t113150997,113151123\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t11320856,11320898\t11321188,11321206\t11321206,11321386\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t11314138,11314258\t11321188,11321206\t11321206,11321386\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t113588823,113589079\t113589079,113589082\t113589665,113589797\t-\t0.0000\t0.
0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t113679668,113679978\t113691963,113691996\t113691996,113692070\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t113697493,113697603\t113721000,113721031\t113721031,113721168\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t114129890,114129971\t114129971,114130669\t114131146,114131336\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t114447879,114448'..b'1387,94571429\t94573959,94574187\t94574553,94574736\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94573959,94574187\t94574553,94574736\t94576661,94576709\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94573959,94574187\t94574553,94574736\t94576661,94576805\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94581325,94581522\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94577081,94577096\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94581325,94581522\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94576661,94576709\t94581325,94581522\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94577081,94577096\t94581325,94581522\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94581325,94581522\t94582962,94583059\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94581325,94581522\t94582962,94583059\t94583987,94584125\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94582962,94583059\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94583987,94584125\t
94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94576661,94576805\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94576661,94576709\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94588228,94588390\t94590412,94590552\t94590637,94590776\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94590412,94590552\t94590637,94590776\t94592228,94592312\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94590637,94590776\t94592228,94592312\t94592394,94592514\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94592228,94592312\t94592394,94592514\t94593498,94593615\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94592394,94592514\t94593498,94593615\t94594694,94594854\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94593498,94593615\t94594694,94594854\t94596859,94596956\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94594694,94594854\t94596859,94596956\t94597034,94597111\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94596859,94596956\t94597034,94597111\t94601527,94602099\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94683493,94683987\t94687769,94687932\t94688124,94688274\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94687769,94687932\t94688124,94688274\t94694916,94695077\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94706783,94706960\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94720395,94720537\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94706783,94706960\t94720395,94720537\t94724345,94724533\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94694916,94695077\t94720395,94720537\t94724345,94724533\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94720395,94720537\t94724345,94724533\t94733296,94733438\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t947
24345,94724533\t94733296,94733438\t94735262,94736191\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94762680,94762873\t94775057,94775220\t94775389,94775539\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94775057,94775220\t94775389,94775539\t94780498,94780659\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94775389,94775539\t94780498,94780659\t94781820,94781997\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94780498,94780659\t94781820,94781997\t94820495,94820637\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94781820,94781997\t94820495,94820637\t94842836,94843024\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94820495,94820637\t94842836,94843024\t94849916,94850058\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-\t0.9990\tchr10\t94842836,94843024\t94849916,94850058\t94852732,94853206\t+\t0.0000\t\t\t\n'
b
diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/test-data/output2.txt
--- a/SpliceTrap/test-data/output2.txt Thu Sep 07 14:12:28 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,9901 +0,0 @@\n-AA-AA-10-100261028-100261044.0\t0.0010\t0.0010\tchr10\t100260965,100261028\t100261028,100261044\t100261978,100262063\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t63\t16\t85\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100275490-100275493.0\t0.0010\t0.0010\tchr10\t100273277,100275490\t100275490,100275493\t100280123,100280228\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t2213\t3\t105\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100286213-100286332.0\t0.0010\t0.0010\tchr10\t100286096,100286213\t100286213,100286332\t100286604,100286712\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t117\t119\t108\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100329324-100329346.0\t0.0010\t0.0010\tchr10\t100329210,100329324\t100329324,100329346\t100329868,100330486\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t114\t22\t618\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100523929-100524139.0\t0.0010\t0.0010\tchr10\t100523728,100523929\t100523929,100524139\t100526398,100526554\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t201\t210\t156\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100827542-100827561.0\t0.0010\t0.0010\tchr10\t100827008,100827095\t100827542,100827561\t100827561,100829941\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t87\t19\t2380\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100983304-100984489.0\t0.0010\t0.0010\tchr10\t100981167,100981229\t100983304,100984489\t100984489,100985616\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t62\t1185\t1127\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101064205-101064260.0\t0.0010\t0.0010\tchr10\t101062541,101063103\t101064205,101064260\t101064260,101064421\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t562\t55\t161\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101136667-101136690.0\t0.0010\t0.0010\tchr10\t101134174,101134376\t101136667,101136690\t101136690,101137789\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t202\t23\t1099\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101601169-101601202.0\t0.0010\t0.0010\tchr10\t101600737,101600862\
t101601169,101601202\t101601202,101601336\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t125\t33\t134\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101608834-101608866.0\t0.0010\t0.0010\tchr10\t101605139,101605212\t101608834,101608866\t101608866,101608937\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t73\t32\t71\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101608834-101608870.0\t0.0010\t0.0010\tchr10\t101601202,101601336\t101608834,101608870\t101608870,101608937\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t134\t36\t67\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101774879-101774912.0\t0.0010\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775129,101775216\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t148\t33\t87\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101774879-101774912.1\t0.0010\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775739,101775776\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t148\t33\t37\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101828696-101828717.0\t0.0010\t0.0010\tchr10\t101828626,101828696\t101828696,101828717\t101829074,101829199\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t70\t21\t125\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101829897-101829942.0\t0.0010\t0.0010\tchr10\t101829843,101829897\t101829897,101829942\t101831071,101831167\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t54\t45\t96\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-102138618-102138719.0\t0.0010\t0.0010\tchr10\t102137849,102138038\t102138618,102138719\t102138719,102138765\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t189\t101\t46\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-102148627-102148633.0\t0.0010\t0.0010\tchr10\t102148371,102148521\t102148627,102148633\t102148633,102148694\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t6\t61\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-102157185-102157188.0\t0.0010\t0.0010\tchr10\t102157018,102157074\t102157185,102157188\t102157188,102157328\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t56\t3\t140\texon1=no\tex
on2=no\texon3=no\tna\n-AA-AA-10-102359266-102359269.0\t0.0010\t0.0010\tchr10\t102358505,102358729\t102359266,102359269\t102359269,102359435\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t224\t3\t166\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-102402251-102402254.0\t0.0010\t0.0010\tchr10\t102402047,102402159\t102402251,102402254\t102402254,102402529\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t112\t3\t275\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-102416631-10'..b'40\t139\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94590637-94590776.0\t0.0010\t0.9990\tchr10\t94590412,94590552\t94590637,94590776\t94592228,94592312\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t140\t139\t84\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94592228-94592312.0\t0.0010\t0.9990\tchr10\t94590637,94590776\t94592228,94592312\t94592394,94592514\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t139\t84\t120\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94592394-94592514.0\t0.0010\t0.9990\tchr10\t94592228,94592312\t94592394,94592514\t94593498,94593615\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t84\t120\t117\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94593498-94593615.0\t0.0010\t0.9990\tchr10\t94592394,94592514\t94593498,94593615\t94594694,94594854\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t120\t117\t160\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94594694-94594854.0\t0.0010\t0.9990\tchr10\t94593498,94593615\t94594694,94594854\t94596859,94596956\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t117\t160\t97\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94596859-94596956.0\t0.0010\t0.9990\tchr10\t94594694,94594854\t94596859,94596956\t94597034,94597111\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t160\t97\t77\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94597034-94597111.0\t0.0010\t0.9990\tchr10\t94596859,94596956\t94597034,94597111\t94601527,94602099\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t97\t77\t572\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94687769-94687932.0\t0.0010\t0.9990\tchr10\t94683493,94683
987\t94687769,94687932\t94688124,94688274\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t494\t163\t150\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94688124-94688274.0\t0.0010\t0.9990\tchr10\t94687769,94687932\t94688124,94688274\t94694916,94695077\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t163\t150\t161\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94694916-94695077.0\t0.0010\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94706783,94706960\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t177\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94694916-94695077.1\t0.0010\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94720395,94720537\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t142\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94720395-94720537.0\t0.0010\t0.9990\tchr10\t94706783,94706960\t94720395,94720537\t94724345,94724533\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t177\t142\t188\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94720395-94720537.1\t0.0010\t0.9990\tchr10\t94694916,94695077\t94720395,94720537\t94724345,94724533\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t161\t142\t188\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94724345-94724533.0\t0.0010\t0.9990\tchr10\t94720395,94720537\t94724345,94724533\t94733296,94733438\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t142\t188\t142\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94733296-94733438.0\t0.0010\t0.9990\tchr10\t94724345,94724533\t94733296,94733438\t94735262,94736191\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t188\t142\t929\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94775057-94775220.0\t0.0010\t0.9990\tchr10\t94762680,94762873\t94775057,94775220\t94775389,94775539\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t193\t163\t150\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94775389-94775539.0\t0.0010\t0.9990\tchr10\t94775057,94775220\t94775389,94775539\t94780498,94780659\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t163\t150\t161\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94780498-94780659.
0\t0.0010\t0.9990\tchr10\t94775389,94775539\t94780498,94780659\t94781820,94781997\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t177\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94781820-94781997.0\t0.0010\t0.9990\tchr10\t94780498,94780659\t94781820,94781997\t94820495,94820637\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t161\t177\t142\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94820495-94820637.0\t0.0010\t0.9990\tchr10\t94781820,94781997\t94820495,94820637\t94842836,94843024\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t177\t142\t188\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94842836-94843024.0\t0.0010\t0.9990\tchr10\t94820495,94820637\t94842836,94843024\t94849916,94850058\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t142\t188\t142\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94849916-94850058.0\t0.0010\t0.9990\tchr10\t94842836,94843024\t94849916,94850058\t94852732,94853206\t+\t0\t0\t0\t0.0000\t\n\\ No newline at end of file\n'