| Previous changeset 0:3fc5784c1ce0 (2017-09-07) Next changeset 2:40b8b1f9ea7d (2017-09-07) |
|
Commit message:
planemo upload |
|
added:
SpliceTrap.tar.gz |
|
removed:
SpliceTrap/PostAnalysis SpliceTrap/SpliceChange SpliceTrap/SpliceTrap.pl SpliceTrap/TXdbgen SpliceTrap/bin/ApplyCutoff.jie.pl SpliceTrap/bin/Pair_estimate_c SpliceTrap/bin/PostAnalysis SpliceTrap/bin/PostAnalysis.pl SpliceTrap/bin/SpliceChange SpliceTrap/bin/SpliceChange.pl SpliceTrap/bin/SpliceTrap SpliceTrap/bin/SpliceTrap.pl SpliceTrap/bin/SpliceTrap_measure.pl SpliceTrap/bin/TXdbgen SpliceTrap/bin/TXdbgen.pl SpliceTrap/bin/apply_cutoff.sh SpliceTrap/bin/batch_para_cov10p_fit.sh SpliceTrap/bin/batchqsub.pl SpliceTrap/bin/batchqsub.pl_orig SpliceTrap/bin/beta_fit.R SpliceTrap/bin/bowtie2eland.pl SpliceTrap/bin/calc_pval.R SpliceTrap/bin/downloaddb.pl SpliceTrap/bin/get.frag.size.pl SpliceTrap/bin/get.hist.pl SpliceTrap/bin/get_bed_fa_j.pl SpliceTrap/bin/get_event_dist_fit.pl SpliceTrap/bin/gtf2bed.pl SpliceTrap/bin/mapping_bowtie.sh SpliceTrap/bin/mapping_rmap.sh SpliceTrap/bin/mark.mt.4eland.pl SpliceTrap/bin/rmap2eland.pl SpliceTrap/bin/scan_nomt.pl SpliceTrap/bin/scanbed2txdb.pl SpliceTrap/bin/splitdb.sh SpliceTrap/bin/vslz.pl SpliceTrap/cutoffs/cutoff.pair.06.txt SpliceTrap/cutoffs/cutoff.pair.07.txt SpliceTrap/cutoffs/cutoff.pair.08.txt SpliceTrap/refGenes.bed SpliceTrap/splice_trap.xml SpliceTrap/src/Makefile SpliceTrap/src/splicetrap.estimate.cpp SpliceTrap/test-data/input1.fastq SpliceTrap/test-data/input2.fastq SpliceTrap/test-data/output1.txt SpliceTrap/test-data/output2.txt |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap.tar.gz |
| b |
| Binary file SpliceTrap.tar.gz has changed |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/PostAnalysis --- a/SpliceTrap/PostAnalysis Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,66 +0,0 @@ -#!/usr/bin/perl -my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; -# this script is a wrapup for Post analysis based on the ratio file output - -use strict; -use Getopt::Long; -my $RatioFile=""; -my $OutputFile = ""; -my $JunctionCut=5; -my $CutoffLevel="M"; -my $noIRM = 0; -my $noIRMstr=""; - -GetOptions ( - "i:s"=>\$RatioFile, - "o:s"=>\$OutputFile, - "c:s"=>\$CutoffLevel, - "noIRM|noirm"=>\$noIRM, - "j:i"=>\$JunctionCut -); - -my $InputParaDes=" Usage of the script: - -i input file (.ratio file) - -o output file - -c Cutoff Level:H/[M]/L - Means High, Middle or Low - -j Junction reads per junction requirement for each exon-isoform [5] - --noIRM Use the unadjusted inclusion ratios (before IRM correction) -"; - -if($RatioFile eq "") -{ - print $InputParaDes; - exit; -} - -if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") -{ - print $InputParaDes; - exit; -} -if($noIRM) -{ - $noIRMstr= "noirm"; -} - - -system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut $noIRMstr >$OutputFile.raw"); - -open(rawfile, "$OutputFile.raw"); -open(outfile, ">$OutputFile"); -while(my $line=<rawfile>) -{ - chomp($line); - my @a=split("\t",$line); - if($noIRM) - { - print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; - } - else - { - print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; - } -} -close(outfile); -close(rawfile); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/SpliceChange --- a/SpliceTrap/SpliceChange Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,176 +0,0 @@ -#!/usr/bin/perl -my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; -# compare two outputs from SpliceTrap -use strict; - -# the information needed -# inclusion ratio input file -# filtered out or not input file -# minimal inclusion ratio at least 0.1 for one condition -# minimal splicing changes parameter -# orignial pipeline written by Martin Akerman -# re-organized and re-written by Jie Wu - -use FileHandle; - -use Getopt::Long; - -my @programs = ('grep','mkdir','R','paste','awk','sort'); -foreach my $program (@programs) -{ - die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); -} - - -my $InputFileName1 = ""; -my $InputFileName2 = ""; -my $OutputFileName = ""; -my $minchange = 0.3; -my $mininc = 0.1; -my $noIRM = 0; - - -GetOptions ( - "1:s"=>\$InputFileName1, - "2:s"=>\$InputFileName2, - "o:s"=>\$OutputFileName, - "noIRM|noirm"=>\$noIRM, - "m:f"=>\$mininc, - "c:f"=>\$minchange -); - -my $InputParaDes=" Usage of the script: - -1 input file 1, output from SpliceTrap, *.raw file in the output folder - -2 input file 2. see above. - -o output file prefix. - -c minimal change required, [default:0.3] - -m minimal inclusion ratio for at least one condition. [defualt:0.1] - --noIRM Use the unadjusted inclusion ratios (before IRM correction) -"; - -if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") -{ - print $InputParaDes; - exit; -} - - - -if(-d "$OutputFileName.cache" ) -{ - print "Aborted! output cache folder exists: $OutputFileName.cache \n"; - exit; -} -else -{ - system("mkdir $OutputFileName.cache"); -} - -# -my %ir1; # records ir from file1 -my %ir2; # records ir from file2 -# only records trios above the cutoffs - -open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; -while(my $line=<input1>) -{ - chomp($line); - my @a = split("\t", $line); - if($a[21] ne "na") - { - if($noIRM) - { - $ir1{$a[0]} = $a[1]; - } - else - { - $ir1{$a[0]} = $a[2]; - } - } -} -print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; -close(input1); - -open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; -while(my $line=<input2>) -{ - chomp($line); - my @a = split("\t", $line); - if($a[21] ne "na") - { - if($noIRM) - { - $ir2{$a[0]} = $a[1]; - } - else - { - $ir2{$a[0]} = $a[2]; - } - } -} -print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; - - -close(input2); - - -## -my %mean; -my %sd; - -my %num; - -my %filehandles; - -my @types = ("CA", "IR", "AD","AA"); - -foreach my $type (@types) -{ - my $fh = new FileHandle; - open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; - $filehandles{$type} = $fh; -} - - -foreach my $key (keys %ir1) -{ - if(exists $ir2{$key}) - { - if(($ir1{$key} + $ir2{$key}) > 0) - { - #find the type - my $type = substr($key, 0, 2); - $type = "CA" if $type eq "CS"; - $num{$type}++; - - my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); - $mean{$type} = $mean{$type} + $change; - $sd{$type} = $change*$change + $sd{$type}; - - $change = sprintf("%.4f",$change); - - my $fout = $filehandles{$type}; - print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; - } - } -} - -foreach my $type (keys %filehandles) -{ - close($filehandles{$type}); - if($num{$type} == 0) - { - warn "no AS events passed filters for both files\n"; - next; - } - $mean{$type} = $mean{$type}/$num{$type}; - $sd{$type} = sqrt($sd{$type}/$num{$type}); - system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); - system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); - print "$num{$type} $type events processed...\n"; - #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; - -} -system("rm $OutputFileName.cache -rf"); - - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/SpliceTrap.pl --- a/SpliceTrap/SpliceTrap.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| b'@@ -1,285 +0,0 @@\n-#!/usr/bin/perl\n-# Author: wuj@cshl.edu\n-# Modified: Baekdoo Kim (baegi7942@gmail.com)\n-use strict;\n-use Getopt::Long;\n-use Data::Dumper;\n-####################\n-use Cwd;\n-my $PROG = $0;\n-my $CUR_DIR = Cwd::abs_path(Cwd::cwd());\n-my $PROG_ABS_PATH = Cwd::abs_path($PROG);\n-#my $SrcFolder=`dirname $PROG_ABS_PATH`;\n-#chomp($SrcFolder);\n-#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini";\n-#my $SrcFolder=$config{SrcFolder};\n-\n-my @programs = (\'R\',\'echo\',\'cat\',\'bash\',\'perl\',\'ln\',\'mkdir\',\'paste\',\'grep\',\'sort\',\'basename\',\'awk\',\'wc\',\'mv\',\'cd\',\'rm\',\'split\',\'head\' );\n-foreach my $program (@programs)\n-{\n- die ("CHECK: $program not found\\n") if(system("hash $program >/dev/null"));\n-\n-}\n-\n-####################\n-my $SrcFolder="";\n-my $MapSoftware="bowtie";\n-my $DatabasePrefix="hg38";\n-my $ReadFileFormat="";\n-my $ReadFile1Name="";\n-my $ReadFile2Name="";\n-my $CutoffLevel="M";\n-my $Outputfolder=$CUR_DIR;\n-my $OutputPrefix="Result";\n-#my $CutoffOnly=0;\n-my $ReadSize=36;\n-my $JunctionCut=5;\n-my $onGalaxy_raw="";\n-my $onGalaxy_txt="";\n-my $BowtieThreads=1;\n-my $noIRMstr="";\n-my $noIRM = 0;\n-\n-my $num_args = $#ARGV;\n-$onGalaxy_raw = $ARGV[$num_args-1];\n-$onGalaxy_txt = $ARGV[$num_args];\n-\n-GetOptions (\n-\t"l:s"=>\\$SrcFolder,\n- "m:s"=>\\$MapSoftware,\n- "d:s"=>\\$DatabasePrefix,\n-# "f:s"=>\\$ReadFileFormat,\n- "1:s"=>\\$ReadFile1Name,\n- "2:s"=>\\$ReadFile2Name,\n- "c:s"=>\\$CutoffLevel,\n- "outdir:s"=>\\$Outputfolder,\n- "o:s"=>\\$OutputPrefix,\n- "j:i"=>\\$JunctionCut,\n- "s:i"=>\\$ReadSize,\n- "p:i"=>\\$BowtieThreads,\n- "noIRM|noirm"=>\\$noIRM\n-# "local:s"=>\\$local,\n-# "rerun"=>\\$CutoffOnly\n-);\n-#-O for galaxy output\n-\n-\n-my $InputParaDes=" Usage of the script:\n-\t-l\tBase Location (required)\n- -m Mapping software: [bowtie]/rmap\n- -d Database prefix: [hg18]/mm9/rn4/userdefined\n- -1 Read File 1\n- -2 Read File 2\n- -c Cutoff Level:H/[M]/L\n- Means High, Middle or Low\n- -j Junction reads requirement per junction for each exon-isoform [5]\n- -o Output prefix {Result}\n- -s Read Size [36]\n- --outdir Output folder [./]\n- -p Bowtie parameter, threads number, only use this when you don\'t use qsub [1]\n- --noIRM Skip the IRM correction step\n- \n- This is a quick help, please refer to the README file for details.\n-";\n-\n-\n-if($SrcFolder eq "") {\n-\tprint "[CHECK] - Please provide the location of the script (option \'-l\')\\n\\n";\n-\texit;\n-}\n-\n-if($ReadFile2Name eq "")\n-{\n- $ReadFile2Name = $ReadFile1Name;\n- #trigger singled end mode\n-}\n-\n-if($ReadFile1Name eq "" or $ReadFile2Name eq "" )\n-{\n- print $InputParaDes;\n- exit;\n-}\n-\n-if($BowtieThreads < 1)\n-{\n- print $InputParaDes;\n- exit;\n-}\n-\n-if (! -e "$SrcFolder/db/$DatabasePrefix/parallel")\n-{\n- print "CHECK: Error, the database you specified is not properly installed.\\n";\n- #print $InputParaDes;\n- exit;\n-\n-}\n-\n-if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L")\n-{\n- print $InputParaDes;\n- exit;\n-}\n-\n-\n-$ReadFile1Name = Cwd::abs_path($ReadFile1Name);\n-$ReadFile2Name = Cwd::abs_path($ReadFile2Name);\n-\n-#check the files\n-open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\\n");\n-my $checkoneline = <check>;\n-if(substr($checkoneline,0,1) eq ">")\n-{\n- $ReadFileFormat = "fasta";\n-}\n-elsif(substr($checkoneline,0,1) eq "@")\n-{\n- $ReadFileFormat = "fastq";\n-}\n-else\n-{\n- die("CHECK: ERROR:Please check $ReadFile1Name\\n");\n-}\n-close(check);\n-\n-open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\\n");\n-my $checkoneline = <check>;\n-if(substr($checkoneline,0,1) eq ">")\n-{\n- die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\\n") if ($ReadFileFormat ne "fasta");\n-}\n-elsif(substr($checkoneline,0,1) eq "'..b'print "CHECK: checking rmap...\\n";\n- if(system("type rmap &>/dev/null") ==0 )\n- {\n- print "CHECK: rmap found, continue\\n";\n- }\n- else\n- {\n- die "CHECK: No rmap found in PATH, EXIT!\\n";\n- }\n-}\n-else\n-{\n- die "CHECK: option -m only takes rmap or bowtie as inputs\\n";\n-}\n-\n-if($ReadSize == 0)\n-{\n- die "CHECK: Please check option -s Read size\\n";\n-}\n-\n-if($noIRM)\n-{\n- $noIRMstr= "noirm";\n-}\n-\n-#write more checks later\n-print "PARAMETERS:\\tMapping software: ",$MapSoftware,"\\n";\n-print "PARAMETERS:\\tDatabase prefix: ",$DatabasePrefix,"\\n";\n-print "PARAMETERS:\\tRead end 1: ",$ReadFile1Name,"\\n";\n-print "PARAMETERS:\\tRead end 2: ",$ReadFile2Name,"\\n" if($ReadFile2Name ne $ReadFile1Name);\n-print "PARAMETERS:\\tGalaxy_raw: ",$onGalaxy_raw,"\\n"; #if($onGalaxy_raw ne "");\n-print "PARAMETERS:\\tGalaxy_txt: ",$onGalaxy_txt,"\\n"; #if($onGalaxy_txt ne "");\n-print "PARAMETERS:\\tCutoff level: ",$CutoffLevel,"\\n";\n-print "PARAMETERS:\\tJunction reads.min:",$JunctionCut,"\\n";\n-print "PARAMETERS:\\tOutput folder: ",$Outputfolder,"\\n";\n-print "PARAMETERS:\\tOutput prefix: ",$OutputPrefix,"\\n";\n-print "PARAMETERS:\\tRead size: ",$ReadSize,"\\n";\n-print "PARAMETERS:\\tBowtie threads #: ",$BowtieThreads,"\\n";\n-print "PARAMETERS:\\tNo IRM.\\n" if ($noIRM);\n-\n-if($MapSoftware eq "bowtie")\n-{\n- print "=================STAGE 1 MAPPING===================\\n";\n- system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads");\n- system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name);\n- print "=================STAGE 2 ESTIMATION================\\n";\n- # ratio, log, nums\n- system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;\n- print "=================STAGE 3 CUTOFF====================\\n";\n- # raw\n- system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");\n-\n-\n-}\n-\n-if($MapSoftware eq "rmap")\n-{\n- print "=================STAGE 1 MAPPING===================\\n";\n-\n- system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ;\n- system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name);\n- print "=================STAGE 2 ESTIMATION================\\n";\n-\n- system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;\n- print "=================STAGE 3 CUTOFF====================\\n";\n- system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");\n-\n-\n-}\n-\n-#print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\\n";\n-\n-if($onGalaxy_raw ne "" && $onGalaxy_txt ne "")\n-{\n- print "OUTPUTFILE:$OutputPrefix.raw\\n";\n- system("grep -v na $Outputfolder/$OutputPrefix.raw >$onGalaxy_raw");\n- print "OUTPUTFILE:$OutputPrefix.txt\\n";\n- system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy_txt");\n-}\n-\n-print "============Clean up\\n";\n-system("rm -r $Outputfolder/$OutputPrefix.*");\n-\n-sub random_sessid {\n- #my @chars = (0..9,a..z,A..Z);\n- my @chars = (\'a\'..\'z\',\'A\'..\'Z\');\n- my $len = 10;\n- my $string = join \'\', map {$chars[rand(@chars)]} (1..$len);\n- return $string;\n-} \n' |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/TXdbgen --- a/SpliceTrap/TXdbgen Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,97 +0,0 @@ -#!/usr/bin/perl -my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; -# this script is to generate TXdb database files from bed/gtf file - -use strict; -use Cwd; -use Getopt::Long; - -my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo'); -foreach my $program (@programs) -{ - die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); - -} - - -my $genomedir = ""; - -my $annofilename = ""; -my $txdbname = "userdefined"; -my $knownonly = 0; -my $gtfinput = 0; - -GetOptions ( - "g:s"=>\$genomedir, - "a:s"=>\$annofilename, - "n:s"=>\$txdbname, - "gtf"=>\$gtfinput, - "knownonly"=>\$knownonly -); - -my $InputParaDes=" Usage of the script: - -g genome fasta file location - -a annotation file (bed/gtf) - -n txdb name - --gtf specify this if annotation file is in gtf format -"; - -if($genomedir eq "" or $annofilename eq "") -{ - print $InputParaDes; - exit; -} - -$genomedir = Cwd::abs_path($genomedir); -$annofilename = Cwd::abs_path($annofilename); - -my $annofilebase = `basename $annofilename`; -chomp($annofilebase); -#need a cache folder to avoid mess - -my $cachefolder = $annofilebase.".cache"; - -if (! -e $cachefolder) -{ - mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n"; -} -if($gtfinput) -{ - print "TXDBGEN: converting gtf file into bed format\n"; - system ("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed"); - $annofilename = "$cachefolder/$annofilebase.bed"; -} - - -print "TXDBGEN: scan $annofilename for AS events...\n"; -system("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp"); -print "TXDBGEN: fetch sequences from $genomedir...\n"; -system("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort"); -#get fasta file list -system("ls $genomedir/*.fa >$cachefolder/chr.list"); - -system("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta"); - -print "TXDBGEN: generate files for parallel computing...\n"; -if (! -e "$cachefolder/parallel") -{ - mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n"; -} -system("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed"); -system("rm $cachefolder/out.bed"); -system("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi"); -system("rm $cachefolder/TXdb.tmp.evi"); -system("bash $SrcFolder/splitdb.sh $cachefolder/parallel"); -print "TXDBGEN: build Bowtie index...\n"; - -if (! -e "$cachefolder/btw") -{ - mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n"; -} -system("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb"); -system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list"); -print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n"; - -system("mv $cachefolder $SrcFolder/../db/$txdbname"); -print "TXDBGEN: Done!\n"; - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/ApplyCutoff.jie.pl --- a/SpliceTrap/bin/ApplyCutoff.jie.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,108 +0,0 @@ -#apply our cuttoff hash table on the IR calculated by Jie -#Modified from Martin's code -use strict; - -use Cwd; -my $PROG = $0; -my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); -my $PROG_ABS_PATH = Cwd::abs_path($PROG); -my $SrcFolder=`dirname $PROG_ABS_PATH`; -chomp($SrcFolder); - -my %cutoff; -my @Exlen; - -my $cutoff_level=$ARGV[1]; -my $JunctionCut = $ARGV[2]; -my $noirm = $ARGV[3]; - -my $cutoff_level_index=7; - - $cutoff_level_index=8 if $cutoff_level eq "H"; -$cutoff_level_index=6 if $cutoff_level eq "L"; - -open(CUT,"$SrcFolder/../cutoffs/cutoff.pair.0".$cutoff_level_index.".txt") || die "cutoff file not found $!\n"; - -while(<CUT>){ - chomp; - my @a=split(/\t/,$_); - push @Exlen,$a[0]; - $cutoff{$a[0]}=$a[1]; -} -close(CUT); - -open(IN,$ARGV[0]); - -while(<IN>){ - chomp; - my @a=split(/\t/,$_); - my $Ez='Ez=yes'; - my $print=$_; - if($a[0]=~m/#/g){next} - my $eventid=substr($a[0],0,2); - my $bir =$a[2]; - $bir =$a[1] if($noirm eq "noirm"); - my $j12 = $a[8]; - my $j23 = $a[9]; - my $j13 = $a[10]; - my $cov1=$a[11]; - my $cov2=$a[12]; - my $cov3=$a[13]; - my $siz1=$a[15]; - my $siz2=$a[16]; - my $siz3=$a[17]; - - - my $stat1='exon1='.cutoff($siz1,$cov1,\@Exlen,%cutoff); - my $stat2='exon2='.cutoff($siz2,$cov2,\@Exlen,%cutoff); - my $stat3='exon3='.cutoff($siz3,$cov3,\@Exlen,%cutoff); - if($stat1 eq "exon1=yes" and $stat3 eq "exon3=yes") - { - #$Ez="passed"; - $Ez=$eventid if $eventid eq "AA"; - $Ez=$eventid if $eventid eq "AD"; - $Ez=$eventid if $eventid eq "IR"; - if ($eventid eq "CS" or $eventid eq "CA" or $eventid eq "ME") - { - if($bir >0.9) - { - $Ez = "CS"; - } - else - { - $Ez = "CA"; - } - } - - } - else - { - #$Ez="declined"; - $Ez = "na"; - } - if( ($j12<$JunctionCut or $j23<$JunctionCut) and $j13 <$JunctionCut) - { - $Ez = "na"; - } - print $print,"\t",$stat1,"\t",$stat2,"\t",$stat3,"\t",$Ez,"\n"; -} -close(IN); -#################################################################### - -sub cutoff{ - my($s,$c,$E,%cutoff)=@_; - my @Exlen=@$E; - if($c eq 'NA'){return('NA')} - my $range=$Exlen[$#Exlen]; - foreach my $l(@Exlen){if($s<$l){$range=$l;last}} - if($c<$cutoff{$range}){return('no')} - return('yes') -} - - -sub contain{ - my ($a,@a)=@_; - foreach(@a){if($a eq $_){return(1)}} - return(0) -} - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/Pair_estimate_c |
| b |
| Binary file SpliceTrap/bin/Pair_estimate_c has changed |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/PostAnalysis --- a/SpliceTrap/bin/PostAnalysis Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,66 +0,0 @@ -#!/usr/bin/perl -my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; -# this script is a wrapup for Post analysis based on the ratio file output - -use strict; -use Getopt::Long; -my $RatioFile=""; -my $OutputFile = ""; -my $JunctionCut=5; -my $CutoffLevel="M"; -my $noIRM = 0; -my $noIRMstr=""; - -GetOptions ( - "i:s"=>\$RatioFile, - "o:s"=>\$OutputFile, - "c:s"=>\$CutoffLevel, - "noIRM|noirm"=>\$noIRM, - "j:i"=>\$JunctionCut -); - -my $InputParaDes=" Usage of the script: - -i input file (.ratio file) - -o output file - -c Cutoff Level:H/[M]/L - Means High, Middle or Low - -j Junction reads per junction requirement for each exon-isoform [5] - --noIRM Use the unadjusted inclusion ratios (before IRM correction) -"; - -if($RatioFile eq "") -{ - print $InputParaDes; - exit; -} - -if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") -{ - print $InputParaDes; - exit; -} -if($noIRM) -{ - $noIRMstr= "noirm"; -} - - -system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut $noIRMstr >$OutputFile.raw"); - -open(rawfile, "$OutputFile.raw"); -open(outfile, ">$OutputFile"); -while(my $line=<rawfile>) -{ - chomp($line); - my @a=split("\t",$line); - if($noIRM) - { - print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; - } - else - { - print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; - } -} -close(outfile); -close(rawfile); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/PostAnalysis.pl --- a/SpliceTrap/bin/PostAnalysis.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,64 +0,0 @@ -# this script is a wrapup for Post analysis based on the ratio file output - -use strict; -use Getopt::Long; -my $RatioFile=""; -my $OutputFile = ""; -my $JunctionCut=5; -my $CutoffLevel="M"; -my $noIRM = 0; -my $noIRMstr=""; - -GetOptions ( - "i:s"=>\$RatioFile, - "o:s"=>\$OutputFile, - "c:s"=>\$CutoffLevel, - "noIRM|noirm"=>\$noIRM, - "j:i"=>\$JunctionCut -); - -my $InputParaDes=" Usage of the script: - -i input file (.ratio file) - -o output file - -c Cutoff Level:H/[M]/L - Means High, Middle or Low - -j Junction reads per junction requirement for each exon-isoform [5] - --noIRM Use the unadjusted inclusion ratios (before IRM correction) -"; - -if($RatioFile eq "") -{ - print $InputParaDes; - exit; -} - -if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") -{ - print $InputParaDes; - exit; -} -if($noIRM) -{ - $noIRMstr= "noirm"; -} - - -system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut $noIRMstr >$OutputFile.raw"); - -open(rawfile, "$OutputFile.raw"); -open(outfile, ">$OutputFile"); -while(my $line=<rawfile>) -{ - chomp($line); - my @a=split("\t",$line); - if($noIRM) - { - print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; - } - else - { - print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; - } -} -close(outfile); -close(rawfile); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/SpliceChange --- a/SpliceTrap/bin/SpliceChange Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,176 +0,0 @@ -#!/usr/bin/perl -my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; -# compare two outputs from SpliceTrap -use strict; - -# the information needed -# inclusion ratio input file -# filtered out or not input file -# minimal inclusion ratio at least 0.1 for one condition -# minimal splicing changes parameter -# orignial pipeline written by Martin Akerman -# re-organized and re-written by Jie Wu - -use FileHandle; - -use Getopt::Long; - -my @programs = ('grep','mkdir','R','paste','awk','sort'); -foreach my $program (@programs) -{ - die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); -} - - -my $InputFileName1 = ""; -my $InputFileName2 = ""; -my $OutputFileName = ""; -my $minchange = 0.3; -my $mininc = 0.1; -my $noIRM = 0; - - -GetOptions ( - "1:s"=>\$InputFileName1, - "2:s"=>\$InputFileName2, - "o:s"=>\$OutputFileName, - "noIRM|noirm"=>\$noIRM, - "m:f"=>\$mininc, - "c:f"=>\$minchange -); - -my $InputParaDes=" Usage of the script: - -1 input file 1, output from SpliceTrap, *.raw file in the output folder - -2 input file 2. see above. - -o output file prefix. - -c minimal change required, [default:0.3] - -m minimal inclusion ratio for at least one condition. [defualt:0.1] - --noIRM Use the unadjusted inclusion ratios (before IRM correction) -"; - -if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") -{ - print $InputParaDes; - exit; -} - - - -if(-d "$OutputFileName.cache" ) -{ - print "Aborted! output cache folder exists: $OutputFileName.cache \n"; - exit; -} -else -{ - system("mkdir $OutputFileName.cache"); -} - -# -my %ir1; # records ir from file1 -my %ir2; # records ir from file2 -# only records trios above the cutoffs - -open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; -while(my $line=<input1>) -{ - chomp($line); - my @a = split("\t", $line); - if($a[21] ne "na") - { - if($noIRM) - { - $ir1{$a[0]} = $a[1]; - } - else - { - $ir1{$a[0]} = $a[2]; - } - } -} -print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; -close(input1); - -open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; -while(my $line=<input2>) -{ - chomp($line); - my @a = split("\t", $line); - if($a[21] ne "na") - { - if($noIRM) - { - $ir2{$a[0]} = $a[1]; - } - else - { - $ir2{$a[0]} = $a[2]; - } - } -} -print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; - - -close(input2); - - -## -my %mean; -my %sd; - -my %num; - -my %filehandles; - -my @types = ("CA", "IR", "AD","AA"); - -foreach my $type (@types) -{ - my $fh = new FileHandle; - open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; - $filehandles{$type} = $fh; -} - - -foreach my $key (keys %ir1) -{ - if(exists $ir2{$key}) - { - if(($ir1{$key} + $ir2{$key}) > 0) - { - #find the type - my $type = substr($key, 0, 2); - $type = "CA" if $type eq "CS"; - $num{$type}++; - - my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); - $mean{$type} = $mean{$type} + $change; - $sd{$type} = $change*$change + $sd{$type}; - - $change = sprintf("%.4f",$change); - - my $fout = $filehandles{$type}; - print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; - } - } -} - -foreach my $type (keys %filehandles) -{ - close($filehandles{$type}); - if($num{$type} == 0) - { - warn "no AS events passed filters for both files\n"; - next; - } - $mean{$type} = $mean{$type}/$num{$type}; - $sd{$type} = sqrt($sd{$type}/$num{$type}); - system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); - system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); - print "$num{$type} $type events processed...\n"; - #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; - -} -system("rm $OutputFileName.cache -rf"); - - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/SpliceChange.pl --- a/SpliceTrap/bin/SpliceChange.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,174 +0,0 @@ -# compare two outputs from SpliceTrap -use strict; - -# the information needed -# inclusion ratio input file -# filtered out or not input file -# minimal inclusion ratio at least 0.1 for one condition -# minimal splicing changes parameter -# orignial pipeline written by Martin Akerman -# re-organized and re-written by Jie Wu - -use FileHandle; - -use Getopt::Long; - -my @programs = ('grep','mkdir','R','paste','awk','sort'); -foreach my $program (@programs) -{ - die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); -} - - -my $InputFileName1 = ""; -my $InputFileName2 = ""; -my $OutputFileName = ""; -my $minchange = 0.3; -my $mininc = 0.1; -my $noIRM = 0; - - -GetOptions ( - "1:s"=>\$InputFileName1, - "2:s"=>\$InputFileName2, - "o:s"=>\$OutputFileName, - "noIRM|noirm"=>\$noIRM, - "m:f"=>\$mininc, - "c:f"=>\$minchange -); - -my $InputParaDes=" Usage of the script: - -1 input file 1, output from SpliceTrap, *.raw file in the output folder - -2 input file 2. see above. - -o output file prefix. - -c minimal change required, [default:0.3] - -m minimal inclusion ratio for at least one condition. [defualt:0.1] - --noIRM Use the unadjusted inclusion ratios (before IRM correction) -"; - -if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") -{ - print $InputParaDes; - exit; -} - - - -if(-d "$OutputFileName.cache" ) -{ - print "Aborted! output cache folder exists: $OutputFileName.cache \n"; - exit; -} -else -{ - system("mkdir $OutputFileName.cache"); -} - -# -my %ir1; # records ir from file1 -my %ir2; # records ir from file2 -# only records trios above the cutoffs - -open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; -while(my $line=<input1>) -{ - chomp($line); - my @a = split("\t", $line); - if($a[21] ne "na") - { - if($noIRM) - { - $ir1{$a[0]} = $a[1]; - } - else - { - $ir1{$a[0]} = $a[2]; - } - } -} -print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; -close(input1); - -open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; -while(my $line=<input2>) -{ - chomp($line); - my @a = split("\t", $line); - if($a[21] ne "na") - { - if($noIRM) - { - $ir2{$a[0]} = $a[1]; - } - else - { - $ir2{$a[0]} = $a[2]; - } - } -} -print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; - - -close(input2); - - -## -my %mean; -my %sd; - -my %num; - -my %filehandles; - -my @types = ("CA", "IR", "AD","AA"); - -foreach my $type (@types) -{ - my $fh = new FileHandle; - open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; - $filehandles{$type} = $fh; -} - - -foreach my $key (keys %ir1) -{ - if(exists $ir2{$key}) - { - if(($ir1{$key} + $ir2{$key}) > 0) - { - #find the type - my $type = substr($key, 0, 2); - $type = "CA" if $type eq "CS"; - $num{$type}++; - - my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); - $mean{$type} = $mean{$type} + $change; - $sd{$type} = $change*$change + $sd{$type}; - - $change = sprintf("%.4f",$change); - - my $fout = $filehandles{$type}; - print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; - } - } -} - -foreach my $type (keys %filehandles) -{ - close($filehandles{$type}); - if($num{$type} == 0) - { - warn "no AS events passed filters for both files\n"; - next; - } - $mean{$type} = $mean{$type}/$num{$type}; - $sd{$type} = sqrt($sd{$type}/$num{$type}); - system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); - system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); - print "$num{$type} $type events processed...\n"; - #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; - -} -system("rm $OutputFileName.cache -rf"); - - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/SpliceTrap --- a/SpliceTrap/bin/SpliceTrap Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,263 +0,0 @@ -#!/usr/bin/perl -my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; -# Author: wuj@cshl.edu -use strict; -use Getopt::Long; -#################### -use Cwd; -my $PROG = $0; -my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); -my $PROG_ABS_PATH = Cwd::abs_path($PROG); -#my $SrcFolder=`dirname $PROG_ABS_PATH`; -#chomp($SrcFolder); -#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; -#my $SrcFolder=$config{SrcFolder}; - -my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' ); -foreach my $program (@programs) -{ - die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); - -} - -#################### -my $MapSoftware="bowtie"; -my $DatabasePrefix="hg18"; -my $ReadFileFormat=""; -my $ReadFile1Name=""; -my $ReadFile2Name=""; -my $CutoffLevel="M"; -my $Outputfolder=$CUR_DIR; -my $OutputPrefix="Result"; -#my $CutoffOnly=0; -my $ReadSize=36; -my $JunctionCut=5; -my $onGalaxy=""; -my $BowtieThreads=1; -my $noIRMstr=""; -my $noIRM = 0; - -GetOptions ( - "m:s"=>\$MapSoftware, - "d:s"=>\$DatabasePrefix, -# "f:s"=>\$ReadFileFormat, - "1:s"=>\$ReadFile1Name, - "2:s"=>\$ReadFile2Name, - "c:s"=>\$CutoffLevel, - "outdir:s"=>\$Outputfolder, - "o:s"=>\$OutputPrefix, - "j:i"=>\$JunctionCut, - "s:i"=>\$ReadSize, - "p:i"=>\$BowtieThreads, - "noIRM|noirm"=>\$noIRM, - "g:s"=>\$onGalaxy -# "local:s"=>\$local, -# "rerun"=>\$CutoffOnly -); -#-O for galaxy output - - -my $InputParaDes=" Usage of the script: - -m Mapping software: [bowtie]/rmap - -d Database prefix: [hg18]/mm9/rn4/userdefined - -1 Read File 1 - -2 Read File 2 - -c Cutoff Level:H/[M]/L - Means High, Middle or Low - -j Junction reads requirement per junction for each exon-isoform [5] - -o Output prefix {Result} - -s Read Size [36] - --outdir Output folder [./] - -p Bowtie parameter, threads number, only use this when you don't use qsub [1] - --noIRM Skip the IRM correction step - - This is a quick help, please refer to the README file for details. -"; - -if($ReadFile2Name eq "") -{ - $ReadFile2Name = $ReadFile1Name; - #trigger singled end mode -} - -if($ReadFile1Name eq "" or $ReadFile2Name eq "" ) -{ - print $InputParaDes; - exit; -} - -if($BowtieThreads < 1) -{ - print $InputParaDes; - exit; -} - -if (! -e "$SrcFolder/../db/$DatabasePrefix/parallel") -{ - print "CHECK: Error, the database you specified is not properly installed.\n"; - #print $InputParaDes; - exit; - -} - -if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") -{ - print $InputParaDes; - exit; -} - -$ReadFile1Name = Cwd::abs_path($ReadFile1Name); -$ReadFile2Name = Cwd::abs_path($ReadFile2Name); - -#check the files -open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n"); -my $checkoneline = <check>; -if(substr($checkoneline,0,1) eq ">") -{ - $ReadFileFormat = "fasta"; -} -elsif(substr($checkoneline,0,1) eq "@") -{ - $ReadFileFormat = "fastq"; -} -else -{ - die("CHECK: ERROR:Please check $ReadFile1Name\n"); -} -close(check); - -open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n"); -my $checkoneline = <check>; -if(substr($checkoneline,0,1) eq ">") -{ - die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta"); -} -elsif(substr($checkoneline,0,1) eq "@") -{ - die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq"); -} -else -{ - die("CHECK: ERROR:Please check $ReadFile2Name\n"); -} -close(check); - -$Outputfolder= Cwd::abs_path($Outputfolder); -if($Outputfolder eq "/tmp") -{ - while(-e $Outputfolder) - { - my $random_foldername = random_sessid(); - $Outputfolder = "/tmp/".$random_foldername; - } -} - - -if(! -e $Outputfolder) -{ - mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n"; -} -if(! -d $Outputfolder) -{ - die "CHECK: $Outputfolder is not a folder\n"; -} - -if($MapSoftware eq "bowtie") -{ - print "CHECK: whether bowtie installed and in PATH\n"; - my $bowtiechecker=`bowtie --version`; - if($bowtiechecker ne "") - { - print "CHECK: bowtie found, information below:\n"; - print $bowtiechecker,"\n"; - } - else - { - die "CHECK: No bowtie found in PATH, EXIT!\n"; - } -} -elsif($MapSoftware eq "rmap") -{ - print "CHECK: checking rmap...\n"; - if(system("type rmap &>/dev/null") ==0 ) - { - print "CHECK: rmap found, continue\n"; - } - else - { - die "CHECK: No rmap found in PATH, EXIT!\n"; - } -} -else -{ - die "CHECK: option -m only takes rmap or bowtie as inputs\n"; -} - -if($ReadSize == 0) -{ - die "CHECK: Please check option -s Read size\n"; -} - -if($noIRM) -{ - $noIRMstr= "noirm"; -} - -#write more checks later -print "PARAMETERS:\tMapping software: ",$MapSoftware,"\n"; -print "PARAMETERS:\tDatabase prefix: ",$DatabasePrefix,"\n"; -print "PARAMETERS:\tRead end 1: ",$ReadFile1Name,"\n"; -print "PARAMETERS:\tRead end 2: ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name); -print "PARAMETERS:\tCutoff level: ",$CutoffLevel,"\n"; -print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n"; -print "PARAMETERS:\tOutput folder: ",$Outputfolder,"\n"; -print "PARAMETERS:\tOutput prefix: ",$OutputPrefix,"\n"; -print "PARAMETERS:\tRead size: ",$ReadSize,"\n"; -print "PARAMETERS:\tBowtie threads #: ",$BowtieThreads,"\n"; -print "PARAMETERS:\tNo IRM.\n" if ($noIRM); - -if($MapSoftware eq "bowtie") -{ - print "=================STAGE 1 MAPPING===================\n"; - system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads"); - system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name); - print "=================STAGE 2 ESTIMATION================\n"; - - system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; - print "=================STAGE 3 CUTOFF====================\n"; - system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); - - -} - -if($MapSoftware eq "rmap") -{ - print "=================STAGE 1 MAPPING===================\n"; - - system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ; - system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name); - print "=================STAGE 2 ESTIMATION================\n"; - - system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; - print "=================STAGE 3 CUTOFF====================\n"; - system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); - - -} - -print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n"; - -if($onGalaxy ne "") -{ - system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy"); -} - -sub random_sessid -{ - #my @chars = (0..9,a..z,A..Z); - my @chars = ('a'..'z','A'..'Z'); - my $len = 10; - my $string = join '', map {$chars[rand(@chars)]} (1..$len); - return $string; - } - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/SpliceTrap.pl --- a/SpliceTrap/bin/SpliceTrap.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,261 +0,0 @@ -# Author: wuj@cshl.edu -use strict; -use Getopt::Long; -#################### -use Cwd; -my $PROG = $0; -my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); -my $PROG_ABS_PATH = Cwd::abs_path($PROG); -#my $SrcFolder=`dirname $PROG_ABS_PATH`; -#chomp($SrcFolder); -#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; -#my $SrcFolder=$config{SrcFolder}; - -my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' ); -foreach my $program (@programs) -{ - die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); - -} - -#################### -my $MapSoftware="bowtie"; -my $DatabasePrefix="hg18"; -my $ReadFileFormat=""; -my $ReadFile1Name=""; -my $ReadFile2Name=""; -my $CutoffLevel="M"; -my $Outputfolder=$CUR_DIR; -my $OutputPrefix="Result"; -#my $CutoffOnly=0; -my $ReadSize=36; -my $JunctionCut=5; -my $onGalaxy=""; -my $BowtieThreads=1; -my $noIRMstr=""; -my $noIRM = 0; - -GetOptions ( - "m:s"=>\$MapSoftware, - "d:s"=>\$DatabasePrefix, -# "f:s"=>\$ReadFileFormat, - "1:s"=>\$ReadFile1Name, - "2:s"=>\$ReadFile2Name, - "c:s"=>\$CutoffLevel, - "outdir:s"=>\$Outputfolder, - "o:s"=>\$OutputPrefix, - "j:i"=>\$JunctionCut, - "s:i"=>\$ReadSize, - "p:i"=>\$BowtieThreads, - "noIRM|noirm"=>\$noIRM, - "g:s"=>\$onGalaxy -# "local:s"=>\$local, -# "rerun"=>\$CutoffOnly -); -#-O for galaxy output - - -my $InputParaDes=" Usage of the script: - -m Mapping software: [bowtie]/rmap - -d Database prefix: [hg18]/mm9/rn4/userdefined - -1 Read File 1 - -2 Read File 2 - -c Cutoff Level:H/[M]/L - Means High, Middle or Low - -j Junction reads requirement per junction for each exon-isoform [5] - -o Output prefix {Result} - -s Read Size [36] - --outdir Output folder [./] - -p Bowtie parameter, threads number, only use this when you don't use qsub [1] - --noIRM Skip the IRM correction step - - This is a quick help, please refer to the README file for details. -"; - -if($ReadFile2Name eq "") -{ - $ReadFile2Name = $ReadFile1Name; - #trigger singled end mode -} - -if($ReadFile1Name eq "" or $ReadFile2Name eq "" ) -{ - print $InputParaDes; - exit; -} - -if($BowtieThreads < 1) -{ - print $InputParaDes; - exit; -} - -if (! -e "$SrcFolder/../db/$DatabasePrefix/parallel") -{ - print "CHECK: Error, the database you specified is not properly installed.\n"; - #print $InputParaDes; - exit; - -} - -if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") -{ - print $InputParaDes; - exit; -} - -$ReadFile1Name = Cwd::abs_path($ReadFile1Name); -$ReadFile2Name = Cwd::abs_path($ReadFile2Name); - -#check the files -open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n"); -my $checkoneline = <check>; -if(substr($checkoneline,0,1) eq ">") -{ - $ReadFileFormat = "fasta"; -} -elsif(substr($checkoneline,0,1) eq "@") -{ - $ReadFileFormat = "fastq"; -} -else -{ - die("CHECK: ERROR:Please check $ReadFile1Name\n"); -} -close(check); - -open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n"); -my $checkoneline = <check>; -if(substr($checkoneline,0,1) eq ">") -{ - die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta"); -} -elsif(substr($checkoneline,0,1) eq "@") -{ - die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq"); -} -else -{ - die("CHECK: ERROR:Please check $ReadFile2Name\n"); -} -close(check); - -$Outputfolder= Cwd::abs_path($Outputfolder); -if($Outputfolder eq "/tmp") -{ - while(-e $Outputfolder) - { - my $random_foldername = random_sessid(); - $Outputfolder = "/tmp/".$random_foldername; - } -} - - -if(! -e $Outputfolder) -{ - mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n"; -} -if(! -d $Outputfolder) -{ - die "CHECK: $Outputfolder is not a folder\n"; -} - -if($MapSoftware eq "bowtie") -{ - print "CHECK: whether bowtie installed and in PATH\n"; - my $bowtiechecker=`bowtie --version`; - if($bowtiechecker ne "") - { - print "CHECK: bowtie found, information below:\n"; - print $bowtiechecker,"\n"; - } - else - { - die "CHECK: No bowtie found in PATH, EXIT!\n"; - } -} -elsif($MapSoftware eq "rmap") -{ - print "CHECK: checking rmap...\n"; - if(system("type rmap &>/dev/null") ==0 ) - { - print "CHECK: rmap found, continue\n"; - } - else - { - die "CHECK: No rmap found in PATH, EXIT!\n"; - } -} -else -{ - die "CHECK: option -m only takes rmap or bowtie as inputs\n"; -} - -if($ReadSize == 0) -{ - die "CHECK: Please check option -s Read size\n"; -} - -if($noIRM) -{ - $noIRMstr= "noirm"; -} - -#write more checks later -print "PARAMETERS:\tMapping software: ",$MapSoftware,"\n"; -print "PARAMETERS:\tDatabase prefix: ",$DatabasePrefix,"\n"; -print "PARAMETERS:\tRead end 1: ",$ReadFile1Name,"\n"; -print "PARAMETERS:\tRead end 2: ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name); -print "PARAMETERS:\tCutoff level: ",$CutoffLevel,"\n"; -print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n"; -print "PARAMETERS:\tOutput folder: ",$Outputfolder,"\n"; -print "PARAMETERS:\tOutput prefix: ",$OutputPrefix,"\n"; -print "PARAMETERS:\tRead size: ",$ReadSize,"\n"; -print "PARAMETERS:\tBowtie threads #: ",$BowtieThreads,"\n"; -print "PARAMETERS:\tNo IRM.\n" if ($noIRM); - -if($MapSoftware eq "bowtie") -{ - print "=================STAGE 1 MAPPING===================\n"; - system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads"); - system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name); - print "=================STAGE 2 ESTIMATION================\n"; - - system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; - print "=================STAGE 3 CUTOFF====================\n"; - system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); - - -} - -if($MapSoftware eq "rmap") -{ - print "=================STAGE 1 MAPPING===================\n"; - - system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ; - system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name); - print "=================STAGE 2 ESTIMATION================\n"; - - system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; - print "=================STAGE 3 CUTOFF====================\n"; - system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); - - -} - -print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n"; - -if($onGalaxy ne "") -{ - system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy"); -} - -sub random_sessid -{ - #my @chars = (0..9,a..z,A..Z); - my @chars = ('a'..'z','A'..'Z'); - my $len = 10; - my $string = join '', map {$chars[rand(@chars)]} (1..$len); - return $string; - } - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/SpliceTrap_measure.pl --- a/SpliceTrap/bin/SpliceTrap_measure.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,111 +0,0 @@ -# Author: wuj@cshl.edu -use strict; -use Getopt::Long; -#################### -my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; -my $SrcFolder=$config{SrcFolder}; -#my $SrcFolder="/data/zhang/wuj/tools/SpliceTrap.0.8"; -#################### -my $MapSoftware="eland"; -my $ReadFileFormat=""; -my $ReadFile1Name=""; -my $ReadFile2Name=""; -my $CutoffLevel="H"; -my $OutputPrefix="Result"; -my $CutoffOnly=0; -my $ReadSize=36; - -GetOptions ( - "m:s"=>\$MapSoftware, - "f:s"=>\$ReadFileFormat, - "1:s"=>\$ReadFile1Name, - "2:s"=>\$ReadFile2Name, - "c:s"=>\$CutoffLevel, - "o:s"=>\$OutputPrefix, - "s:i"=>\$ReadSize, -# "local:s"=>\$local, - "rerun"=>\$CutoffOnly -); - - -my $InputParaDes=" Usage of the script (v0.82): - -m Mapping software: eland/bowtie/rmap - -f Read File Format: fasta/fastq - -1 Read File 1 - -2 Read File 2 - -c Cutoff Level:H/M/L - Means High, Middle or Low - -o Output prefix - -s Read Size 36 - --rerun Only run the last step, which is filtering -"; - -if($ReadFile1Name eq "" or $ReadFile2Name eq "" or $ReadFileFormat eq "") -{ - print $InputParaDes; - exit; -} - -if($ReadFileFormat ne "fastq" and $ReadFileFormat ne "fasta") -{ - print $InputParaDes; - exit; - -} - -if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") -{ - print $InputParaDes; - exit; -} - -my $dirname1=`dirname $ReadFile1Name`; -my $dirname2=`dirname $ReadFile2Name`; -if($dirname1 ne ".") -{ - system("ln -s $ReadFile1Name ./"); -} -if($dirname2 ne ".") -{ - system("ln -s $ReadFile2Name ./"); -} - -$ReadFile1Name = `basename $ReadFile1Name`; -chomp($ReadFile1Name); -$ReadFile2Name = `basename $ReadFile2Name`; -chomp($ReadFile2Name); -my $start = time; -if($MapSoftware eq "bowtie") -{ - print "=================STAGE 1 MAPPING===================\n"; - system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat") if not $CutoffOnly; - system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat") if not $CutoffOnly; - print "STAGE 1 FINISHED IN ",time-$start," seconds\n"; - print "=================STAGE 2 ESTIMATION================\n"; - - system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize") if not $CutoffOnly; - print "STAGE 2 FINISHED IN ",time-$start," seconds\n"; - print "=================STAGE 3 CUTOFF====================\n"; - system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel"); - print "STAGE 3 FINISHED IN ",time-$start," seconds\n"; - - - -} - -if($MapSoftware eq "rmap") -{ - print "=================STAGE 1 MAPPING===================\n"; - - system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat") if not $CutoffOnly; - system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat") if not $CutoffOnly; - print "=================STAGE 2 ESTIMATION================\n"; - - system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize") if not $CutoffOnly; - print "=================STAGE 3 CUTOFF====================\n"; - system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel"); - - -} - -print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n"; |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/TXdbgen --- a/SpliceTrap/bin/TXdbgen Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,97 +0,0 @@ -#!/usr/bin/perl -my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; -# this script is to generate TXdb database files from bed/gtf file - -use strict; -use Cwd; -use Getopt::Long; - -my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo'); -foreach my $program (@programs) -{ - die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); - -} - - -my $genomedir = ""; - -my $annofilename = ""; -my $txdbname = "userdefined"; -my $knownonly = 0; -my $gtfinput = 0; - -GetOptions ( - "g:s"=>\$genomedir, - "a:s"=>\$annofilename, - "n:s"=>\$txdbname, - "gtf"=>\$gtfinput, - "knownonly"=>\$knownonly -); - -my $InputParaDes=" Usage of the script: - -g genome fasta file location - -a annotation file (bed/gtf) - -n txdb name - --gtf specify this if annotation file is in gtf format -"; - -if($genomedir eq "" or $annofilename eq "") -{ - print $InputParaDes; - exit; -} - -$genomedir = Cwd::abs_path($genomedir); -$annofilename = Cwd::abs_path($annofilename); - -my $annofilebase = `basename $annofilename`; -chomp($annofilebase); -#need a cache folder to avoid mess - -my $cachefolder = $annofilebase.".cache"; - -if (! -e $cachefolder) -{ - mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n"; -} -if($gtfinput) -{ - print "TXDBGEN: converting gtf file into bed format\n"; - system ("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed"); - $annofilename = "$cachefolder/$annofilebase.bed"; -} - - -print "TXDBGEN: scan $annofilename for AS events...\n"; -system("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp"); -print "TXDBGEN: fetch sequences from $genomedir...\n"; -system("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort"); -#get fasta file list -system("ls $genomedir/*.fa >$cachefolder/chr.list"); - -system("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta"); - -print "TXDBGEN: generate files for parallel computing...\n"; -if (! -e "$cachefolder/parallel") -{ - mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n"; -} -system("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed"); -system("rm $cachefolder/out.bed"); -system("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi"); -system("rm $cachefolder/TXdb.tmp.evi"); -system("bash $SrcFolder/splitdb.sh $cachefolder/parallel"); -print "TXDBGEN: build Bowtie index...\n"; - -if (! -e "$cachefolder/btw") -{ - mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n"; -} -system("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb"); -system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list"); -print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n"; - -system("mv $cachefolder $SrcFolder/../db/$txdbname"); -print "TXDBGEN: Done!\n"; - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/TXdbgen.pl --- a/SpliceTrap/bin/TXdbgen.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,95 +0,0 @@ -# this script is to generate TXdb database files from bed/gtf file - -use strict; -use Cwd; -use Getopt::Long; - -my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo'); -foreach my $program (@programs) -{ - die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); - -} - - -my $genomedir = ""; - -my $annofilename = ""; -my $txdbname = "userdefined"; -my $knownonly = 0; -my $gtfinput = 0; - -GetOptions ( - "g:s"=>\$genomedir, - "a:s"=>\$annofilename, - "n:s"=>\$txdbname, - "gtf"=>\$gtfinput, - "knownonly"=>\$knownonly -); - -my $InputParaDes=" Usage of the script: - -g genome fasta file location - -a annotation file (bed/gtf) - -n txdb name - --gtf specify this if annotation file is in gtf format -"; - -if($genomedir eq "" or $annofilename eq "") -{ - print $InputParaDes; - exit; -} - -$genomedir = Cwd::abs_path($genomedir); -$annofilename = Cwd::abs_path($annofilename); - -my $annofilebase = `basename $annofilename`; -chomp($annofilebase); -#need a cache folder to avoid mess - -my $cachefolder = $annofilebase.".cache"; - -if (! -e $cachefolder) -{ - mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n"; -} -if($gtfinput) -{ - print "TXDBGEN: converting gtf file into bed format\n"; - system ("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed"); - $annofilename = "$cachefolder/$annofilebase.bed"; -} - - -print "TXDBGEN: scan $annofilename for AS events...\n"; -system("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp"); -print "TXDBGEN: fetch sequences from $genomedir...\n"; -system("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort"); -#get fasta file list -system("ls $genomedir/*.fa >$cachefolder/chr.list"); - -system("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta"); - -print "TXDBGEN: generate files for parallel computing...\n"; -if (! -e "$cachefolder/parallel") -{ - mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n"; -} -system("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed"); -system("rm $cachefolder/out.bed"); -system("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi"); -system("rm $cachefolder/TXdb.tmp.evi"); -system("bash $SrcFolder/splitdb.sh $cachefolder/parallel"); -print "TXDBGEN: build Bowtie index...\n"; - -if (! -e "$cachefolder/btw") -{ - mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n"; -} -system("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb"); -system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list"); -print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n"; - -system("mv $cachefolder $SrcFolder/../db/$txdbname"); -print "TXDBGEN: Done!\n"; - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/apply_cutoff.sh --- a/SpliceTrap/bin/apply_cutoff.sh Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,36 +0,0 @@ -#SrcFolder="/data/zhang/wuj/scripts/SpliceTrap.0.8"; - -outputname=$1; -CutoffLevel=$2; -Outputfolder=$3 -SrcFolder=$5 -JunctionCut=$4 -noIRM=$8 - -echo "CUTOFF: Entering cutoff step..."; -echo "CUTOFF: Cache folder: $outputname.filter" -mkdir $Outputfolder/$outputname.filter -cd $Outputfolder/$outputname.filter -ln -s ../$outputname.ratio -ln -s ../$outputname.nums -echo "CUTOFF: spliting file....and generating shell scripts..." -split -11000 $outputname.ratio - -for ratiofiles in x* -do - echo "perl $SrcFolder/ApplyCutoff.jie.pl $ratiofiles $CutoffLevel $JunctionCut $noIRM > $ratiofiles.out" >>filter.sh -done - -echo "CUTOFF: submit scripts..." -perl $SrcFolder/batchqsub.pl filter.sh -echo "CUTOFF: merging file...." -cat *.out >../$outputname.raw -cd ../ -#perl /data/zhang/wuj/tools/SpliceTrap.0.8/ApplyCutoff.jie.pl $outputname.ratio $outputname.nums 8 >$outputname.txt -if [ "$noIRM" ];then - awk '{printf $22"\t"$2"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$12"\t"$13"\t"$14"\t"$15"\n"}' $outputname.raw >$outputname.txt -else - awk '{printf $22"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$12"\t"$13"\t"$14"\t"$15"\n"}' $outputname.raw >$outputname.txt -fi -rm $outputname.filter -rf - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/batch_para_cov10p_fit.sh --- a/SpliceTrap/bin/batch_para_cov10p_fit.sh Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,101 +0,0 @@ -#!/bin/bash -ReadFile1Name=`basename $1` -ReadFile2Name=`basename $2` -outputname=$3 -readsize=$4 -DatabasePrefix=$5 -Outputfolder=$6 -SrcFolder=$7 -noIRM=$8 - -cd $Outputfolder -if [ $ReadFile1Name != $ReadFile2Name ];then - echo "ESTIMATE: Getting fragment size information from data..." - perl $SrcFolder/get.frag.size.pl $ReadFile1Name.nomt $ReadFile2Name.nomt $readsize - perl $SrcFolder/get.hist.pl $ReadFile1Name.nomt.fragsize -w=1 -c=1 -else - echo "ESTIMATE: Generating the other half of reads..." - readnum=`wc -l $ReadFile1Name.nomt |cut -f1 -d" "` - for (( i=0; i<$readnum; i++ )) - do - echo "NM" >>$ReadFile1Name.f.nomt - done - echo "#Width:1" >$ReadFile1Name.nomt.fragsize.hist -fi -echo "ESTIMATE: Creating cache folder.." -if [ $ReadFile1Name != $ReadFile2Name ];then - ReadFile2FinalName=$ReadFile2Name.nomt -else - ReadFile2FinalName=$ReadFile1Name.f.nomt -fi - -mkdir $ReadFile1Name.result -cd $ReadFile1Name.result -ln -s ../$ReadFile1Name.nomt ./ -ln -s ../$ReadFile2FinalName ./ -ln -s ../$ReadFile1Name.nomt.fragsize.hist ./ -echo "ESTIMATE: Split mapping results via chromosomes..." -perl $SrcFolder/scan_nomt.pl $ReadFile1Name.nomt $ReadFile2FinalName -loopi=0 -echo "ESTIMATE: Generating shell scripts for Loop $loopi..." -while read chrlist -do - chr=`echo $chrlist |tr -d "\n"` - for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.* - do - base=`basename $dbfile` - echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -s $readsize" >>r$loopi.sh - done -done <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list - -echo "ESTIMATE: Submit shell scripts for Loop $loopi..." -perl $SrcFolder/batchqsub.pl r$loopi.sh -echo "ESTIMATE: Loop $loopi done..." - -cat $ReadFile1Name.$loopi.*.ratio >$outputname.$loopi.ratio -cat $ReadFile1Name.$loopi.*.log >$outputname.$loopi.log -cat $ReadFile1Name.$loopi.*.nums >$outputname.$loopi.nums -rm $ReadFile1Name.$loopi.*.ratio -rm $ReadFile1Name.$loopi.*.log -rm $ReadFile1Name.$loopi.*.nums - - -if [ "$noIRM" ];then - echo "ESTIMATE: No IRM correction, skipped..." - mv $outputname.$loopi.ratio $outputname.ratio - mv $outputname.$loopi.log $outputname.log - mv $outputname.$loopi.nums $outputname.nums -else - - echo "ESTIMATE: derive IRMs from data..." - awk '{if ($15>=10) printf $0"\n"}' $outputname.$loopi.ratio >$outputname.mle - perl $SrcFolder/get_event_dist_fit.pl $outputname.mle -c=2 -w=0.001 - - loopi=1 - echo "ESTIMATE: Generating shell scripts for Loop $loopi..." - while read chrlist - do - chr=`echo $chrlist |tr -d "\n"` - for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.* - do - base=`basename $dbfile` - echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -b $outputname.mle.fit.hist -s $readsize" >>r$loopi.sh - done - done <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list - echo "ESTIMATE: Submit shell scripts for Loop $loopi..." - -#perl $SrcFolder/qsub/batchqsub.pl r$loopi.sh $taskname - perl $SrcFolder/batchqsub.pl r$loopi.sh - echo "ESTIMATE: Loop $loopi done..." - cat $ReadFile1Name.$loopi.*.ratio >$outputname.ratio - cat $ReadFile1Name.$loopi.*.log >$outputname.log - cat $ReadFile1Name.$loopi.*.nums >$outputname.nums - rm $ReadFile1Name.$loopi.*.ratio - rm $ReadFile1Name.$loopi.*.log - rm $ReadFile1Name.$loopi.*.nums -fi - -mv $outputname.ratio $outputname.log $outputname.nums ../ -cd ../ -rm $ReadFile1Name.result -rf -rm $ReadFile1Name.nomt $ReadFile2FinalName |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/batchqsub.pl --- a/SpliceTrap/bin/batchqsub.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,158 +0,0 @@ - -# modified from Chenghai Xue's script - -#test if qsub works - -my $qsub_checker = 0; -if(system("hash qsub >/dev/null")) -{ - $qsub_checker = 0; -} -else -{ - $test_randname=random_sessid(); - system("mkdir $test_randname;"); - system("echo 'mkdir $test_randname/$test_randname' >$test_randname/$test_randname.sh"); - system("mkdir $test_randname/qsub_cache"); - system ("qsub -cwd -v TMPDIR=$test_randname/qsub_cache -V -e $test_randname/qsub_cache -o $test_randname/qsub_cache -N $test_randname $test_randname/$test_randname.sh"); - - $status=0; - $sec=5; - while(1) - { - $chkresult=`qstat |grep $test_randname |wc -l`; - chomp($chkresult); - if ($chkresult == 0) - { - $sec=10; - $status++; - last if ($status==3); - } - else - { - $status=0; - $sec=5; - } - print "QSTAT: $chkresult testing tasks running.....$taskname\n"; - sleep($sec); - } - print "QSUB: testing done\n"; - print "$test_randname/$test_randname\n"; - if(-d "$test_randname/$test_randname") - { - $qsub_checker=1; - print "QSUB: working well!\n"; - } -#$qsub_checker=`qsub </dev/null 2>&1|grep stdin|wc -l`; - system("rm $test_randname -rf"); -} -#$qsub_checker=0; -if($qsub_checker == 0) -{ - print "QSUB: No GRID qsub found\n"; - print "QSUB: if you are using PBS qsub, please wait for the next version! Thanks.\n"; - print "QSUB: Running in serial mode...\n"; - system("sh $ARGV[0]"); - exit; -} - -$performListFile = $ARGV[0]; -$taskname = ""; -$taskname = $ARGV[1]; -if (not $taskname) -{ - $taskname=random_sessid(); -} -#$outfullDir = $ARGV[2]; -$outfullDir ="qsub_cache"; - -# correct path -if(! (-d $outfullDir) ){ - system ("mkdir $outfullDir"); -} - -# create a temp cache -@temp = split("/", $0); -$prog = pop @temp; -$cache = $outfullDir."/".$prog."_".$taskname; -if(! (-d $cache) ){ - system ("mkdir $cache"); -} - -open (IN_1, "$performListFile") or die "can not open file $performListFile to read\n"; -@performList = (<IN_1>); -chomp @performList; -close IN_1 or die "can't close the input file : $!"; - - -$scriptListFile = $outfullDir."/".$taskname."_scripts.list"; -open (FSCRIPLIST, ">$scriptListFile"); -for($i=0; $i<@performList; $i++){ - $scriptFile = $outfullDir."/".$taskname."_script$i.sh"; - print FSCRIPLIST "$scriptFile\n"; - open (FOUT, ">$scriptFile"); - -# print FOUT "#!/usr/bin/sh\n"; - print FOUT "$performList[$i]\n"; - -# print OUT_1 "$outfile.map\n"; - close (FOUT); -} -close (FSCRIPLIST); - -open (IN_2, "$scriptListFile") or die "can not open file $scriptListFile to read\n"; -$basename=`basename $performListFile`; -chop($basename); -$taskname=$taskname."_".$basename; -#print $basename; -while(<IN_2>){ - $f = $_; - chomp $f; - @temp = split("/", $f); - $base = pop @temp; - - #use default queues -# print "/opt/n1ge6/bin/lx24-amd64/qsub -l virtual_free=1.7G -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f\n\n\n"; - #system ("qsub -l virtual_free=1.7G -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f"); - system ("qsub -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f"); - -} -close IN_2 or die "can't close the input file : $!"; - -#start to check stats of qsub tasks -####################################### -my $taskname_query=substr($taskname,0,10); - - -$status=0; -$sec=60; -while(1) -{ - $chkresult=`qstat |grep $taskname_query |wc -l`; - chomp($chkresult); - if ($chkresult == 0) - { - $sec=10; - $status++; - last if ($status==3); - } - else - { - $status=0; - $sec=60; - } - print "QSTAT: $chkresult tasks running.....$taskname\n"; - sleep($sec); -} -print "QSUB: done: $taskname \n"; -###################################### - -# -sub random_sessid -{ - #my @chars = (0..9,a..z,A..Z); - my @chars = ('a'..'z','A'..'Z'); - my $len = 10; - my $string = join '', map {$chars[rand(@chars)]} (1..$len); - return $string; -} |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/batchqsub.pl_orig --- a/SpliceTrap/bin/batchqsub.pl_orig Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,158 +0,0 @@ - -# modified from Chenghai Xue's script - -#test if qsub works - -my $qsub_checker = 0; -if(system("hash qsub >/dev/null")) -{ - $qsub_checker = 0; -} -else -{ - $test_randname=random_sessid(); - system("mkdir $test_randname;"); - system("echo 'mkdir $test_randname/$test_randname' >$test_randname/$test_randname.sh"); - system("mkdir $test_randname/qsub_cache"); - system ("qsub -cwd -v TMPDIR=$test_randname/qsub_cache -V -e $test_randname/qsub_cache -o $test_randname/qsub_cache -N $test_randname $test_randname/$test_randname.sh"); - - $status=0; - $sec=5; - while(1) - { - $chkresult=`qstat |grep $test_randname |wc -l`; - chomp($chkresult); - if ($chkresult == 0) - { - $sec=10; - $status++; - last if ($status==3); - } - else - { - $status=0; - $sec=5; - } - print "QSTAT: $chkresult testing tasks running.....$taskname\n"; - sleep($sec); - } - print "QSUB: testing done\n"; - print "$test_randname/$test_randname\n"; - if(-d "$test_randname/$test_randname") - { - $qsub_checker=1; - print "QSUB: working well!\n"; - } -#$qsub_checker=`qsub </dev/null 2>&1|grep stdin|wc -l`; - system("rm $test_randname -rf"); -} -#$qsub_checker=0; -if($qsub_checker == 0) -{ - print "QSUB: No GRID qsub found\n"; - print "QSUB: if you are using PBS qsub, please wait for the next version! Thanks.\n"; - print "QSUB: Running in serial mode...\n"; - system("sh $ARGV[0]"); - exit; -} - -$performListFile = $ARGV[0]; -$taskname = ""; -$taskname = $ARGV[1]; -if (not $taskname) -{ - $taskname=random_sessid(); -} -#$outfullDir = $ARGV[2]; -$outfullDir ="qsub_cache"; - -# correct path -if(! (-d $outfullDir) ){ - system ("mkdir $outfullDir"); -} - -# create a temp cache -@temp = split("/", $0); -$prog = pop @temp; -$cache = $outfullDir."/".$prog."_".$taskname; -if(! (-d $cache) ){ - system ("mkdir $cache"); -} - -open (IN_1, "$performListFile") or die "can not open file $performListFile to read\n"; -@performList = (<IN_1>); -chomp @performList; -close IN_1 or die "can't close the input file : $!"; - - -$scriptListFile = $outfullDir."/".$taskname."_scripts.list"; -open (FSCRIPLIST, ">$scriptListFile"); -for($i=0; $i<@performList; $i++){ - $scriptFile = $outfullDir."/".$taskname."_script$i.sh"; - print FSCRIPLIST "$scriptFile\n"; - open (FOUT, ">$scriptFile"); - -# print FOUT "#!/usr/bin/sh\n"; - print FOUT "$performList[$i]\n"; - -# print OUT_1 "$outfile.map\n"; - close (FOUT); -} -close (FSCRIPLIST); - -open (IN_2, "$scriptListFile") or die "can not open file $scriptListFile to read\n"; -$basename=`basename $performListFile`; -chop($basename); -$taskname=$taskname."_".$basename; -#print $basename; -while(<IN_2>){ - $f = $_; - chomp $f; - @temp = split("/", $f); - $base = pop @temp; - - #use default queues -# print "/opt/n1ge6/bin/lx24-amd64/qsub -l virtual_free=1.7G -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f\n\n\n"; - #system ("qsub -l virtual_free=1.7G -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f"); - system ("qsub -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f"); - -} -close IN_2 or die "can't close the input file : $!"; - -#start to check stats of qsub tasks -####################################### -my $taskname_query=substr($taskname,0,10); - - -$status=0; -$sec=60; -while(1) -{ - $chkresult=`qstat |grep $taskname_query |wc -l`; - chomp($chkresult); - if ($chkresult == 0) - { - $sec=10; - $status++; - last if ($status==3); - } - else - { - $status=0; - $sec=60; - } - print "QSTAT: $chkresult tasks running.....$taskname\n"; - sleep($sec); -} -print "QSUB: done: $taskname \n"; -###################################### - -# -sub random_sessid -{ - #my @chars = (0..9,a..z,A..Z); - my @chars = ('a'..'z','A'..'Z'); - my $len = 10; - my $string = join '', map {$chars[rand(@chars)]} (1..$len); - return $string; -} |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/beta_fit.R --- a/SpliceTrap/bin/beta_fit.R Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,39 +0,0 @@ -args = commandArgs(); -input_file=args[4]; -#input_file="control_a.0.1.flt.ratio.tmpca"; -#print (input_file); - - -library(MASS); - -p=array(0,dim=1000); - -for (i in 0:999) -{ - p[i]=0.001 -} - -if ( file.info(input_file)["size"]>0 ) -{ - -data=read.table(input_file); -col=1; -x=data[,col]; -x1=x; -if (length(x)>10) -{ - x1[x==0] <- .Machine$double.eps; - x1[x==1] <- (1-.Machine$double.eps); - xbar=mean(x1) - xvar=var(x1) - a <- (xbar*(1-xbar)/xvar - 1)*xbar - b <- (1-xbar)*a/xbar - (f=fitdistr(x1,"beta",list(shape1=a,shape2=b))) - for (i in 0:999) - { - p[i]=dbeta(i/1000,f[["estimate"]][["shape1"]],f[["estimate"]][["shape2"]]) - } -} - -} -write(p,file=paste(input_file,"fit",sep="."),ncolumns=1); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/bowtie2eland.pl --- a/SpliceTrap/bin/bowtie2eland.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,175 +0,0 @@ -use strict; - -my $bowtiefilename=$ARGV[0]; -my $readsfilename=$ARGV[1]; -my $elandfilename=$ARGV[2]; - -open(readsfile, $readsfilename); - -my $detectformat=`head -c 1 $readsfilename`; - -#my $firstletter=$detectformat; -#my $looplinenumbers=4; - -#$looplinenumbers=2 if ($detectformat eq ">"); -open(bowtiefile, $bowtiefilename); -open(elandfile, ">".$elandfilename); -my $readfilelinenum=0; -# hash the positions of the alignments for each read id -my %readposhash; -my $bowtiepos = tell (bowtiefile); -while (my $bowtieline=<bowtiefile>) -{ - my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline); - if (not exists $readposhash{$bowtiereadname} ) - { - $readposhash{$bowtiereadname} = $bowtiepos; - } - $bowtiepos = tell (bowtiefile); -} - -while(my $readline=<readsfile>) -{ - $readfilelinenum++; - if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1)) - { - chomp($readline); - my $readname=substr($readline, 1, length($readline)-1); - if( not exists $readposhash{$readname} ) - { - print elandfile $readname,"\tNA\tNM\n"; - next; - } - else - { - my @mapped_ids=(); - my @mapped_pos=(); - my @mapped_strand=(); - seek(bowtiefile, $readposhash{$readname}, 0); - while (my $bowtieline=<bowtiefile>) - { - my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline); - if($readname eq $bowtiereadname) - { - push(@mapped_ids, $mapped_id); - push(@mapped_pos,$pos); - push(@mapped_strand,$strand); - } - else - { - last; - } - - } - print elandfile $readname,"\t"; - print elandfile "NA\t"; - print elandfile scalar(@mapped_ids),":0:0\t"; - for(my $i=0;$i<@mapped_ids;$i++) - { - print elandfile "/",$mapped_ids[$i]; - print elandfile ":",$mapped_pos[$i]+1; - if($mapped_strand[$i] eq "+") - { - print elandfile "F0,"; - } - else - { - print elandfile "R0,"; - } - - } - print elandfile "\n"; - - } - } -} - -close(elandfile); -close(bowtiefile); -close(readsfile); - -exit; -while(my $bowtieline=<bowtiefile>) -{ - my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline); - while(my $readline=<readsfile>) - { - $readfilelinenum++; - if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1)) - #if($readline=~/^$detectformat/) - { - chomp($readline); - my $readname=substr($readline, 1, length($readline)-1); - - - if($readname ne $bowtiereadname) - { - print elandfile $readname,"\tNA\tNM\n"; - next; - } - else - { - my @mapped_ids=(); - my @mapped_pos=(); - my @mapped_strand=(); - push(@mapped_ids, $mapped_id); - push(@mapped_pos,$pos); - push(@mapped_strand,$strand); - while(1) - { - $bowtieline=<bowtiefile>; - my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline); - if( $bowtiereadname eq $readname ) - { - push(@mapped_ids, $mapped_id); - push(@mapped_pos,$pos); - push(@mapped_strand,$strand); - } - else - { - seek(bowtiefile, -1*length($bowtieline),1); - print elandfile $readname,"\t"; - print elandfile "NA\t"; - print elandfile scalar(@mapped_ids),":0:0\t"; - for(my $i=0;$i<@mapped_ids;$i++) - { - print elandfile "/",$mapped_ids[$i]; - print elandfile ":",$mapped_pos[$i]+1; - if($mapped_strand[$i] eq "+") - { - print elandfile "F0,"; - } - else - { - print elandfile "R0,"; - } - - } - print elandfile "\n"; - last; - } - } - last; - - } - } - } -} - -while(my $readline=<readsfile>) -{ - $readfilelinenum++; - if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1)) - #if($readline=~/^$detectformat/) - { - chomp($readline); - my $readname=substr($readline, 1, length($readline)-1); - print elandfile $readname,"\tNA\tNM\n"; - } -} - -close(elandfile); -close(bowtiefile); - - -close(readsfile); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/calc_pval.R --- a/SpliceTrap/bin/calc_pval.R Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,29 +0,0 @@ -args = commandArgs(); -input_file=args[4]; -av=as.numeric(args[5]); -sd=as.numeric(args[6]); -nu=as.numeric(args[7]); - - -data=read.table(input_file); - -col=4; -x=data[,col]; -pup=pnorm(x, mean=av, sd=sd, lower.tail = FALSE); -adpup=p.adjust(pup,method="fdr"); -pdn=pnorm(x, mean=av, sd=sd, lower.tail = TRUE); -adpdn=p.adjust(pdn,method="fdr"); - -p=pup; - -size = length(x); - -for (i in 1:size) -{ - if(x[i]<0) - { - p[i]=pdn[i]; - } -} -write(p, file=paste(input_file,"p",sep="."),ncolumns=1); - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/downloaddb.pl --- a/SpliceTrap/bin/downloaddb.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,54 +0,0 @@ -use strict; -use Cwd; - -my %flags=( - "hg18"=>0, - "mm9"=>0, - "rn4"=>0, -); -my $PROG = $0; -my $PROG_ABS_PATH = Cwd::abs_path($PROG); -$PROG_ABS_PATH = `dirname $PROG_ABS_PATH`; -chomp($PROG_ABS_PATH); -print "\n\tPrepare to download databases from CSHL...\n"; - -print "\tWhich database(s) do you want to download?\n"; -print "\tChoose from\t "; -foreach my $key (keys %flags) -{ - print $key,"/"; -} -print "ALL \n\n\tseparated by blank...don't enter anything if you don't want to download!\n\nPlease enter:[NONE]"; - -my @dbnames = split(/\s+/,<>); -for (my $i=0;$i<@dbnames;$i++) -{ - if (uc($dbnames[$i]) eq "ALL" ) - { - foreach my $key (keys %flags) - { - $flags{$key}=1; - } - last; - } - - if( exists $flags{$dbnames[$i]}) - { - $flags{$dbnames[$i]} = 1; - } - #system ("wget http://rulai.cshl.edu/splicetrap/db/") -} - -foreach my $key (keys %flags) -{ - if ($flags{$key} ==1) - { - system ("wget http://rulai.cshl.edu/splicetrap/db/".$key.".tar.gz"); - print "untar the database file for $key...please wait..."; - system ("tar -ixzf $key.tar.gz"); - system ("rm $key.tar.gz"); - mkdir "$key/parallel"; - print "creating files for parallel computing...\n"; - system("bash $PROG_ABS_PATH/splitdb.sh $key/parallel") - } -} |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/get.frag.size.pl --- a/SpliceTrap/bin/get.frag.size.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,55 +0,0 @@ -#for the results from paired end -#The two inputs are the results from the two ends -use strict; -my $read_size = $ARGV[2]; -open(input1, $ARGV[0]); -open(input2, $ARGV[1]); -open(output, ">$ARGV[0].fragsize"); -#open(fusefile,">$ARGV[0].fuse"); - -#my $LongMarker="L"; -#my $ShortMarker="S"; - -while(my $line1=<input1>) -{ - my $line2=<input2>; - chomp($line1); - chomp($line2); - next if($line1=~/$\NM/ or $line2=~/$\NM/ or $line1=~/$\MT/ or $line2=~/$\MT/); - my @array1 = split("\t",$line1); - #my $read_size=length($array1[1]); - my @array2 = split("\t",$line2); - my $match1=$array1[3]; - my $match2=$array2[3]; -# my $marker=$LongMarker.$ShortMarker; - my @sizes=(); - #while($match1=~/\/(\S[^,]*\[[$marker]\])\S[^,]*:(\d*)[RF]/g) - while($match1=~/\/(\S[^,]*\[\w+\])\S[^,]*:(\d*)[RF]/g) - { - my $name=$1; - my $posa=$2; - #print $name,"\n"; - - if($match2=~/\Q$name\E\S[^,]*:(\d*)[RF]/) - { - #print "match\n"; - my $posb=$1; - push @sizes, abs($posb-$posa)+$read_size; - - } - - } - my %saw; - @saw{@sizes}=(); - my @keya= keys %saw; - #print scalar(@keya),"\n"; - if(scalar(@keya)==1) - { - print output $keya[0],"\n"; - } - -} -#close(fusefile); -close(output); -close(input2); -close(input1); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/get.hist.pl --- a/SpliceTrap/bin/get.hist.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,49 +0,0 @@ -use Getopt::Long; -use strict; - -my $InputFileName=$ARGV[0]; -my $OutputFileName = $ARGV[0].".hist"; -#$OutputFileName=$ARGV[1] if $ARGV[1] ne ""; -my $width=0.01; -my $verbose=1; -my $col=2; -my $start=0; -my $end=1.000; - - -GetOptions ( - 'w:f'=>\$width, - 'c:i'=>\$col, - 'start:f'=>\$start, - 'end:f'=>\$end, - 'v'=>\$verbose -); - -$width=$width*1; -#print "IRM: #Generate hist with delta width of $width \n"; -#print "IRM: #data source from col $col\n"; - -$col=$col-1; - -my @hist; -my $totalnum=0; - -open(Input, $InputFileName); -while(my $line=<Input>) -{ - next if($line=~/^#/); - chomp($line); - my @array=split(/\s/,$line); - $hist[int($array[$col]/$width)]++; - $totalnum++; - -} -close(Input); - -open(OutputFile, ">$OutputFileName"); -print OutputFile "#Width:$width\n"; -for(my $i=0;$i<@hist;$i++) -{ - print OutputFile $hist[$i]/$totalnum,"\n"; -} -close(OutputFile); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/get_bed_fa_j.pl --- a/SpliceTrap/bin/get_bed_fa_j.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| b'@@ -1,330 +0,0 @@\n-# Adapted from Chenghai Xue\'s script\n-\n-$starttime=time();\n-\n-$input_file_1 = $ARGV[0];\t# exon junction file\n-$input_file_2 = $ARGV[1];\t# genome file list\n-$output_file_1 = $ARGV[2];\t# exon junction bed (might be less than input_file_1\n-$output_file_2 = $ARGV[3];\t# exon junction fa\n-#$leftLen = $ARGV[4];\n-#$rightLen = $ARGV[5];\n-\r\n-open(IN_1, "$input_file_1") or die "can\'t open the input file : $!";\n-open(IN_2, "$input_file_2") or die "can\'t open the input file : $!";\n-open OUT_1, ">$output_file_1" or die "Can not open output_file : $!";\r\n-open OUT_2, ">$output_file_2" or die "Can not open output_file : $!";\r\n-\n-@chromList = (<IN_2>);\n-chomp(@chromList);\n-$len_chromList = @chromList;\n-print "BED2FA: in $input_file_2, found $len_chromList chromosomes\\n";\n-foreach $one (@chromList){\n-\tif($one =~ /\\/(chr.[^\\/]*?)\\.*fa$/i){\n-\t\t$chr_hash{$1} = $one;\n-\t\t#print $1,"\\n";\t\n-\t}\n-}\n-@key_chr_hash = keys(%chr_hash);\r\n-$len_key_chr_hash = @key_chr_hash;\r\n-@sort_key_chr_hash = sort_chromNo(@key_chr_hash);\n-$len_sort_key_chr_hash = @sort_key_chr_hash;\n-#for($i=0; $i<$len_sort_key_chr_hash; $i++){\r\n-#\tprint "$sort_key_chr_hash[$i]\t$chr_hash{$sort_key_chr_hash[$i]}\\n";\r\n-#}\n-\n-$num_1=0;\n-$num_2=0;\n-$num_count_chrom=0;\n-my ($chrom, $chromStart, $chromEnd, $name, $score, $strand, $thickStart, $thickEnd, $itemRgb, $blockCount, $blockSizes, $blockStarts);\r\n-$current_chrom = "";\n-while(<IN_1>){\n-\t$num_1++;\r\n-\t$line = $_;\n-\tchomp $line;\n-\t#print $line,"\\n";\n-\t@cols = split ("\\t", $line);\n- if(scalar(@cols)==12)\n-\t{\n-\t($chrom, $chromStart, $chromEnd, $name, $score, $strand, $thickStart, $thickEnd, $itemRgb, $blockCount, $blockSizes, $blockStarts) = @cols;\n-\t}\n-\tif(scalar(@cols)!=12)\n-\t{\n-\t\t($chrom, $chromStart, $chromEnd, $name, $score, $strand)=@cols;\n-\t\t$thickStart=$chromStart;\n-\t#\tprint $thickStart,"\\n";\n-\t\t$thickEnd = $chromEnd;\n-\t\t$blockCount=1;\n-\t\t$blockSizes=$chromEnd-$chromStart;\n-\t\t$blockStarts = 0;\t\n-\t}\n-\t$strand="+" if !$strand;\n-\t@a_blockSizes = split (/\\,/, $blockSizes);\n-\t@a_blockStarts = split (/\\,/, $blockStarts);\n-\tif($chrom ne $current_chrom){\n-\t\tif($num_1 != 1){\n-\t\t\tprint "$num_chr_1\t$num_chr_2\t$len_contigSeqStr\\n";\t\t\t\n-\t\t}\n-\t\tprint "BED2FA: $chrom:\t";\n-\t\t\n-\t\t$num_chr_1=0;\n-\t\t$num_chr_2=0;\t\t\n-\n-\t\tif(exists $chr_hash{$chrom}){\n-\t\t\t$num_count_chrom++;\n-\t\t\t$current_chrom = $chrom;\n-\t\t\t#print $current_chrom,"\\n";\n-#=pod\t\t\t\n-\t\t\t$chromFastaFile = $chr_hash{$chrom};\n-\t\t\t#print $chromFastaFile,"\\n";\n-\t\t\topen($fin, "<$chromFastaFile") or die "can\'t open the chrom file : $!";\n-\t\t\tlocal ($/) = undef;\n-\t\t\t$contigSeqStr = <$fin>;\n-\t\t\tclose ($fin);\n-\t\t\t#print $contigSeqStr,"mark\\t";\n-\t\t\t$contigSeqStr =~s/^\\>.*?\\n//g;\n- #print $contigSeqStr,"mark2\\t";\n-\n-\t\t\t$contigSeqStr =~s/\\s|\\n//g;\n- #print $contigSeqStr,"mark3\\n";\n-\n-\t\t\t$len_contigSeqStr = length $contigSeqStr;\n-#=cut\n-\t\t}\n-\t\telse{\n-\t\t\t$num_chr_1++;\n-\t\t\tnext;\n-\t\t}\n-\t}\n-\t$num_chr_1++;\n-\t\r\n-# modify from here................................\n-\tmy @Starts;\n-\tmy @Ends;\n-\tmy @JuncSeq;\n-\tmy $ssStrTag=1;\n-\tfor($i_wuj=0;$i_wuj<$blockCount;$i_wuj++)\n-\t{\n-\t\t$Starts[$i_wuj] = $chromStart + $a_blockStarts[$i_wuj];\n-\t\t$Ends[$i_wuj] = $Starts[$i_wuj] + $a_blockSizes[$i_wuj];\n-\t\t$JuncSeq[$i_wuj] = uc substr ($contigSeqStr,$Starts[$i_wuj], $a_blockSizes[$i_wuj]);\n-\t\tif($strand eq "-"){\n-\t\t $JuncSeq[$i_wuj] = uc string_reverse_complement(lc $JuncSeq[$i_wuj]);\n-\t\t}\n-\t}\t \n- # for($i_wuj=0;$i_wuj<$blockCount-1;$i_wuj++)\n-#\t{\n-#\t $ssStr = uc substr ($contigSeqStr, $Ends[$i_wuj], 2) . substr ($contigSeqStr, $Starts[$i_wuj+1] - 2, 2);\n-#\t if($strand eq "-"){\n- # $ssStr = uc string_reverse_complement(lc $ssStr);\n- #$ssStr = $rc_ssStr;\n-#\t }\n-#\t\t$ssStrTag = 0 if ($ssStr ne "GTAG");\n-\t\t\n- # }\n-#\tif($ssStrTag ==1){\n- if(1){\n-\t\t$num_2++;\n-\t\t$num_chr_2++;\n-\t\tprint OUT_1 "$line\\n";\n-\t\t#print OUT_2 ">$name\\|$chrom\\|$chromStart\\|$chromEnd\\|$strand\\|$ssStr\\|$num_2\\n$junctionSeqStrLeft'..b'ret;\r\n-}\n-\r\n-sub sort_chromNo{\n-\tlocal(@chrom) = @_;\n-\tlocal($len_key_chr_hash, $i, @sort_chr_hash);\n-\tlocal(@digit_random, @words_random, @digit_other_1, @digit_other_2, @words_other_1, @words_other_2, @digit, @words);\n-\tlocal(@sort_digit, @sort_words, @sort_digit_random, @sort_words_random, @sort_digit_other, @sort_words_other);\n-\tlocal($len_digit, $len_words, $len_digit_random, $len_words_random, $len_digit_other, $len_words_other, $term);\n-\t\n-\t$len_key_chr_hash = @chrom;\n-\t# sort via chr number for printing result\r\n-\tfor($i=0; $i<$len_key_chr_hash; $i++){\n-\t\tif($key_chr_hash[$i] =~ /chr(\\d+)\\_random/){\r\n-\t\t\tpush(@digit_random, $1);\r\n-\t\t}\r\n-\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)\\_random/){\r\n-\t\t\tpush(@words_random, $1);\r\n-\t\t}\r\n-\t\telsif($key_chr_hash[$i] =~ /chr(\\d+)\\_([\\w\\d\\_]+)/){\r\n-\t\t\tpush(@digit_other_1, $1);\n-\t\t\tpush(@digit_other_2, $2);\r\n-\t\t}\r\n-\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)\\_([\\w\\d\\_]+)/){\r\n-\t\t\tpush(@words_other_1, $1);\n-\t\t\tpush(@words_other_2, $2);\r\n-\t\t}\r\n-\t\telsif($key_chr_hash[$i] =~ /chr(\\d+)/){\r\n-\t\t\tpush(@digit, $1);\r\n-\t\t}\r\n-\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)/){\r\n-\t\t\tpush(@words, $1);\r\n-\t\t}\r\n-\t\telse{\r\n-\t\t\tprint "BED2FA: There is unknown type of chromosomes: $key_chr_hash[$i]\\n";\r\n-\t\t}\r\n-\t}\r\n-\t@sort_digit = sort by_mostly_numeric @digit;\r\n-\t@sort_words = sort by_mostly_string @words;\n-\t@sort_digit_random = sort by_mostly_numeric @digit_random;\r\n-\t@sort_words_random = sort by_mostly_string @words_random;\n-\t@sort_digit_other = sort_2_array_number_string(\\@digit_other_1, \\@digit_other_2);\r\n-\t@sort_words_other = sort_2_array_string_string(\\@words_other_1, \\@words_other_2);\r\n-\t\r\n-\t$len_digit = @sort_digit;\r\n-\tfor($i=0; $i<$len_digit; $i++){\r\n-\t\t$term = "chr".$sort_digit[$i];\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\r\n-\t$len_words = @sort_words;\r\n-\tfor($i=0; $i<$len_words; $i++){\r\n-\t\t$term = "chr".$sort_words[$i];\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\n-\t$len_digit_random = @sort_digit_random;\r\n-\tfor($i=0; $i<$len_digit_random; $i++){\r\n-\t\t$term = "chr".$sort_digit_random[$i]."_random";\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\r\n-\t$len_words_random = @sort_words_random;\r\n-\tfor($i=0; $i<$len_words_random; $i++){\r\n-\t\t$term = "chr".$sort_words_random[$i]."_random";\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\t\r\n-\t$len_digit_other = @sort_digit_other;\r\n-\tfor($i=0; $i<$len_digit_other; $i=$i+2){\r\n-\t\t$term = "chr".$sort_digit_other[$i]."_".$sort_digit_other[$i+1];\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\r\n-\t$len_words_other = @sort_words_other;\r\n-\tfor($i=0; $i<$len_words_other; $i=$i+2){\r\n-\t\t$term = "chr".$sort_words_other[$i]."_".$sort_words_other[$i+1];\r\n-\t\tpush(@sort_chr_hash, $term);\r\n-\t}\t\n-\t\n-\treturn @sort_chr_hash;\n-}\r\n-\n-sub sort_2_array_number_string{\r\n-\tlocal($a, $b) = @_;\r\n-\tlocal($len_a, $len_b, $i, %family, $one, $two);\r\n-\tlocal(@ret);\r\n-\t\r\n-\t$len_a = @$a;\r\n-\t$len_b = @$b;\r\n-\tif($len_a == $len_b){\r\n-\t\tfor($i=0; $i<$len_a; $i++){\r\n-\t\t\t$family{$$a[$i]}{$$b[$i]} = 0;\t\t\t\t\t\t\t\t\t\r\n-\t\t}\r\n-\t\tfor $one (sort by_mostly_numeric keys %family) {\r\n-\t\t\tfor $two (sort by_mostly_string keys %{ $family{$one} }) {\r\n-\t\t\t\t\tpush(@ret, $one);\r\n-\t\t\t\t\tpush(@ret, $two);\r\n-\t\t\t}\t\r\n-\t\t}\r\n-\t}\r\n-\telse{\t\t\r\n-\t\tprint "ERROR: Sort array is not same size\\n";\r\n-\t\tprint "a $len_a, b $len_b\\n";\r\n-\t}\t\r\n-\t\r\n-\treturn @ret;\r\n-}\n-\n-sub sort_2_array_string_string{\r\n-\tlocal($a, $b) = @_;\r\n-\tlocal($len_a, $len_b, $i, %family, $one, $two);\r\n-\tlocal(@ret);\r\n-\t\r\n-\t$len_a = @$a;\r\n-\t$len_b = @$b;\r\n-\tif($len_a == $len_b){\r\n-\t\tfor($i=0; $i<$len_a; $i++){\r\n-\t\t\t$family{$$a[$i]}{$$b[$i]} = 0;\t\t\t\t\t\t\t\t\t\r\n-\t\t}\r\n-\t\tfor $one (sort by_mostly_string keys %family) {\r\n-\t\t\tfor $two (sort by_mostly_string keys %{ $family{$one} }) {\r\n-\t\t\t\t\tpush(@ret, $one);\r\n-\t\t\t\t\tpush(@ret, $two);\r\n-\t\t\t}\t\r\n-\t\t}\r\n-\t}\r\n-\telse{\t\t\r\n-\t\tprint "ERROR: Sort array is not same size\\n";\r\n-\t\tprint "a $len_a, b $len_b\\n";\r\n-\t}\t\r\n-\t\r\n-\treturn @ret;\r\n-}\n-\n-sub by_mostly_numeric{\r\n-#\t( $a <=> $b ) || ( $a cmp $b );\r\n-\t( $a <=> $b );\r\n-}\r\n-\r\n-sub by_mostly_string{\r\n-#\t( $a <=> $b ) || ( $a cmp $b );\r\n-\t( $a cmp $b );\r\n-}\n-\r\n' |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/get_event_dist_fit.pl --- a/SpliceTrap/bin/get_event_dist_fit.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,108 +0,0 @@ -use Getopt::Long; -use strict; - -use Cwd; -my $PROG = $0; -my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); -my $PROG_ABS_PATH = Cwd::abs_path($PROG); -my $SrcFolder=`dirname $PROG_ABS_PATH`; -chomp($SrcFolder); - -#my $SrcFolder="/data/zhang/wuj/scripts/SpliceTrap.0.8/"; -my $InputFileName=$ARGV[0]; -my $OutputFileName = $ARGV[0].".hist"; -#$OutputFileName=$ARGV[1] if $ARGV[1] ne ""; -my $width=0.001; -my $verbose=1; -my $col=2; - -GetOptions ( - 'w:f'=>\$width, - 'c:i'=>\$col, - 'v'=>\$verbose -); - -$width=$width*1; -my $binnum=1/$width; -$col=$col-1; - -my @CAratios; -my @CSratios; -my @ADratios; -my @AAratios; -my @AIratios; -my @IRratios; - -open(Input, $InputFileName); -while(my $line=<Input>) -{ - next if($line=~/^#/); - chomp($line); - my @array=split(/\s/,$line); - next if($array[$col]<=0.001 or $array[$col]>=0.999); -# push( @CAratios,$array[$col]) if($array[0]=~/^C[AS]/ or $array[0]=~/^ME/); - push( @CAratios,$array[$col]) if($array[0]=~/^CA/ or $array[0]=~/^ME/); - push( @CSratios,$array[$col]) if($array[0]=~/^CS/); - push( @ADratios,$array[$col]) if($array[0]=~/^AD/); - push( @AAratios,$array[$col]) if($array[0]=~/^AA/); - push( @AIratios,$array[$col]) if($array[0]=~/^AI/); - push( @IRratios,$array[$col]) if($array[0]=~/^IR/); - -} - -close(Input); - -open(tmpFile, ">$InputFileName.tmpca"); -for(my $i=0;$i<@CAratios;$i++) -{ - print tmpFile $CAratios[$i],"\n"; -} -close(tmpFile); -open(tmpFile, ">$InputFileName.tmpcs"); -for(my $i=0;$i<@CSratios;$i++) -{ - print tmpFile $CSratios[$i],"\n"; -} -close(tmpFile); - - -open(tmpFile, ">$InputFileName.tmpad"); -for(my $i=0;$i<@ADratios;$i++) -{ - print tmpFile $ADratios[$i],"\n"; -} -close(tmpFile); - -open(tmpFile, ">$InputFileName.tmpaa"); -for(my $i=0;$i<@AAratios;$i++) -{ - print tmpFile $AAratios[$i],"\n"; -} -close(tmpFile); - -open(tmpFile, ">$InputFileName.tmpai"); -for(my $i=0;$i<@AIratios;$i++) -{ - print tmpFile $AIratios[$i],"\n"; -} -close(tmpFile); - -open(tmpFile, ">$InputFileName.tmpir"); -for(my $i=0;$i<@IRratios;$i++) -{ - print tmpFile $IRratios[$i],"\n"; -} -close(tmpFile); - -system("R --slave --args $InputFileName.tmpca <$SrcFolder/beta_fit.R"); -system("R --slave --args $InputFileName.tmpad <$SrcFolder/beta_fit.R"); -system("R --slave --args $InputFileName.tmpaa <$SrcFolder/beta_fit.R"); -#system("R --slave --args $InputFileName.tmpai <$SrcFolder/R/beta_fit.R"); -system("R --slave --args $InputFileName.tmpir <$SrcFolder/beta_fit.R"); -system("R --slave --args $InputFileName.tmpcs <$SrcFolder/beta_fit.R"); - -system("echo '#Width:$width' >$InputFileName.fit.hist"); -#system("paste $InputFileName.tmpca.fit $InputFileName.tmpad.fit $InputFileName.tmpaa.fit $InputFileName.tmpai.fit $InputFileName.tmpir.fit $InputFileName.tmpcs.fit >>$InputFileName.fit.hist"); - -system("paste $InputFileName.tmpca.fit $InputFileName.tmpad.fit $InputFileName.tmpaa.fit $InputFileName.tmpir.fit $InputFileName.tmpcs.fit >>$InputFileName.fit.hist"); -#system("rm $InputFileName.tmpca* $InputFileName.tmpad* $InputFileName.tmpaa* $InputFileName.tmpai* $InputFileName.tmpir* "); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/gtf2bed.pl --- a/SpliceTrap/bin/gtf2bed.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,81 +0,0 @@ -# rewrite on Sep 7th,2022 - -#part of package SpliceTrap - -#Jie Wu -use strict; - -my $inputfilename = $ARGV[0]; - -# input file is a gtf file, -# "transcript_id" is required for each line and should not be ambiguous. -# only the "exon" lines are used - -my %chr_hash; -my %strand_hash; -my %tx_exons; #tx_exons{$tx_id){$start} = $size; - -my $linenum = 0; - -open(input, $inputfilename); - -while(my $line=<input>) -{ - $linenum++; - my @a = split("\t",$line); - if ($a[2] eq "exon") - { - my $txid; - if($a[8]=~/transcript_id "(\S*?)"/) - { - $txid = $1; - } - else - { - die ("$inputfilename format error! No transcript_id in line $linenum \n"); - } - - if( exists $chr_hash{$txid} and $chr_hash{$txid} ne $a[0]) - { - warn ("$inputfilename: ambiguous transcript_id in line $linenum: $txid Skipped \n"); - next; - } - if( exists $strand_hash{$txid} and $strand_hash{$txid} ne $a[6]) - { - warn ("$inputfilename: ambiguous transcript_id in line $linenum: $txid Skipped\n"); - } - $chr_hash{$txid} = $a[0]; - $strand_hash{$txid} = $a[6]; - $tx_exons{$txid}{$a[3]} = $a[4] - $a[3] +1; - - } - -} - -foreach my $txid (keys %chr_hash) -{ - my @starts; - my @sizes; - foreach my $start (sort {$a<=>$b} (keys %{$tx_exons{$txid}} ) ) - { - push (@starts, $start); - push (@sizes, $tx_exons{$txid}{$start}); - } - my $exon_num = scalar(@sizes); - my $starts_str = ""; - for(my $i = 0; $i < $exon_num; $i++) - { - $starts_str = $starts_str.($starts[$i] - $starts[0]).","; - if($i>0) - { - warn "$txid, intron size..".($starts[$i]-$starts[$i-1])."\n" if ($starts[$i]-$starts[$i-1]>1000000); - } - } - my $sizes_str = join(",",@sizes); - my $end = $starts[$exon_num-1] + $sizes[$exon_num-1] -1; - print join("\t",$chr_hash{$txid}, $starts[0]-1, $end, $txid,"0",$strand_hash{$txid},$starts[0]-1, $end, "255,0,0",$exon_num,$sizes_str, $starts_str); - print "\n"; -} - - -close(input); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/mapping_bowtie.sh --- a/SpliceTrap/bin/mapping_bowtie.sh Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,89 +0,0 @@ -#!/bin/bash -#SrcFolder='/data/zhang/wuj/scripts/SpliceTrap.0.8' -InputFileName=$1 -faorfq=$2 -DatabasePrefix=$3 -Outputfolder=$4 -SrcFolder=$5 -Threads=$6 -DatabaseFolder=$SrcFolder'/../db/'$DatabasePrefix'/btw/TXdb' -TmpFolderName=`basename $1` -#fasta or fastq - -cd $Outputfolder; -#prepare the folder -if [ -d $TmpFolderName.result ];then - echo "MAPPING: !!!Error, there is already a folder named "$TmpFolderName".result !" - echo "MAPPING: !!!change the name of that folder first in case I erase them..." - exit -fi -echo "MAPPING: Start mapping $InputFileName...Creating cache folder $TmpFolderName.result" -mkdir $TmpFolderName".result" -mkdir $TmpFolderName".result"/cache - -cd $TmpFolderName".result" -cd cache -echo "MAPPING: Split to pieces ..." -split -l 1000000 $InputFileName -for name in x* -do - - if [ $faorfq == "fasta" ];then - add="-f" - fi -# if [ $name != $InputFileName ];then - echo "bowtie -p $Threads -a -v 2 $DatabaseFolder $name $add >$name.btw; perl $SrcFolder/bowtie2eland.pl $name.btw $name $name.eland;rm $name.btw ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>map.sh - echo $name >>checklist -# fi -done - -echo "MAPPING: submit scripts..." -perl $SrcFolder/batchqsub.pl map.sh - -tasknum=`wc -l map.sh |tr -d "\n"` -#checking.. - -echo "MAPPING: mapping $InputFileName to TXdb done...start to check.." -while [ 1 ] -do - if [ -f mapcheck.sh ];then - rm mapcheck.sh - fi - while read checklist - do - - name=`echo $checklist |tr -d "\n"` - echo "MAPPING: checking $name...." - readnum=`wc -l $name | cut -f1 -d" "` - if [ $faorfq == "fasta" ];then - readnum=`echo "$readnum/2"|bc` - else - readnum=`echo "$readnum/4"|bc` - fi - if [ -f $name.nomt ];then - bowtienum=`wc -l $name.nomt | cut -f1 -d" "` - else - bowtienum=0 - fi - if [ $bowtienum != $readnum ];then - echo "bowtie -p $Threads -a -v 2 $DatabaseFolder $name $add >$name.btw; perl $SrcFolder/bowtie2eland.pl $name.btw $name $name.eland;rm $name.btw ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>mapcheck.sh - - fi - done <checklist - if [ -f mapcheck.sh ];then - checktasknum=`wc -l mapcheck.sh |tr -d "\n"` - if [ $checktasknum == $tasknum ];then - echo "MAPPING: warning! none of the mapping tasks properly finished!" - fi - echo "MAPPING: resubmit TASKS...." - perl $SrcFolder/batchqsub.pl mapcheck.sh - else - break - fi -done -echo "MAPPING: Done.....merging files..." -cat *.nomt >$Outputfolder/$TmpFolderName.nomt -cd ../../ -rm $TmpFolderName.result -rf -#/data/zhang/wuj/tools/bowtie-0.12.3/bowtie -a $DatabaseFolderTXdb -f - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/mapping_rmap.sh --- a/SpliceTrap/bin/mapping_rmap.sh Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,80 +0,0 @@ -#/data/zhang/wuj/tools/bowtie-0.12.3/bowtie -a --best /data/zhang/wuj/database/hg18/AS/TXdb.2/btw/TXdb -f -t s_1_sequence.txtparta >s_1.map & -#SrcFolder='/data/zhang/wuj/scripts/SpliceTrap.0.8' -InputFileName=$1 -faorfq=$2 -DatabasePrefix=$3 -Outputfolder=$4 -SrcFolder=$5 -DatabaseFolder=$SrcFolder'/../db/'$DatabasePrefix'/TXdb.fasta' -TmpFolderName=`basename $1` - -cd $Outputfolder; -#prepare the folder -if [ -d $TmpFolderName.result ];then - echo "MAPPING: !!!Error, there is already a folder named "$TmpFolderName".result !" - echo "MAPPING: !!!change the name of that folder first in case I erase them..." - exit -fi -echo "MAPPING: Start to map $InputFileName....Creating cache folder $TmpFolderName.result" -mkdir $TmpFolderName".result" -cd $TmpFolderName".result" -mkdir cache stat -cd cache -echo "MAPPING: Split file..." -split -l 1000000 $InputFileName -echo "MAPPING: generating shell scirpts...." -for name in x* -do - - echo "rmap -M 100 -m 2 -c $DatabaseFolder -o $name.rmap $name; perl $SrcFolder/rmap2eland.pl $name.rmap $name $name.eland;rm $name.rmap ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>map.sh - echo $name >>checklist -done -tasknum=`wc -l map.sh |tr -d "\n"` -perl $SrcFolder/batchqsub.pl map.sh - -echo "MAPPING: map $InputFileName to TXdb done...start to check.." -while [ 1 ] -do - if [ -f mapcheck.sh ];then - rm mapcheck.sh - fi - while read checklist - do - - name=`echo $checklist |tr -d "\n"` - echo "MAPPING: checking $name...." - readnum=`wc -l $name | cut -f1 -d" "` - if [ $faorfq == "fasta" ];then - readnum=`echo "$readnum/2"|bc` - else - readnum=`echo "$readnum/4"|bc` - fi - if [ -f $name.nomt ];then - rmapnum=`wc -l $name.nomt | cut -f1 -d" "` - else - rmapnum=0 - fi - if [ $rmapnum != $readnum ];then - echo "rmap -M 100 -m 2 -c $DatabaseFolder -o $name.rmap $name; perl $SrcFolder/rmap2eland.pl $name.rmap $name $name.eland;rm $name.rmap ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>mapcheck.sh - - fi - done <checklist - if [ -f mapcheck.sh ];then - checktasknum=`wc -l mapcheck.sh |tr -d "\n"` - if [ $checktasknum == $tasknum ];then - echo "MAPPING: warning! none of the mapping tasks properly finished!" - fi - echo "MAPPING: resubmiting TASKS...." - perl $SrcFolder/batchqsub.pl mapcheck.sh - else - break - fi -done -echo "MAPPING: Mapping is really done....merging files..." - - -cat *.nomt >$Outputfolder/$TmpFolderName.nomt -cd ../../ - -rm $TmpFolderName.result -rf - |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/mark.mt.4eland.pl --- a/SpliceTrap/bin/mark.mt.4eland.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,56 +0,0 @@ -# this file is to convert mult mapped reads to nm reads by simply marked it as NM reads. -# its for the convience of inclusion ratio computation, if one read can be mapped to mult positions in the genome, then it will be marked as NM -# later, can be used to add information for dealing with this mult reads, for example, the coverage in the region - -use strict; -my $inputfilename=$ARGV[0]; -my $LongMarker="L"; -my $ShortMarker="S"; - - -open(input, $inputfilename); -while(my $line=<input>) -{ - #print "new line\n"; - chomp($line); - my @array = split("\t",$line); - my $match=$array[3]; - if( $array[2] eq "NM" or $match eq "") - { - print $line,"\n"; - next; - } - - my $marker=$LongMarker.$ShortMarker; - my @genome_pos; - #while($match1=~/\/(\S[^,]*\[[$marker]\])\S[^,]*:(\d*)[RF]/g) - #this array is used to store the mapped position for this read - my @chr; - my @start; - my @end; - while($match=~/(chr\S[^\|]*)\|(\d*)\|(\d*)\|/g) - { - push @chr, $1; - push @start, $2; - push @end, $3; - } - @chr=sort(@chr); - if (scalar(@chr)<=1) - { - print $line,"\n"; - next; - } - - @start=sort(@start); - @end=sort(@end); - if($chr[0] ne $chr[scalar(@chr)-1] or $start[scalar(@chr)-1]-$start[0]>100000) - { - print $line, "\tMT\n"; - } - else - { - print $line,"\n"; - - } -} -close(input); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/rmap2eland.pl --- a/SpliceTrap/bin/rmap2eland.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,103 +0,0 @@ -use strict; - -my $rmapfilename=$ARGV[0]; -my $readsfilename=$ARGV[1]; -my $elandfilename=$ARGV[2]; - -my $detectformat=`head -c 1 $readsfilename`; - -#system("grep \"$detectformat\" $readsfilename |sort >$readsfilename.sort"); -system("awk 'NR%2==1' $readsfilename |sort >$readsfilename.sort"); -system("sort -k4,4 $rmapfilename >$rmapfilename.sort"); - - -open(readsfile, $readsfilename.".sort"); - - - -#$looplinenumbers=2 if ($detectformat eq ">"); -open(rmapfile, $rmapfilename.".sort"); -open(elandfile, ">".$elandfilename); - -while(my $rmapline=<rmapfile>) -{ - chomp($rmapline); - my ($mapped_id, $start, $end, $rmapreadname, $mismatch, $strand)=split("\t",$rmapline); - while(my $readline=<readsfile>) - { - if($readline=~/^$detectformat/) - { - chomp($readline); - my $readname=substr($readline, 1, length($readline)-1); - - - if($readname ne $rmapreadname) - { - print elandfile $readname,"\tNA\tNM\n"; - next; - } - else - { - my @mapped_ids=(); - my @mapped_pos=(); - my @mapped_strand=(); - push(@mapped_ids, $mapped_id); - push(@mapped_pos,$start); - push(@mapped_strand,$strand); - while(1) - { - $rmapline=<rmapfile>; - chomp($rmapline); - ($mapped_id, $start, $end, $rmapreadname, $mismatch, $strand)=split("\t",$rmapline); - if( $rmapreadname eq $readname ) - { - push(@mapped_ids, $mapped_id); - push(@mapped_pos,$start); - push(@mapped_strand,$strand); - } - else - { - seek(rmapfile, -1*length($rmapline)-1,1); - print elandfile $readname,"\t"; - print elandfile "NA\t"; - print elandfile scalar(@mapped_ids),":0:0\t"; - for(my $i=0;$i<@mapped_ids;$i++) - { - print elandfile "/",$mapped_ids[$i]; - print elandfile ":",$mapped_pos[$i]+1; - if($mapped_strand[$i] eq "+") - { - print elandfile "F0,"; - } - else - { - print elandfile "R0,"; - } - - } - print elandfile "\n"; - last; - } - } - last; - - } - } - } -} - -while(my $readline=<readsfile>) -{ - if($readline=~/^$detectformat/) - { - chomp($readline); - my $readname=substr($readline, 1, length($readline)-1); - print elandfile $readname,"\tNA\tNM\n"; - } -} - -close(elandfile); -close(rmapfile); - - -close(readsfile); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/scan_nomt.pl --- a/SpliceTrap/bin/scan_nomt.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,98 +0,0 @@ -use strict; -use FileHandle; - - -my $File1Name=$ARGV[0]; -my $File2Name=$ARGV[1]; - -my %FileHandle1; -my %FileHandle2; -my %chrlist; - -open(File1,$File1Name); -open(File2,$File2Name); - -while(my $line1=<File1>) -{ - my $line2=<File2>; - chomp($line1); - chomp($line2); - my $read1status=substr($line1,length($line1)-2,2); - my $read2status=substr($line2,length($line2)-2,2); - #next if( ($read1status eq "NM" or $read1status eq "MT") and ($read2status eq "NM" or $read2status eq "MT"); - my @array1=split("\t",$line1); - my @array2=split("\t",$line2); - my $chr1=""; - my $chr2=""; - if(scalar(@array1) eq 4) - { - if($array1[3]=~/(chr\S*?)\|/) - {$chr1=$1;} - } - if(scalar(@array2) eq 4) - { - if($array2[3]=~/(chr\S*?)\|/) - {$chr2=$1;} - } - my $chr=$chr1; - if ($chr eq "") - { - next if($chr2 eq ""); - $chr=$chr2; - } - else - { - next if($chr2 ne "" and $chr2 ne $chr); - - } - next if $chr eq ""; - if(exists $chrlist{$chr}) - { - my $fout1= $FileHandle1{$chr}; - my $fout2= $FileHandle2{$chr}; - if($read1status eq "MT") - { - print $fout1 $array1[0],"\t",$array1[1],"\tMT\n"; - } - else - { - print $fout1 $line1,"\n"; - } - if($read2status eq "MT") - { - print $fout2 $array2[0],"\t",$array2[1],"\tMT\n"; - } - else - { - print $fout2 $line2,"\n"; - } - - - - } - else - { - $chrlist{$chr}=1; - my $fout1= new FileHandle; - open($fout1, ">".$File1Name.".".$chr); - $FileHandle1{$chr}=$fout1; - - my $fout2= new FileHandle; - open($fout2, ">".$File2Name.".".$chr); - $FileHandle2{$chr}=$fout2; - - } - -} - -foreach my $fout1 (keys %FileHandle1) -{ - close($fout1); -} -foreach my $fout2 (keys %FileHandle2) -{ - close($fout2); -} - -close(File1); -close(File2); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/scanbed2txdb.pl --- a/SpliceTrap/bin/scanbed2txdb.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| b'@@ -1,490 +0,0 @@\n-#argv0: input transcript bed file\n-#argv1: output filename, will be in AS format\n-\n-use strict;\n-my $AnnoFileName = $ARGV[0];\n-my $outputFileName = $ARGV[1];\n-\n-if($AnnoFileName eq "" or $outputFileName eq "")\n-{\n-\tprint "TXDBGEN: Please specify your input files\\n";\n-\texit;\n-}\n-\n-my $cachefolder = `basename $outputFileName`;\n-chomp($cachefolder);\n-my $cachefolder = $cachefolder.".cache";\n-my $AnnoFileBase = `basename $AnnoFileName`; \n-chomp($AnnoFileBase);\n-\n-if(! -e $cachefolder)\n-{\n-\tmkdir $cachefolder or die "CHECK: cannot mkdir $cachefolder\\n";\n-\tprint "TXDBGEN: mkdir $cachefolder\\n";\n-}\n-\n-my $CacheAnnoFileName = $cachefolder."/".$AnnoFileBase.".sort";\n-#sort the annotation file\n-\n-print "TXDBGEN: sort $AnnoFileName \\n";\n-\n-system("sort -k6,6 -k1,1 -k2,2n -k3,3n $AnnoFileName >$CacheAnnoFileName");\n-\n-$AnnoFileName = $CacheAnnoFileName;\n-\n-#read the annotations into hashes\n-\n-open(AnnoFile, $CacheAnnoFileName) or die "can not open",$CacheAnnoFileName;\n-#split the genes into contigs\n-\n-my $contigid = 0;\n-my $end_tmp = 0;\n-my $chr_tmp = "chr";\n-my $strand_tmp="NA";\n-\n-my $TXnumtmp=0;\n-\n-my %eventlist =();\n-#my %eventlist_af = ();\n-#my %eventlist_al = ();\n-my %evidences = ();\n-\n-open(my $output,">$outputFileName");\n-\n-open(my $output2, ">$outputFileName.evi");\n-while(my $line =<AnnoFile>)\n-{\n-\tchomp($line);\n-\tmy @a = split("\\t",$line);\n-\tmy $chr = $a[0];\n-\tmy $start = $a[1];\n-\tmy $end = $a[2];\n-\tmy $name = $a[3];\n-\tmy @sizes = split(",",$a[10]);\n-\tmy $strand = $a[5];\n-\tmy @start_shifts = split(",",$a[11]);\n-\t#my $chrstr = $chr.$strand;\n-\tmy @starts;\n-\tmy @ends;\n-\tfor(my $i=0;$i<@start_shifts;$i++)\n- {\n- $starts[$i]=$start_shifts[$i]+$start;\n- $ends[$i] = $starts[$i]+$sizes[$i];\n- }\n-\tif($start >$end_tmp or $chr ne $chr_tmp or $strand_tmp ne $strand)\n- {\n- $contigid++;\n- #$ctgmultisonum++ if $TXnumtmp>1;\n- $TXnumtmp=0;\n-\t\tmy $annos = scanevents(\\%eventlist, "inner") ;\n-\t\t#my $annos_af = scanevents(\\%eventlist_af, "af");\n-\t\t#my $annos_al = scanevents(\\%eventlist_al, "al");\n-\t\t\n-\t\tmy $stdout = select ($output);\n-\t\tmy $eventids = printanno($annos,$chr_tmp,$strand_tmp) if(scalar(%$annos)>0);\n-\t\t#printanno($annos_af,$chr_tmp,$strand_tmp) if(scalar(%$annos_af)>0);\n-\t\t#printanno($annos_al,$chr_tmp,$strand_tmp) if(scalar(%$annos_al)>0);\n-\t\tselect($stdout);\n-\t\t$stdout = select ($output2);\n-\t\t#print cross information\n-\t\tforeach my $connect_str (keys %$eventids) \n-\t\t{\n-\t\t\tprint $eventids->{$connect_str},"\\t";\n-\t\t\tforeach my $transcriptid (keys %{$evidences{$connect_str}}) \n-\t\t\t{\n-\t\t\t\tprint $transcriptid,",";\n-\t\t\t}\n-\t\t\tprint "\\n";\n-\t\t}\n-\t\tselect($stdout);\n-\t\tprint "TXDBGEN: Contig ID $contigid at $chr $strand...\\n" if ( $contigid%1000 == 0);\n-\t\t%eventlist = ();\n-\t\t#%eventlist_al = ();\n-\t\t#%eventlist_af = ();\n- #print CacheContigFile "#ctg$contigid\\n";\n-\n- }\n-\t$TXnumtmp++;\n- $end_tmp = $end if ($end > $end_tmp or $chr ne $chr_tmp or $strand_tmp ne $strand);\n- $chr_tmp = $chr;\n-\t\n-\t$strand_tmp = $strand;\n-\t\n-\t#scan connections, 2 and 3 exons for CA/CS/AF\n-#\tif(scalar(@starts)>2)\n-#\t{\n-#\t\tmy $connectionstr_af =$starts[0]."-".\n-#\t\t\t\t$ends[0].",".\n-#\t\t\t\t$starts[1]."-".\n-#\t\t\t\t$ends[1]."," ;\n-#\t\n-#\t\t$eventlist_af{$connectionstr_af} = $starts[0];\n-#\t\tmy $connectionstr_al = $starts[scalar(@starts)-2]."-".\n-#\t\t\t\t$ends[scalar(@starts)-2].",".\n-#\t\t\t\t$starts[scalar(@starts)-1]."-".\n-#\t\t\t\t$ends[scalar(@starts)-1]."," ;\n-#\t\t$eventlist_al{$connectionstr_al} = $starts[scalar(@starts)-2];\n-#\t}\n-\t\t#didn\'t consider direction yet\n- #add 1 exon for IR\n-\tfor(my $n=1;$n<4;$n++)\n- {\n-\t\tfor(my $i=0;$i<scalar(@starts)-$n+1;$i++)\n- {\n-\t\t\tmy $connectionstr = "";\n-\t\t\tfor(my $j=$i;$j<$i+$n;$j++)\n- {\n- $connectionstr = $connectionstr .\n- $starts[$j]."-".\n- $en'..b'\t\tprint "0,";\n-\t\t\tprint $a[2]-$a[0],",";\n-\t\t\tprint $a[4]-$a[0],"\\n";\n-\t\t\t\n-\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n- print $id.".".$num,"[S]\\t";\n-\t\t\tmy $connectstr_ca= $a[0]."-".$a[1].",".$a[4]."-".$a[5].",";\n-\t\t\t$eventids{$connectstr_ca} = $id.".".$num."[S]";\n- print "0\\t";\n- print $strand,"\\t";\n- print $a[0],"\\t",$a[5],"\\t";\n- print "255,0,0\\t";\n- print "2\\t"; \n- print $a[1]-$a[0],",";\n- #print $a[3]-$a[2],",";\n- print $a[5]-$a[4],"\\t";\n- print "0,";\n- #print $a[2]-$a[0],",";\n- print $a[4]-$a[0],"\\n";\n-\t\t\t\n-\t\t}\n-\t\tif($annos->{$key} eq "ir")\n-\t\t{\n-\t\t\tmy @a=split(/[,-]/,$key);\n-\n-\t\t\tmy $chrid=substr($chr,3,length($chr)-3);\n-\t\t\tmy $id="IR-IR-$chrid"."-".$a[1]."-".$a[2];\n-\t\t\tmy $num=0;\n-\t\t\t$num = $nums_per_isoform{$id} if exists $nums_per_isoform{$id};\n-\t\t\t$nums_per_isoform{$id}++;\n-\n-\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[3],"\\t";\n-\t\t\tprint $id.".".$num,"[L]\\t";\n-\t\t\tprint "0\\t";\n-\t\t\tprint $strand,"\\t";\n-\t\t\tprint $a[0],"\\t",$a[3],"\\t";\n-\t\t\t$eventids{$a[0]."-".$a[3].","} = $id.".".$num."[L]";\n-\t\t\tprint "255,0,0\\t";\n-\t\t\tprint "3\\t";\n-\t\t\tprint $a[1]-$a[0],",";\n-\t\t\tprint $a[2]-$a[1],",";\n-\t\t\tprint $a[3]-$a[2],"\\t";\n-\t\t\tprint "0,";\n-\t\t\tprint $a[1]-$a[0],",";\n-\t\t\tprint $a[2]-$a[0],"\\n";\n-\t\t\t\n-\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[3],"\\t";\n- print $id.".".$num,"[S]\\t";\n-\t\t\t$eventids{$key} = $id.".".$num."[S]";\n- print "0\\t";\n- print $strand,"\\t";\n- print $a[0],"\\t",$a[3],"\\t";\n- print "255,0,0\\t";\n- print "2\\t"; \n- print $a[1]-$a[0],",";\n- #print $a[3]-$a[2],",";\n- print $a[3]-$a[2],"\\t";\n- print "0,";\n- #print $a[2]-$a[0],",";\n- print $a[2]-$a[0],"\\n";\n-\t\t\n-\t\t}\n-\t\tif($annos->{$key} eq "ss")\n-\t\t{\n-\t\t\tmy @a=split(/[,-]/,$key);\n-\t\t\tmy $chrid=substr($chr,3,length($chr)-3);\n-\t\t\tmy $type="AA";\n-\t\t\tif( ($strand eq "+" && $a[1]==$a[2]) or ($strand eq "-" && $a[3]==$a[4]))\n-\t\t\t{\n-\t\t\t\t$type="AD";\n-\t\t\t}\n-\t\t\tmy $id="$type-$type-$chrid"."-".$a[2]."-".$a[3];\n-\t\t\tmy $connect_str_L = "";\n-\t\t\tmy $connect_str_S = "";\n-\t\t\tif($a[1]==$a[2])\n-\t\t\t{\n-\t\t\t\t$connect_str_L = $a[0]."-".$a[3].",".$a[4]."-".$a[5].",";\n-\t\t\t\t$connect_str_S = $a[0]."-".$a[1].",".$a[4]."-".$a[5].",";\n-\t\t\t}\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\t$connect_str_L = $a[0]."-".$a[1].",".$a[2]."-".$a[5].",";\n-\t\t\t\t$connect_str_S = $a[0]."-".$a[1].",".$a[3]."-".$a[5].",";\n-\t\t\t\t\n-\t\t\t}\n-\t\t\tmy $num=0;\n-\t\t\t$num = $nums_per_isoform{$id} if exists $nums_per_isoform{$id};\n-\t\t\t$nums_per_isoform{$id}++;\n-\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n-\t\t\tprint $id.".".$num,"[L]\\t";\n-\t\t\t$eventids {$connect_str_L} = $id.".".$num."[L]";\n-\t\t\tprint "0\\t";\n-\t\t\tprint $strand,"\\t";\n-\t\t\tprint $a[0],"\\t",$a[5],"\\t";\n-\t\t\tprint "255,0,0\\t";\n-\t\t\tprint "3\\t";\n-\t\t\tprint $a[1]-$a[0],",";\n-\t\t\tprint $a[3]-$a[2],",";\n-\t\t\tprint $a[5]-$a[4],"\\t";\n-\t\t\tprint "0,";\n-\t\t\tprint $a[2]-$a[0],",";\n-\t\t\tprint $a[4]-$a[0],"\\n";\n-\t\t\t\n-\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n- print $id.".".$num,"[S]\\t";\n-\t\t\t$eventids {$connect_str_S} = $id.".".$num."[S]";\n- print "0\\t";\n- print $strand,"\\t";\n- print $a[0],"\\t",$a[5],"\\t";\n- print "255,0,0\\t";\n- print "2\\t"; \n- print $a[1]-$a[0],",";\n- #print $a[3]-$a[2],",";\n- print $a[5]-$a[4],"\\t";\n- print "0,";\n- #print $a[2]-$a[0],",";\n- print $a[4]-$a[0],"\\n";\n-\t\n-\t\n-\t\t}\n-\n-\t}\n-\treturn(\\%eventids);\n-\t\n-}\n-\n' |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/splitdb.sh --- a/SpliceTrap/bin/splitdb.sh Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,11 +0,0 @@ -workingfolder=$1; -cd $workingfolder -cut -f1 ../TXdb.bed |uniq >chr.list; -while read line ; -do - chr=`echo $line |tr -d "\n"`; - grep -w $chr ../TXdb.bed >$chr.bed; - split -3000 $chr.bed $chr.; - echo $chr....; - rm $chr.bed; -done <chr.list; |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/bin/vslz.pl --- a/SpliceTrap/bin/vslz.pl Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,46 +0,0 @@ -use strict; -exit; - -my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; -my $SrcFolder=$config{SrcFolder}; - -my $BedFileName=$SrcFolder."/db/TXdb.1101.bed"; -my $RatioFileName=$ARGV[0]; -my $BedFileOutName=$ARGV[1]; -my %Ratios; - -open(RatioFile, $RatioFileName); - -while(my $RatioLine=<RatioFile>) -{ - chomp($RatioLine); - my @array=split("\t",$RatioLine); - $Ratios{$array[0]}=$array[2] if $array[14] eq "passed"; -} - -close(RatioFile); - -open(BedFile, $BedFileName); -open(BedFileOut,">".$BedFileOutName.".bed"); -print BedFileOut "track name=$BedFileOutName discription=$BedFileOutName useScore=1\n"; -while(my $BedLine=<BedFile>) -{ - my @array=split("\t",$BedLine); - $array[3]=~/^(\S*)\[([LS])\]/; - my $id=$1; - my $LS=$2; - #print $LS,"\n"; - next if not exists $Ratios{$id}; - $array[4]=$Ratios{$id}; - $array[4]=1-$Ratios{$id} if( $LS eq 'S'); - - $array[4]=sprintf("%.0f",$array[4]*1000); - for (my $i=0;$i<@array; $i++) - { - print BedFileOut $array[$i]; - print BedFileOut "\t" if $i<scalar(@array)-1; - } - #print BedFileOut "\n"; -} -close(BedFileOut); -close(BedFile); |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/cutoffs/cutoff.pair.06.txt --- a/SpliceTrap/cutoffs/cutoff.pair.06.txt Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,111 +0,0 @@ -9 2.7 0.60603248295953 0.171888000000001 -18 2 0.60524889781465 0.183333 -27 2 0.614125387586727 0.176226000000001 -36 1.9 0.607174152514097 0.174107 -45 1.8 0.600015884070526 0.175151 -54 1.5 0.606211216622966 0.181161 -63 1.5 0.630096537351149 0.17829 -72 1.4 0.600041179609881 0.181678 -81 1.4 0.604419014379505 0.180618 -90 1.4 0.605503082593424 0.179083 -99 1.3 0.615709740786329 0.177374 -108 1.3 0.60153811996046 0.178839 -117 1.3 0.624394647716218 0.172525 -126 1.3 0.607071122577313 0.179287 -135 1.2 0.604935986506864 0.180048 -144 1.2 0.606377388150568 0.179824 -153 1.1 0.609872307866415 0.172894 -162 1.1 0.605221663116349 0.176559 -171 1.1 0.60084962792178 0.177017 -180 1.1 0.606042806460085 0.179842 -189 1.1 0.639661884576477 0.165058000000001 -198 1 0.63233306074533 0.172839 -207 1 0.620992090280475 0.17554 -216 1 0.617967295839559 0.177413000000001 -225 1 0.643160025381914 0.165310000000001 -234 1 0.664093921751576 0.161004 -243 1 0.617444847253655 0.179608 -252 0.9 0.600141496641319 0.18301 -261 0.9 0.61458063483465 0.175533 -270 0.9 0.631646650541943 0.175805 -279 0.9 0.633124568165035 0.169468 -288 0.9 0.611511089439885 0.172566 -297 0.9 0.620072975883583 0.176992 -306 0.8 0.632771460485054 0.177489 -315 0.8 0.616480687227451 0.177778 -324 0.8 0.627081395066885 0.173799 -333 0.8 0.637882935696159 0.166883 -342 0.8 0.601440520784007 0.181724 -351 0.8 0.6308436890493 0.169283000000001 -360 0.8 0.639014375873075 0.164053 -369 0.8 0.608995842436592 0.182319 -378 0.7 0.61599095431047 0.179065 -387 0.7 0.632637490417973 0.173393000000001 -396 0.7 0.611914615604433 0.174309000000001 -405 0.7 0.636212590337212 0.168285 -414 0.7 0.601768900444485 0.17904 -423 0.7 0.652306181407458 0.165425 -432 0.7 0.61284769661545 0.17678 -441 0.7 0.620417630501693 0.179685000000001 -450 0.7 0.630731905615349 0.172766000000001 -459 0.7 0.610688367962893 0.174217 -468 0.7 0.614116817389574 0.169074 -477 0.7 0.627626017337352 0.170694000000001 -486 0.7 0.625196910435048 0.173867 -495 0.7 0.624173913851294 0.170272 -504 0.7 0.600827986205662 0.176072 -513 0.7 0.60691053900984 0.173314 -522 0.7 0.637574555072294 0.164769 -531 0.7 0.623224687213309 0.173131 -540 0.7 0.606854298464756 0.17057 -549 0.7 0.602771105882753 0.176564 -558 0.7 0.635304573393458 0.165681 -567 0.7 0.614272507442932 0.178017 -576 0.7 0.647638749954493 0.161989 -585 0.7 0.635927720935647 0.166479000000001 -594 0.7 0.654872976893549 0.165962 -603 0.6 0.623281215201779 0.169602 -612 0.6 0.620036715664558 0.174000000000001 -621 0.6 0.6207915344063 0.173405000000001 -630 0.6 0.605666404221127 0.172404000000001 -639 0.6 0.605282217899342 0.171117 -648 0.6 0.605873079749684 0.173455 -657 0.6 0.622135854785987 0.171086 -666 0.6 0.609650459959578 0.180212 -675 0.6 0.615727466486383 0.175395 -684 0.6 0.650390040594351 0.157856 -693 0.6 0.600004956285639 0.17354 -702 0.6 0.619480037234101 0.169091000000001 -711 0.6 0.620537197134915 0.170687 -720 0.6 0.63097256862101 0.168571 -729 0.6 0.616862968480703 0.176372 -738 0.6 0.619853798956006 0.169892 -747 0.6 0.668527865891001 0.157351 -756 0.6 0.609299339958315 0.173282 -765 0.6 0.606997082083795 0.176418 -774 0.6 0.60385659737608 0.178646 -783 0.6 0.618166146904953 0.170539000000001 -792 0.6 0.655214353317497 0.162477 -801 0.6 0.667723279232349 0.161535 -810 0.6 0.641501812796978 0.165034000000001 -819 0.6 0.627258070106693 0.167323 -828 0.6 0.611937213679756 0.173702 -837 0.6 0.637164577326402 0.165999 -846 0.6 0.609589936966381 0.16902 -855 0.6 0.605378844036658 0.177104 -864 0.6 0.603894805290932 0.174897 -873 0.6 0.61196823740213 0.177185000000001 -882 0.6 0.645164364920278 0.169894 -891 0.6 0.616946196689952 0.164449 -900 0.6 0.620119902403488 0.173101 -909 0.6 0.612352323569787 0.175357000000001 -918 0.6 0.604523310198474 0.181906 -927 0.6 0.639238406673874 0.161621 -936 0.6 0.626807582940753 0.177092000000001 -945 0.6 0.649699083584984 0.166011 -954 0.6 0.630080161768358 0.175451 -963 0.6 0.634570582339133 0.174267 -972 0.6 0.613974438568147 0.171149000000001 -981 0.6 0.646832147519767 0.16271 -990 0.6 0.611137442619996 0.170677 -999 0.6 0.609074843250805 0.177417 |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/cutoffs/cutoff.pair.07.txt --- a/SpliceTrap/cutoffs/cutoff.pair.07.txt Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,111 +0,0 @@ -9 3.8 0.723195172634673 0.136373 -18 3.1 0.704962064416613 0.145611 -27 2.7 0.707557510009516 0.148332 -36 2.7 0.727407566746925 0.139424000000001 -45 2.6 0.700849445263879 0.141565 -54 2.5 0.722912449885175 0.135534 -63 2.2 0.700978069072888 0.148155 -72 2.1 0.701953684982546 0.145638 -81 2.1 0.714519324839634 0.144058 -90 2 0.706725312179025 0.143797 -99 1.8 0.711963141532287 0.146397 -108 1.8 0.735577668185521 0.136275 -117 1.7 0.702810428805111 0.149303 -126 1.7 0.720543078508623 0.142071 -135 1.7 0.70149933958699 0.145076 -144 1.7 0.71795624649094 0.139588 -153 1.7 0.704835091689678 0.148916 -162 1.5 0.701545731183728 0.149502 -171 1.5 0.724190117580076 0.139706 -180 1.5 0.713150501847426 0.146353 -189 1.4 0.701931396757465 0.150412 -198 1.4 0.701219389501589 0.150832 -207 1.4 0.709880139643964 0.146954 -216 1.4 0.745895944323654 0.133521 -225 1.4 0.708094624059256 0.144051000000001 -234 1.4 0.714653417236785 0.141035 -243 1.4 0.702278770668715 0.146489 -252 1.4 0.714249389046789 0.140401 -261 1.4 0.720916794007688 0.140816 -270 1.4 0.704112631476039 0.140986 -279 1.4 0.725725706846333 0.140364 -288 1.4 0.713439724527154 0.142329 -297 1.3 0.703874456990055 0.145868 -306 1.3 0.714083967774953 0.138665 -315 1.3 0.717635749171277 0.139297 -324 1.3 0.70800987989496 0.14435 -333 1.3 0.703608941539679 0.143911 -342 1.3 0.712538846663198 0.142044 -351 1.3 0.736517626110649 0.137995 -360 1.3 0.752400150484099 0.133463 -369 1.3 0.700408117809905 0.149221 -378 1.2 0.710876753570226 0.145758 -387 1.2 0.719606098956021 0.141686 -396 1.1 0.701573537304199 0.149306 -405 1.1 0.711449711776699 0.146484 -414 1.1 0.7219174559929 0.140383 -423 1.1 0.711374621895577 0.147498 -432 1.1 0.706470648418195 0.146516 -441 1.1 0.702119897887331 0.144641 -450 1.1 0.710888241224795 0.14668 -459 1.1 0.700236191312255 0.145443 -468 1.1 0.709289110346942 0.145098000000001 -477 1.1 0.723682944143134 0.137715000000001 -486 1.1 0.711909258694556 0.148433000000001 -495 1.1 0.706050645629693 0.146412 -504 1.1 0.71026622444805 0.142836 -513 1.1 0.721840069030837 0.140123 -522 1.1 0.721616423306577 0.139145 -531 1.1 0.706599106603714 0.144744 -540 1.1 0.732906503371939 0.138953 -549 1.1 0.710747551039788 0.146028 -558 1.1 0.72403698505923 0.141907 -567 1.1 0.703858944181491 0.142942 -576 1.1 0.732227703846385 0.137018 -585 1.1 0.706081331511759 0.145895 -594 1.1 0.72851699215126 0.140481 -603 1.1 0.735776902337646 0.13646 -612 1 0.703269143581204 0.150933 -621 1 0.705868277579243 0.149988 -630 1 0.735304036557552 0.133891 -639 1 0.701919162079755 0.140775 -648 1 0.714774599695102 0.142638 -657 1 0.702484973766457 0.145888 -666 1 0.714275579611685 0.144289 -675 1 0.704751859660089 0.148070000000001 -684 1 0.725303040977116 0.139712 -693 1 0.7387858141582 0.134658 -702 1 0.737133543332787 0.135015 -711 1 0.71646444290315 0.140297 -720 1 0.735722464691124 0.136141 -729 1 0.709409780786399 0.144924000000001 -738 1 0.732557952972944 0.137912 -747 1 0.70280408850191 0.148749 -756 1 0.736014898114798 0.13601 -765 1 0.719075344267219 0.141568000000001 -774 1 0.705605602403474 0.142286 -783 1 0.714188898670159 0.143287 -792 1 0.708351298492815 0.14399 -801 1 0.701618595782959 0.140845 -810 1 0.70444245319677 0.142743000000001 -819 1 0.701265538602256 0.146322 -828 1 0.731290352078885 0.136012 -837 1 0.717812366249433 0.137717 -846 1 0.724767043040464 0.142316 -855 1 0.742408530369704 0.132985 -864 1 0.715927851943484 0.141312 -873 1 0.70674171896429 0.145039 -882 0.9 0.705311244948852 0.148678 -891 0.9 0.725745115884173 0.136097 -900 0.9 0.717545411117844 0.145251 -909 0.9 0.71139239340832 0.147716 -918 0.9 0.724110435852362 0.14723 -927 0.9 0.730304270252922 0.142741000000001 -936 0.9 0.702790994935148 0.14686 -945 0.9 0.714312901014806 0.142984 -954 0.9 0.718970705842182 0.14072 -963 0.9 0.702918434736422 0.146811 -972 0.9 0.763309367972586 0.135241 -981 0.9 0.70933162192829 0.14406 -990 0.9 0.714425781696018 0.142589 -999 0.9 0.707307109948989 0.146265 |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/cutoffs/cutoff.pair.08.txt --- a/SpliceTrap/cutoffs/cutoff.pair.08.txt Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,111 +0,0 @@ -9 5.7 0.807699596880158 0.11075 -18 4.8 0.812102488456895 0.109521 -27 4.8 0.807059148683326 0.109122 -36 4.5 0.810359129334262 0.106039 -45 4.1 0.800899562591127 0.109428 -54 4 0.819734419515034 0.104148 -63 3.6 0.801137818633087 0.112097 -72 3.5 0.805611266779768 0.107837 -81 3.2 0.804721543883597 0.110678 -90 3.2 0.81977543427128 0.105428 -99 3.2 0.80949445276013 0.109698 -108 3.1 0.800113619307504 0.115339 -117 2.9 0.80163357132874 0.111922 -126 2.9 0.807430653105459 0.110639 -135 2.8 0.809101610526483 0.109881 -144 2.8 0.808015366939668 0.109303 -153 2.8 0.815481435245632 0.106845 -162 2.7 0.813972734460064 0.108256 -171 2.7 0.803878583908181 0.111767 -180 2.7 0.814908823498375 0.110845 -189 2.5 0.803934003900257 0.112334 -198 2.5 0.819507987133127 0.106736 -207 2.5 0.816975135145848 0.109487 -216 2.5 0.801969702289987 0.11426 -225 2.5 0.814854242652841 0.106264 -234 2.5 0.809009743289239 0.110027000000001 -243 2.5 0.816859180637723 0.10951 -252 2.5 0.825923196681225 0.104742 -261 2.3 0.805863672217281 0.108068 -270 2.3 0.812114384187481 0.110547 -279 2.3 0.811109163497932 0.109461 -288 2.3 0.80680260883464 0.104169 -297 2.3 0.800054845624205 0.110627 -306 2 0.80196269021579 0.117711 -315 2 0.809945512786952 0.109737 -324 2 0.819364610024312 0.110094 -333 2 0.812712294607599 0.111396 -342 2 0.801568817408624 0.113051 -351 2 0.804296495631151 0.112833 -360 2 0.809648638295373 0.106319 -369 2 0.810216908330331 0.110967 -378 2 0.80081744573067 0.112898 -387 2 0.809614612996421 0.114576 -396 2 0.800940767696918 0.109701 -405 1.8 0.800397828290365 0.115299 -414 1.8 0.80375463158553 0.116013 -423 1.8 0.80204726507529 0.110069 -432 1.8 0.829673313493101 0.109584 -441 1.8 0.801512586472861 0.110591 -450 1.8 0.801317568038407 0.11341 -459 1.8 0.806266605408301 0.11489 -468 1.8 0.804368073911341 0.112083 -477 1.8 0.80034140400705 0.113126 -486 1.8 0.803037870942011 0.112527 -495 1.8 0.816147055378533 0.113451 -504 1.8 0.804009255355535 0.113167 -513 1.8 0.809536527236661 0.111121 -522 1.8 0.810250462485448 0.113045 -531 1.8 0.813388275601197 0.117925 -540 1.8 0.810529197259932 0.111664 -549 1.8 0.80286846045568 0.114872 -558 1.8 0.805231767462103 0.112439 -567 1.8 0.803715104541846 0.112076 -576 1.8 0.800856311237744 0.113416 -585 1.8 0.815485369320364 0.113187 -594 1.8 0.810404807750137 0.111576 -603 1.8 0.80944688874663 0.114048 -612 1.8 0.813671662890222 0.107955 -621 1.8 0.805581198188585 0.111274 -630 1.5 0.801229670148542 0.121577 -639 1.5 0.812201198793304 0.109719 -648 1.5 0.80667869232878 0.111841 -657 1.5 0.817613487850624 0.110405 -666 1.5 0.816866335035283 0.107114 -675 1.5 0.8101804833249 0.117018 -684 1.5 0.818509280416726 0.115303 -693 1.5 0.80107589244594 0.116714 -702 1.5 0.829601652947073 0.107173 -711 1.5 0.820953668506561 0.108043 -720 1.5 0.811426464962464 0.114567 -729 1.5 0.800211543269215 0.1163 -738 1.5 0.814971683053337 0.111887 -747 1.5 0.810647742818646 0.111346 -756 1.5 0.823589356371534 0.1075 -765 1.5 0.808183381146845 0.115074 -774 1.5 0.804024248189691 0.11739 -783 1.5 0.809214446523965 0.114048 -792 1.5 0.816922307151774 0.116292 -801 1.5 0.817083370511117 0.107909 -810 1.5 0.803218112416446 0.122658 -819 1.5 0.801792365687339 0.115227 -828 1.5 0.824077216658734 0.106899 -837 1.5 0.808010406353855 0.115016 -846 1.5 0.805503181301868 0.111932 -855 1.5 0.810848938809874 0.114185 -864 1.5 0.80077383728855 0.11448 -873 1.5 0.804510616030174 0.115034 -882 1.5 0.800413194808912 0.119586 -891 1.5 0.820672192506787 0.110138 -900 1.5 0.810326379209945 0.112103 -909 1.5 0.811096513155687 0.117781 -918 1.5 0.804732736125644 0.117636 -927 1.5 0.807950118492599 0.113333 -936 1.5 0.807915776288532 0.113945 -945 1.5 0.832853748697962 0.107864 -954 1.5 0.819637006281129 0.112646 -963 1.5 0.807519443743277 0.110865 -972 1.5 0.816853380970702 0.113831 -981 1.5 0.802684401572777 0.116302 -990 1.5 0.809446019377896 0.112676 -999 1.5 0.804655052306582 0.112474 |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/refGenes.bed --- a/SpliceTrap/refGenes.bed Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| b'@@ -1,66259 +0,0 @@\n-chr1\t67092175\t67134971\tNM_001276351\t0\t-\t67093004\t67127240\t0\t8\t1429,187,70,113,158,92,86,42,\t0,3059,4076,23176,33576,34990,38966,42754,\n-chr1\t67092175\t67134971\tNM_001276352\t0\t-\t67093579\t67127240\t0\t9\t1429,70,145,68,113,158,92,86,42,\t0,4076,11062,19401,23176,33576,34990,38966,42754,\n-chr1\t67092175\t67134971\tNR_075077\t0\t-\t67134971\t67134971\t0\t10\t1429,70,145,68,143,113,158,92,86,42,\t0,4076,11062,19401,21438,23176,33576,34990,38966,42754,\n-chr1\t201283451\t201332993\tNM_000299\t0\t+\t201283702\t201328836\t0\t15\t453,104,395,145,208,178,63,115,156,177,154,187,85,107,2920,\t0,10490,29714,33101,34120,35166,36364,36815,38526,39561,40976,41489,42302,45310,46622,\n-chr1\t201283451\t201332993\tNM_001005337\t0\t+\t201283702\t201328836\t0\t14\t453,104,395,145,208,178,115,156,177,154,187,85,107,2920,\t0,10490,29714,33101,34120,35166,36815,38526,39561,40976,41489,42302,45310,46622,\n-chr1\t8352403\t8423687\tNM_001042682\t0\t-\t8355086\t8364133\t0\t13\t2717,181,147,721,223,1379,114,162,200,93,163,81,127,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,71157,\n-chr1\t8352403\t8817640\tNM_001042681\t0\t-\t8355086\t8656297\t0\t23\t2717,181,147,721,223,1379,114,162,200,93,163,81,99,100,125,49,105,97,106,126,71,469,481,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,113521,142659,145001,156223,188810,204071,205014,262157,271906,303569,464756,\n-chr1\t8352403\t8817640\tNM_012102\t0\t-\t8355086\t8656297\t0\t24\t2717,181,147,721,223,1379,114,162,200,93,163,81,99,100,125,49,105,97,106,126,71,469,185,481,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,113521,142659,145001,156223,188810,204071,205014,262157,271906,303569,440000,464756,\n-chr1\t33513998\t34165274\tNM_001281956\t0\t-\t33519517\t34165097\t0\t71\t2572,213,139,88,113,162,63,180,112,74,174,174,180,177,183,174,189,174,249,174,195,186,189,147,189,114,81,146,178,189,210,117,70,119,105,97,125,204,96,114,117,195,188,139,192,203,127,192,157,170,189,216,117,189,188,139,195,327,183,113,104,122,125,88,78,113,208,195,113,217,364,\t0,5466,5813,7464,9308,10883,13197,19051,19797,23023,23437,26526,27131,28721,32038,36178,43735,45301,53594,55375,57533,58507,63297,66754,69643,72505,73089,86866,88370,91283,97042,100505,102907,103500,108168,109371,110520,111052,112487,119423,121215,122361,132649,138324,143947,148891,178931,184754,186518,195090,200588,202287,210198,210517,211350,212548,225141,229281,258570,274601,278424,296744,305714,306470,311698,332885,404095,421761,518595,574978,650912,\n-chr1\t33513998\t34165842\tNM_052896\t0\t-\t33519517\t34165813\t0\t70\t2572,213,139,88,113,162,63,180,112,74,174,174,180,177,183,174,174,174,195,186,189,147,189,114,81,146,178,189,126,210,117,70,119,105,97,125,204,96,114,117,195,188,139,192,203,127,192,157,170,189,216,117,189,188,139,195,327,183,113,104,122,125,88,78,113,208,195,113,217,96,\t0,5466,5813,7464,9308,10883,13197,19051,19797,23023,23437,26526,27131,28721,32038,36178,53594,55375,57533,58507,63297,66754,69643,72505,73089,86866,88370,91283,91864,97042,100505,102907,103500,108168,109371,110520,111052,112487,119423,121215,122361,132649,138324,143947,148891,178931,184754,186518,195090,200588,202287,210198,210517,211350,212548,225141,229281,258570,274601,278424,296744,305714,306470,311698,332885,404095,421761,518595,574978,651748,\n-chr1\t41847188\t42035925\tNR_038261\t0\t-\t42035925\t42035925\t0\t4\t1257,219,112,119,\t0,1682,71224,188618,\n-chr1\t75202130\t75611114\tNM_001130058\t0\t-\t75203726\t75541447\t0\t24\t1703,85,89,71,74,104,95,263,88,93,100,132,113,84,124,61,126,85,85,74,49,39,82,75,\t0,9337,11574,11788,12474,13623,15735,16359,17126,17669,20230,25595,31855,34856,36382,39870,40755,49079,72827,98481,137451,194452,339304,408909,\n-chr1\t75202132\t75611116\tNM_001320285\t0\t-\t75203726\t75300660\t0\t26\t1701,85,89,71,74,104,95,263,88,93,100,132,113,84,124,61,126,85,85,74,91,49,39,67,82,77,\t0,9335,11572,11786,12472,13621,15733,16357,17124,17667,20228,25593,31853,34854,36380,39868,40753,49077,72825,98479,109482,137449,194450,196222,339302,408907,\n-chr1\t75202132\t75611116\tNM_001320287\t0\t-\t75203726\t75300660\t0\t25\t1701,85,'..b'0123\t230123\t0\t8\t106,146,111,89,40,166,208,100,\t0,3710,5076,6459,7823,9904,12166,12494,\n-chr15_KI270727v1_random\t241092\t241174\tNR_049886\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n-chr15_KI270727v1_random\t241092\t241174\tNR_049895\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n-chr15_KI270727v1_random\t241092\t241174\tNR_128721\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n-chr15_KI270727v1_random\t372321\t373405\tNM_001004719\t0\t+\t372419\t373361\t0\t1\t1084,\t0,\n-chr16_KI270728v1_random\t17231\t19833\tNM_001099687\t0\t+\t17263\t18809\t0\t2\t394,1037,\t0,1565,\n-chr16_KI270728v1_random\t17231\t19833\tNM_016212\t0\t+\t17263\t18809\t0\t2\t394,1037,\t0,1565,\n-chr16_KI270728v1_random\t17231\t19833\tNR_110886\t0\t+\t19833\t19833\t0\t4\t394,167,289,1037,\t0,499,1064,1565,\n-chr16_KI270728v1_random\t17231\t19833\tNR_110897\t0\t+\t19833\t19833\t0\t4\t394,167,289,1037,\t0,499,1064,1565,\n-chr16_KI270728v1_random\t17231\t19833\tNR_110910\t0\t+\t19833\t19833\t0\t3\t394,289,1037,\t0,1064,1565,\n-chr16_KI270728v1_random\t17231\t19833\tNR_110911\t0\t+\t19833\t19833\t0\t3\t394,289,1037,\t0,1064,1565,\n-chr16_KI270728v1_random\t17233\t19273\tNM_001205259\t0\t+\t17263\t18809\t0\t2\t392,477,\t0,1563,\n-chr16_KI270728v1_random\t17233\t19273\tNR_110914\t0\t+\t19273\t19273\t0\t3\t392,289,477,\t0,1062,1563,\n-chr16_KI270728v1_random\t17250\t19837\tNM_001330061\t0\t+\t17263\t18809\t0\t2\t375,1041,\t0,1546,\n-chr16_KI270728v1_random\t17251\t19833\tNM_001330066\t0\t+\t17263\t18809\t0\t2\t374,1037,\t0,1545,\n-chr16_KI270728v1_random\t933855\t936466\tNR_110898\t0\t-\t936466\t936466\t0\t3\t1039,290,268,\t0,1251,2343,\n-chr16_KI270728v1_random\t1001629\t1037618\tNR_135178\t0\t+\t1037618\t1037618\t0\t25\t105,103,322,130,110,172,175,173,104,151,211,133,128,196,134,396,152,192,188,193,183,182,148,197,233,\t0,1009,5706,6177,6753,7361,8561,11932,12489,13586,16444,16928,17168,17388,20254,20921,21434,22284,28770,30290,31190,34103,35109,35455,35756,\n-chr16_KI270728v1_random\t1331814\t1346848\tNR_130771\t0\t-\t1346848\t1346848\t0\t3\t5057,93,295,\t0,6640,14739,\n-chr16_KI270728v1_random\t1331814\t1346848\tNR_130772\t0\t-\t1346848\t1346848\t0\t2\t5057,295,\t0,14739,\n-chr17_GL000205v2_random\t54856\t57966\tNR_003682\t0\t-\t57966\t57966\t0\t1\t3110,\t0,\n-chr22_KI270731v1_random\t69141\t86923\tNR_003267\t0\t+\t86923\t86923\t0\t13\t189,171,135,81,193,158,150,137,188,128,113,114,280,\t0,732,4094,4362,9648,10233,11930,12603,16240,16660,16890,17084,17502,\n-chr22_KI270733v1_random\t122272\t135645\tNR_046235\t0\t+\t135645\t135645\t0\t1\t13373,\t0,\n-chr22_KI270733v1_random\t130203\t135280\tNR_003287\t0\t+\t135280\t135280\t0\t1\t5077,\t0,\n-chr22_KI270733v1_random\t121580\t121672\tNR_106782\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n-chr22_KI270733v1_random\t121580\t121672\tNR_128715\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n-chr22_KI270733v1_random\t121580\t121672\tNR_128716\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n-chr22_KI270733v1_random\t121580\t121672\tNR_128717\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n-chr22_KI270733v1_random\t125128\t125189\tNR_037458\t0\t+\t125189\t125189\t0\t1\t61,\t0,\n-chr22_KI270733v1_random\t125128\t125189\tNR_128714\t0\t+\t125189\t125189\t0\t1\t61,\t0,\n-chr22_KI270733v1_random\t125930\t127799\tNR_003286\t0\t+\t127799\t127799\t0\t1\t1869,\t0,\n-chr22_KI270733v1_random\t128876\t129032\tNR_003285\t0\t+\t129032\t129032\t0\t1\t156,\t0,\n-chr22_KI270733v1_random\t170214\t170275\tNR_037458\t0\t+\t170275\t170275\t0\t1\t61,\t0,\n-chr22_KI270733v1_random\t170214\t170275\tNR_128714\t0\t+\t170275\t170275\t0\t1\t61,\t0,\n-chr22_KI270733v1_random\t171011\t172880\tNR_003286\t0\t+\t172880\t172880\t0\t1\t1869,\t0,\n-chr22_KI270733v1_random\t173955\t174111\tNR_003285\t0\t+\t174111\t174111\t0\t1\t156,\t0,\n-chr22_KI270734v1_random\t72453\t74335\tNR_136575\t0\t+\t74335\t74335\t0\t1\t1882,\t0,\n-chr22_KI270734v1_random\t90958\t98408\tNR_136574\t0\t-\t98408\t98408\t0\t4\t429,155,73,884,\t0,5076,5960,6566,\n-chr22_KI270734v1_random\t131493\t137393\tNM_005675\t0\t+\t131645\t136994\t0\t5\t262,161,101,141,549,\t0,342,3949,4665,5351,\n-chr22_KI270734v1_random\t138078\t161750\tNM_001195226\t0\t-\t138479\t156446\t0\t14\t589,89,99,176,147,93,82,80,117,65,150,35,209,62,\t0,664,4115,5535,6670,6925,8561,9545,10037,10335,12271,12908,18210,23610,\n-chr22_KI270734v1_random\t138078\t161852\tNM_016335\t0\t-\t138479\t161586\t0\t15\t589,89,99,176,147,93,82,80,117,65,150,35,209,313,164,\t0,664,4115,5535,6670,6925,8561,9545,10037,10335,12271,12908,18210,23235,23610,\n' |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/splice_trap.xml --- a/SpliceTrap/splice_trap.xml Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,35 +0,0 @@ -<tool id="splice_trap" name="SpliceTrap" version="1.0.0"> - <description>A statistic tool for quantifying exon inclusion ratios in paired-end RNA-seq data, with broad applications for the study of alternative splicing. - </description> - <requirements> - <requirement type="package" version="1.2.1.1">bowtie</requirement> - </requirements> - <command detect_errors="exit_code"><![CDATA[ - perl $__tool_directory__/SpliceTrap.pl -p 8 -l $__tool_directory__ -d hg38v3 -1 "$input1" -2 "$input2" -s "$read_size" "$output1" "$output2" - ]]></command> - <inputs> - <param type="data" name="input1" format="fastq" /> - <param type="data" name="input2" format="fastq" /> - <param name='read_size' type='integer' value='50' label="Read size" /> - </inputs> - <outputs> - <data name="output1" format="txt" /> - <data name="output2" format="txt" /> - </outputs> - <tests> - <test> - <param name="input1" value="input1.fastq"/> - <param name="input2" value="input2.fastq"/> - <output name="output1" file="output1.txt"/> - <output name="output2" file="output2.txt"/> - </test> - </tests> - <help> - **SpliceTrap** - </help> - <citations> - <citation type="bibtex"> - http://rulai.cshl.edu/splicetrap - </citation> - </citations> -</tool> |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/src/Makefile --- a/SpliceTrap/src/Makefile Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,3 +0,0 @@ -all: - g++ -O2 splicetrap.estimate.cpp -o Pair_estimate_c - mv Pair_estimate_c ../bin |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/src/splicetrap.estimate.cpp --- a/SpliceTrap/src/splicetrap.estimate.cpp Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| b'@@ -1,854 +0,0 @@\n-//Author: Jie Wu@CSHL\n-//TO replace the original Perl code\n-\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <iostream>\n-#include <fstream>\n-#include <string.h>\n-#include <unistd.h>\n-#include <sstream>\n-#include <map>\n-#include <vector>\n-#include <math.h>\n-#include <time.h>\n-#include <limits>\n-#include <iomanip>\n-\n-using namespace std;\n-\n-int MAX_LINE_LEN = 1024*1024;\n-\n-void printusage()\n-{\n- cout<< "\\tUsage:"<<endl;\n- cout<<"\\t-s\\tread_size"<<endl;\n- cout<<"\\t-b\\tIRM file"<<endl;\n- cout<<"\\t-f\\tFZM file"<<endl;\n- cout<<"\\t-1\\tmapping result 1"<<endl;\n- cout<<"\\t-2\\tmapping result 2"<<endl;\n- cout<<"\\t-d\\tTXdb database file"<<endl;\n- cout<<"\\t-o\\toutput prefix"<<endl;\n- \n-}\n-\n-struct ReadPairInfo\n-{\n-\tbool isPaired;\n-\tvector<int> IsoFlag; //flag as 1 if mapped to the isoform\n-\tvector<int> Size; //fragment size in the isoform\n-\tvector<int> Pos;\n-};\n-\n-class TXdb_entry{\n-\tpublic:\n-\t string chrid;\n-\t //int start;\n-\t //int end;\n-\t string id;\n-\t bool iscomplete;\n-\t int start;\n-\t int end;\n-\t int exonstarts[3];\n-\t char strand;\n-\t int exonsize[3];\n-\t int len1;\n-\t int len2;\n-\t vector< ReadPairInfo > pairs;\n-\t int snum1; //single-end reads\n-\t int snum2;\n-\t int snum12;\n-\t float ir, bir;\n-\t int enum1; //exon body reads\n-\t int enum2;\n-\t int enum3;\n-\t \n-\t int jnum12;\n-\t int jnum23;\n-\t int jnum13;\n-\n-\t int totalreadnum;\n-\t void estimate(vector<double> &FZM, map<string, vector<double> > &IRM, int &read_size)\n-\t {\n-\t\t float Maxe=0;\n-\t\t float Max = -numeric_limits<float>::max();\n-\t\t float BMaxe=0;\n-\t\t float BMax=-numeric_limits<float>::max();\n-\t\t for(float e1=0.001; e1<1;e1=e1+0.001)\n-\t\t {\n-\t\t\t float efix1 = e1*len1/\n-\t\t\t (e1*exonsize[1]+len2);\n-\t\t\t float efix2 = 1-efix1;\n-\t\t\t float LL = snum12*log( efix1/(len1-read_size+1)\n-\t\t\t + efix2/(len2-read_size+1) )\n-\t\t\t + snum1*log(efix1/(len1-read_size+1)) \n-\t\t\t\t + snum2*log(efix2/(len2-read_size+1));\n-\t\t\t\t //cout<<LL<<endl;\n-\t\t\t\t //cout<<efix1<<"\\t" <<pairs.size()<<"\\t" <<len2<<"\\t"\t<<endl;\n-\t\t //int num1=0, num2=0,num12=0;\n-\t\t for(int i=0;i<pairs.size();i++)\n-\t\t\t {\n- if(pairs[i].IsoFlag[0] == 1)\n-\t\t\t {\n-\t\t\t\t if(pairs[i].IsoFlag[1] == 0)\n-\t\t\t\t {\n-\t\t\t\t\t // num1++;\n-\t\t\t\t\t float tmp=FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1);\n-\t\t\t\t\t if (tmp == 0)\n-\t\t\t\t\t {\n-\t\t\t\t\t \t LL = LL - 308;\n-\t\t\t\t\t }\n-\t\t\t\t\t else\n-\t\t\t\t\t {\n-\t\t\t\t\t\t LL = LL + log(tmp);\n-\t\t\t\t\t }\n-\t\t\t\t }\n-\t\t\t\t else\n-\t\t\t\t {\n-\t\t\t\t\t // num12++;\n-\t\t\t\t\t float tmp=(FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1)) \n-\t\t\t\t\t + (FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1));\n-\t\t\t\t \t if(tmp ==0)\n-\t\t\t\t\t {\n-\t\t\t\t\t LL = LL -308;\n-\t\t\t\t\t }\n-\t\t\t\t\t else\n-\t\t\t\t\t {\n-\t\t\t\t\t\t LL = LL + log(tmp);\n-\t\t\t\t\t }\n-\t\t\t\t }\n-\t\t\t }\n-\t\t\t else\n-\t\t\t {\n-\t\t\t\t //num2++;\n-\t\t\t\t float tmp = FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1);\n-\t\t\t\t if(tmp ==0)\n-\t\t\t\t {\n-\t\t\t\t LL= LL - 308; \n-\t\t\t\t }\n-\t\t\t\t else\n-\t\t\t\t {\n-\t\t\t\t\t LL = LL +log(tmp);\n-\t\t\t\t }\n-\t\t\t }\n-\t\t\t }\n-\t\t\t //cout<<num1<<"\\t"<<num2<<"\\t"<<num12<<"\\n";\n-\t\t\t //cout<<LL<<endl;\n-\t\t\t \n-\t\t\t if(IRM["CA"].size() > 0)\n-\t\t\t {\n-\t\t\t\t float BLL;\n-\t\t\t\t string eventtype =id.substr(0,2);\n-\t\t\t\t if(!eventtype.compare("ME"))\n-\t\t\t\t {\n-\t\t\t\t\t eventtype = "CA";\n-\t\t\t\t }\n-\t\t\t\t BLL = LL + log(IRM[eventtype][int(e1/0.001)]);\n-\t\t\t\t if(BLL > BMax)\n-\t\t\t\t { \n-\t\t\t\t\t BMax = BLL;\n-\t\t\t\t\t BMaxe = e1;\n-\t\t\t\t }\n-\t\t\t }\n-\t\t\t\t //cout<<LL<<endl;\n-\t\t\t if(LL > Max)\n-\t\t\t {\n-\t\t\t\t Max= LL;\n-\t\t\t\t Maxe = e1;\n-\n-\t\t\t }\n-\t\t\t\t \n-\t\t }\n-\t\t ir= Maxe;\n-\t\t bir= BMaxe;\n-\t }\n-\t\t\n-\t TXdb_entry(char *line)\n-\t {\n-\t\t size_t found;\n-\t\t string linestr(line);\n-\t\t found = linestr.find("[L]");\n-\t\t if(found != string::npos)\n-\t\t {\n-\t\t\t iscomplete=1;\n-\t\t\n-\t\t \tchar *token;\n-\n-\t\t \ttoken = strtok(line,"\\t");\n-\t\t \tif(token==NULL)\n-\t\t \t{\n-\t\t\t\t cout<<"error1"<<endl;\n-\t\t\t\t exit(0);\n-\t\t\t }\n-\t\t\t else\n-\t\t\t {\n-\t\t\t\t c'..b'gle_read_num<<endl;\n- cout<<"Paired reads used:\\t"<<total_pair_read_num<<endl;\n-\n- log_file<<"#total_read_num:\\t"<<total_read_num*2<<endl;\n- log_file<<"#total_single_read_num:\\t"<<total_single_read_num<<endl;\n- log_file<<"#total_pair_read_num:\\t"<<total_pair_read_num<<endl;\n- \n- //Calculate ratios and output\n- cout<<"Now doing estimations and output to "<<ratio_file_name<<"\\t"<<num_file_name<<endl;\n- ofstream ratio_file(ratio_file_name.c_str());\n- ofstream num_file(num_file_name.c_str());\n- map<string, TXdb_entry>::iterator txdb_it = TXdb_entries.begin();\n- for(;txdb_it!=TXdb_entries.end();txdb_it++)\n- {\n-\t string eventid = txdb_it->first;\n-\t //cout<< eventid <<endl; \n-\t txdb_it->second.estimate(FZM,IRM,read_size);\n-/*\t ratio_file<< eventid<<"\\t"\n-\t \t\t<<setprecision(4)<<fixed\n-\t <<txdb_it->second.ir<<"\\t"\n-\t\t\t<<txdb_it->second.bir<<"\\t"\n-\t\t\t<<"NA\\tNA\\tNA\\t"\n-\t\t\t<<txdb_it->second.jnum12<<"\\t"\n-\t\t\t<<txdb_it->second.jnum23<<"\\t"\n-\t\t\t<<txdb_it->second.jnum13<<"\\t"\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\\t"\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\\t"\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\\t"\n-\t\t\t<<txdb_it->second.exonsize[0]<<"\\t"\n-\t\t\t<<txdb_it->second.exonsize[1]<<"\\t"\n-\t\t\t<<txdb_it->second.exonsize[2]<<"\\t"\n-\t\t<<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<endl;\n-\t\t*/\n-\t ratio_file<<eventid<<"\\t"\n-\t \t\t<<setprecision(4)<<fixed\n-\t\t\t<<txdb_it->second.ir<<"\\t" //1\n-\t\t\t<<txdb_it->second.bir<<"\\t" //2\n-\t\t\t<<txdb_it->second.chrid<<"\\t" //3\n-\t\t\t<<txdb_it->second.start<<"," //\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\\t" //4\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\\t"//5\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\\t"//6\n-\t\t\t<<txdb_it->second.strand<<"\\t"//7\n-\t\t\t<<txdb_it->second.jnum12<<"\\t"//8\n-\t\t\t<<txdb_it->second.jnum23<<"\\t"//9\n-\t\t\t<<txdb_it->second.jnum13<<"\\t"//10\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\\t"//11\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\\t"//12\n-\t\t\t<<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\\t"//13`\n-\t\t\t<<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<"\\t"//14\n-\t\t\t<<txdb_it->second.exonsize[0]<<"\\t"//15\n-\t\t\t<<txdb_it->second.exonsize[1]<<"\\t"//16\n-\t\t\t<<txdb_it->second.exonsize[2]<<endl;//17\n-\n-\t num_file<< eventid<<"\\t"\n-\t\t\t<<txdb_it->second.chrid<<"\\t"\n-\t\t\t<<txdb_it->second.start<<","\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\\t"\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\\t"\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","\n-\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\\t"\n-\t\t\t<<txdb_it->second.strand<<"\\t"\n- <<txdb_it->second.enum1<<"\\t"\n-\t\t\t<<txdb_it->second.enum2<<"\\t"\n-\t\t\t<<txdb_it->second.enum3<<"\\t"\n-\t\t\t<<txdb_it->second.jnum12<<"\\t"\n-\t\t\t<<txdb_it->second.jnum23<<"\\t"\n-\t\t\t<<txdb_it->second.jnum13<<endl;\n- }\n- log_file.close();\n- ratio_file.close();\n- num_file.close();\n- clock_finish = clock();\n- cout<<"Done! time used:"<<(double)(clock_finish-clock_start)/CLOCKS_PER_SEC<<" seconds"<<endl;\n-\n- \n-}\n' |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/test-data/input1.fastq --- a/SpliceTrap/test-data/input1.fastq Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| b"@@ -1,200484 +0,0 @@\n-@ERR030881.107 HWI-BRUNOP16X_0001:2:1:13663:1096#0/1\n-ATCTTTTGTGGCTACAGTAAGTTCAATCTGAAGTCAAAACCAACCAATTT\n-+\n-5.544,444344555CC?CAEF@EEFFFFFFFFFFFFFFFFFEFFFEFFF\n-@ERR030881.311 HWI-BRUNOP16X_0001:2:1:18330:1130#0/1\n-TCCATACATAGGCCTCGGGGTGGGGGAGTCAGAAGCCCCCAGACCCTGTG\n-+\n-GFFFGFFBFCHHHHHHHHHHIHEEE@@@=GHGHHHHHHHHHHHHHHHHHH\n-@ERR030881.1487 HWI-BRUNOP16X_0001:2:1:4144:1420#0/1\n-GTATAACGCTAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCATAGTA\n-+\n-55*'+&&5'55('''888:8FFFFFFFFFF4/1;/4./++FFFFF=5:E#\n-@ERR030881.9549 HWI-BRUNOP16X_0001:2:1:1453:3458#0/1\n-AACGGATCCATTGTTTCGAGAACGTGATCGCCCTCATCTACCTAGCCTCA\n-+\n-D<@DDA@A:AHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHBHHHHHHH\n-@ERR030881.13497 HWI-BRUNOP16X_0001:2:1:16344:4145#0/1\n-GCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCACCATCGTGC\n-+\n-F@FFFGGFGFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHH\n-@ERR030881.14070 HWI-BRUNOP16X_0001:2:1:4377:4232#0/1\n-TGGACAGTTGCTCCTGGCTCCAGAACCTGTCTTGCAAGGGACAGTGGGGT\n-+\n-A:AA@HHHHHHHHHHHHHHHHHHHIHHHHHHHHHHGF=GFHHHH@@?AA*\n-@ERR030881.16375 HWI-BRUNOP16X_0001:2:1:2265:4573#0/1\n-ATTAGGAAACATGGAATTTTTTTAAAGGTTTTTCTTGTATCTTTTTTTTT\n-+\n-@<><CHHHHHHHHHHHHHHHHHGGHHHHHHHHHHGGGHHHHHHHHGGGGG\n-@ERR030881.18437 HWI-BRUNOP16X_0001:2:1:13904:4828#0/1\n-CAATAGCCAGATGGTTGGTGGGGCAGCCAGGCAGGGAGGACCCAGGGCTG\n-+\n-555544555544555;AAAAFFBBEEEE;=FCB9F===<<FFFFEFFEEE\n-@ERR030881.18768 HWI-BRUNOP16X_0001:2:1:15563:4868#0/1\n-GTGCCAAATTGTCACATTCGAGCTTGAGGCTGTGGTACTGAGCTTGCAGT\n-+\n-D>BFD@@?>>54454?FFGFGGGGGGGGGGGGGEGGGGGGGGGEGGGGGG\n-@ERR030881.20718 HWI-BRUNOP16X_0001:2:1:12184:5115#0/1\n-CCCGGCCTAACTTTCATTTAATTTCAATGAATTTTCTTTTTTTTTTTTTT\n-+\n-56455==@=>HHHHHHHHHGHHHHHHHHGH=HHHHHHEEEECEEEEEEEE\n-@ERR030881.22833 HWI-BRUNOP16X_0001:2:1:13089:5358#0/1\n-GGAGAAGGGGCGAGGGAAGAAGACCTTTGCTATCCCAGATACCAGGACTG\n-+\n-55544145444/444GFDFG9A@@@DD>.F@><<=FDD@AGG>GGEGGEG\n-@ERR030881.23643 HWI-BRUNOP16X_0001:2:1:7921:5452#0/1\n-CGGCCCCCTGCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCA\n-+\n-FBDFFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHH\n-@ERR030881.28299 HWI-BRUNOP16X_0001:2:1:6428:5960#0/1\n-ATGAGAAGGAGCCATCAGGACCTTATGAAAGCGACGAAGACAAGAGTGAT\n-+\n-55554DDFFFBBFFFHHGHHHHHHHHHHHHHHHHHHDHH8HHHHHHHHFH\n-@ERR030881.28475 HWI-BRUNOP16X_0001:2:1:14780:5977#0/1\n-CGAAAACCAACTCTTTACCTAACTTTGCATGGTGCTTAGTCAAGGACTCC\n-+\n-555,4&4551FFFFFBF3BDFFFFFFEFFFFBEFFFFFFDFFFFFFFFF=\n-@ERR030881.29253 HWI-BRUNOP16X_0001:2:1:1570:6070#0/1\n-GGAATGTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCA\n-+\n-HGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.30545 HWI-BRUNOP16X_0001:2:1:4103:6216#0/1\n-CAACTCTTTACCTAACTTTGCATGGTGCTTAGTCAAGGACTCCTGCGACC\n-+\n-54-55A@A@@HHHHHFFGGE555558<=;=55555AAAA?HHHHH>8@@>\n-@ERR030881.32582 HWI-BRUNOP16X_0001:2:1:12474:6471#0/1\n-CTTGCCTCACATGTCAGGGCAGGTATCCACCTAACCAGGCTGCAGGGGAG\n-+\n-555555544444544HHHHGHHHHHHHHHHHHHHHHHHHHHH5@HFFF*F\n-@ERR030881.33730 HWI-BRUNOP16X_0001:2:1:14154:6628#0/1\n-CCAGCCTTGATACAGCATTTTCCACTTCTCTCTGTAGAGATCAGACGATT\n-+\n-55555555(5@>@=:@=8.@04554CCCCC.441445444-555445555\n-@ERR030881.35226 HWI-BRUNOP16X_0001:2:1:3903:6867#0/1\n-CAGCATCCTGCTTAGGGCCCTGGAAACTGGGGAAATAGGTAGCCAGGTGG\n-+\n-55555A@AAAGGEGGGGGGGGGGGGGGGGGGGCGGGFEGFGGGGFGGCGG\n-@ERR030881.38182 HWI-BRUNOP16X_0001:2:1:17495:7451#0/1\n-CACCATCGTGCCCGTTCTTGTCTTCCTTGGAGAGGTGGGCCTGGGAACCC\n-+\n-5544455,0545445FFFEEFFFFFFFFFEEBC;D6<5-?FFFFFFFFFF\n-@ERR030881.41234 HWI-BRUNOP16X_0001:2:1:14816:8065#0/1\n-CTCTCCTCTAACCCTCCAGGCCTTAGCTTGCCTCACATGTCAGGGCAGGT\n-+\n-55,34)4-53HHEHHGGGGG7DC?@GG;BGGEGGGGGGGGGGGGGGGGGA\n-@ERR030881.55301 HWI-BRUNOP16X_0001:2:1:7892:11256#0/1\n-CAAAAATGTAGCTGCCCTGACCTGGTCTCCCCTGACCCTTCCACGGGGCT\n-+\n-56624545442525554455FFECECGEDGFF8DF###############\n-@ERR030881.57346 HWI-BRUNOP16X_0001:2:1:20039:11573#0/1\n-GACAGATGATGTCCAAGCCCCTACATGCCCCAGACCCCAGGGCACGGCTG\n-+\n-##################################################\n-@ERR030881.57608 HWI-BRUNOP16X_0001:2:1:16788:11614#0/1\n-ATCTCGTAGTACATCACATAGTGACGCTGCATCTCTGACTTCTCACTGGC\n-+\n-5653445555HHHHHHHHHH9;@=@HHHHHHDHHHHHHHHHHHHHHHHDH\n-@ERR030881.58998 HWI-BRUNOP16X_0001:2:1:14252:11816#0/1\n-CACCATTTGACCCTGAGCCAG"..b':6601:197274#0/1\n-CGGCCCCCTGCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCA\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHH\n-@ERR030881.74446016 HWI-BRUNOP16X_0001:2:68:6384:197508#0/1\n-TGTGTCTTGTGCTAAACATTCCTTTCTCTCCGTGCCTCTGTCTCCCCTCT\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74446277 HWI-BRUNOP16X_0001:2:68:20062:197534#0/1\n-CAGCCCTCTCACCCTGGTACTGCATGCACGCAATGCTAGCTGCCCCTTTC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHEHHHGHIIHHHHHAHHHHHHHHHGH\n-@ERR030881.74446743 HWI-BRUNOP16X_0001:2:68:3752:197585#0/1\n-CTGGGACCCAGGCAGCTGCCACCTTGTCACCATGAGAGAATTTGGGGAGT\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHG\n-@ERR030881.74446915 HWI-BRUNOP16X_0001:2:68:8353:197599#0/1\n-GGACTGTCCACCAGGTCCCGACGGGCAGGAATGCAGATGGGTACCTTTCC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHFHHHHHHHE\n-@ERR030881.74447547 HWI-BRUNOP16X_0001:2:68:9591:197654#0/1\n-GCCAGTGGTGGGCATGCGGCTGCGGAGCACGTCCTGAGCTGTGGGGACGT\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHBDDBB@9@AAHHGHHHHHGHHDHHH\n-@ERR030881.74449534 HWI-BRUNOP16X_0001:2:68:1488:197840#0/1\n-CTACTCCTTCCGCAGCAGGGAGGTGTGCAGAGCCGTGCTCAGCTTCCTCT\n-+\n-HHHHHHHHHHHHHHHHHHHHHHH8HAGFGGFHHHFGGHHHHHGHHHIHGH\n-@ERR030881.74453424 HWI-BRUNOP16X_0001:2:68:5325:198191#0/1\n-GTCCTGCCCTACCTCTCCCAAGAGCACCAGCAGCAGGTCTTGGGAGCCAT\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74454854 HWI-BRUNOP16X_0001:2:68:18716:198301#0/1\n-GCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTTGGGACCTGGTCTC\n-+\n-55555?>?>>5555444555444442=5<=55444C=6C>2555551544\n-@ERR030881.74455894 HWI-BRUNOP16X_0001:2:68:18831:198398#0/1\n-CTGGGACCTGCGGGAGGGCCGCCAGCTGCAGCAGCATGACTTCAGCTCCC\n-+\n-HHHHHHHHHHHHHHHHHGEHHHHHHHCHHHFHHHHHEFGDFHHHEHBFHH\n-@ERR030881.74457151 HWI-BRUNOP16X_0001:2:68:9093:198528#0/1\n-AAACAAAACATTTTCCTTTGGGTTTTTTTTTTTCTTTCTTTTTTCTCCGC\n-+\n-HHHHHHGGGHHHHHHHHHHHHHHHHHHHGGGGGBGGGBHHGGGGGGGHHH\n-@ERR030881.74458067 HWI-BRUNOP16X_0001:2:68:15716:198600#0/1\n-GTTCCAACCACCGCCGGGGAGGGAGAGGGCCCCTGTCCCTGCAGGGCCGC\n-+\n-ADAD?DEFBEHHHHHCCDGDHCEEHCGBGAHHHHHCDCGD5555424554\n-@ERR030881.74460390 HWI-BRUNOP16X_0001:2:68:15056:198815#0/1\n-CCTGGAACTGCCTGACCATAGTCTGATTCTGCAGGTCCCAGACCACAATG\n-+\n-?ACDC?DDGG=DDD>55554GGFFADDDA==<==>D=DAD5445544445\n-@ERR030881.74460430 HWI-BRUNOP16X_0001:2:68:19789:198814#0/1\n-CACAAATCCCGTTCAGCCTTTTGACGATCTCAGCCTGTTTGTGCATCTCG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74460883 HWI-BRUNOP16X_0001:2:68:19795:198864#0/1\n-CTGCCTGGCACGCACCCGGTGGCTGCACCATCCACACGCAAGACTGCAAC\n-+\n-HHHHHHHHHHHHHHHHHDHHHHHGHHFHHHHHHHHHHHFHHHFHGHFHHH\n-@ERR030881.74463349 HWI-BRUNOP16X_0001:2:68:7211:199081#0/1\n-CGGGGAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGAGAAA\n-+\n-HHHHHHHGEHHHHHHHGGGGEHGHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74463429 HWI-BRUNOP16X_0001:2:68:16435:199090#0/1\n-CGGGCTCCTCGCACCTACCCCAGCAACTCAAATTCACCACCTCGGACTCC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74466171 HWI-BRUNOP16X_0001:2:68:1844:199339#0/1\n-ATTTTTTTAAAGGTTTTTCTTGTATCTTTTTTTTTTTTTTTTTTTTTTTT\n-+\n-HHHHHHHGGHHHHGHHHGHC83=;><=@=<CCCCCCCCCCCCCCCCCCCC\n-@ERR030881.74466232 HWI-BRUNOP16X_0001:2:68:10444:199339#0/1\n-CCTGGGTCGCCCACCCTCACCCTGCTCCTCCCAGCTCAGCTAAGCTCGTC\n-+\n-HHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74466444 HWI-BRUNOP16X_0001:2:68:18815:199349#0/1\n-GTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCATAGTA\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIH\n-@ERR030881.74468879 HWI-BRUNOP16X_0001:2:68:9428:199583#0/1\n-CACCAACCAGCCGCGGGCCGCGCAGCTGGTGGACAAGGACAGCACCTTCC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHGHH\n-@ERR030881.74470889 HWI-BRUNOP16X_0001:2:68:4971:199775#0/1\n-CAGAGCTTAGCGGGGGGCTGAGCTGGTGTCTTTGAACCTCTAGTCCCAGG\n-+\n-HHHHHHHHHHHHHHHCGGGHEHHFHHEHHHHHHHHHHHEHHHHHFHHHHH\n-@ERR030881.74471439 HWI-BRUNOP16X_0001:2:68:16981:199816#0/1\n-TGTGTGCCCCATTTCTCCATATAGTCTTCCTCAGGCAGGTCCTAGGTCCC\n-+\n-??DDDEDECC<=@><CCC@?<<<=@EGGGGG?GGGGCGCE>@@6=55554\n-@ERR030881.74471978 HWI-BRUNOP16X_0001:2:68:9605:199866#0/1\n-CCCAGGTCCTGCCCTACCTCTCCCAAGAGCACCAGCAGCAGGTCTTGGGA\n-+\n-HHHHHHHHHGHHHHHHHHGHHHHHHHHHHHHFHHHHHHHHHHGHHHHAHE\n' |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/test-data/input2.fastq --- a/SpliceTrap/test-data/input2.fastq Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| b"@@ -1,200484 +0,0 @@\n-@ERR030881.107 HWI-BRUNOP16X_0001:2:1:13663:1096#0/2\n-CGGATTTCAGCTACTGCAAGCTCAGTACCACAGCCTCAAGCTCGAATGTG\n-+\n-HH;HHHHHGHHHHHHHHHHGHDHEHHHHHEHHHHBHHFHHHHHHHHHD0F\n-@ERR030881.311 HWI-BRUNOP16X_0001:2:1:18330:1130#0/2\n-GAGTGCGAGGGAAGTCAGGGGAGGATCGCGAGGGAAGCCAGGGGAGGATC\n-+\n-HHHHHBF8G>&4555GGGGGHHGGEHHHHHHHHH=HHHHHHHHHHHGB9H\n-@ERR030881.1487 HWI-BRUNOP16X_0001:2:1:4144:1420#0/2\n-AACCGGGGGACGGGCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTT\n-+\n-@FEEH>==9=05544FGFGFHHHBHHHFHF>AAAAHHHHHHHEHHHHHHH\n-@ERR030881.9549 HWI-BRUNOP16X_0001:2:1:1453:3458#0/2\n-TCAGCATGCTTCTTAGGGCCCTGGAAACTGGGGAAATAGGTAGCCAGGTG\n-+\n-5515555/5515444FFHHHHHHHHHHHHHHHHHHHHHHHEHHHHGHH@H\n-@ERR030881.13497 HWI-BRUNOP16X_0001:2:1:16344:4145#0/2\n-GGCCAAGCAGGTCACCGCTCCCGAGCTGAACTCTATCATCCGACAGCAGC\n-+\n-HHHHFGHHHGFAFFFHHFHHHHH/HHHHGHHEHHEHGFHHDGF=AA=@@8\n-@ERR030881.14070 HWI-BRUNOP16X_0001:2:1:4377:4232#0/2\n-TGGAGTCCTTCATGCCCAGGTCTGGAACCCAGGTTCTGACCCCAGGGCCC\n-+\n-FDFFFEGGGGHHHHGHHHHH>AAA8GGGGGHHHGHHHHHHHHHHHGFHHH\n-@ERR030881.16375 HWI-BRUNOP16X_0001:2:1:2265:4573#0/2\n-GGCCAGCCGGGCTCCAGAGGGGTCAGGGCGCGACGAGAACCAACTCTTTA\n-+\n-FDFFBDFDDBAAADDGHGHHHHBHHHHHGHGHHHHHHHHHHHHHHHHHFH\n-@ERR030881.18437 HWI-BRUNOP16X_0001:2:1:13904:4828#0/2\n-GGGCTCTCCCTCTGTATCGCCTGGGGAGGCTGCTGAGGTGACTTTTTGGA\n-+\n-A?DDABFBFFHGHEHHHHHHHHHIHHDHCC55555BFFCD;:9=;=@=><\n-@ERR030881.18768 HWI-BRUNOP16X_0001:2:1:15563:4868#0/2\n-CACAGTAGGCGTTCTATAAATGTGTCACAAGAATGGCTTCCCTCAGGAAG\n-+\n-55444;@=@>HHHDHHHHHFFGHHHHHHHHHIHHHFH=HHBB?<D#####\n-@ERR030881.20718 HWI-BRUNOP16X_0001:2:1:12184:5115#0/2\n-GCCTGGGCAACATAGCGAAACCACATCTCTACAAAAAAATCCTCCAAAAT\n-+\n-HGIEHHHHGHF=@FF8A>>@HFHH=HHHHHHHIHHHGGGGH@@HHGGGEG\n-@ERR030881.22833 HWI-BRUNOP16X_0001:2:1:13089:5358#0/2\n-AGCCACTGCCTTTCTGCTCAGATGCTGGCACCTCCGCCCCCGGGGCTGCC\n-+\n-EHHHFF?GFDGFFB???DDAD<FC<55555FFGGG<?>>61/5444-555\n-@ERR030881.23643 HWI-BRUNOP16X_0001:2:1:7921:5452#0/2\n-CGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAGCTGTCCC\n-+\n-HHHHHHHHHHHHHHHGHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHDHHH\n-@ERR030881.28299 HWI-BRUNOP16X_0001:2:1:6428:5960#0/2\n-GGAGTCACAGGATTTGGAGGCAGGAGTGCTGGCGGGAAGGGCATTCAGGA\n-+\n-HHHHHHHFEH?=DDDHIFHHEHEDE?DAADH@FHHC'@CE##########\n-@ERR030881.28475 HWI-BRUNOP16X_0001:2:1:14780:5977#0/2\n-CTCGGAAGGCAAGGCACATCTTGTGGTAGAAAATTTCGTGCAAATTAGGA\n-+\n-HHHHHGGH=IADDADHHGHH444-4A?A?AGHGHHFGFG@/5544HDHEE\n-@ERR030881.29253 HWI-BRUNOP16X_0001:2:1:1570:6070#0/2\n-CTTCGCGAGGTCTTGCTCCCTTGGGACCTGGTCTCCCATCTGACCCTCCA\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.30545 HWI-BRUNOP16X_0001:2:1:4103:6216#0/2\n-GTTTAAAGGTGATACTTATTCTCGGAAGGCAAGGCACATCTTGTGGGAGA\n-+\n-EF;GG4445544544FF@FFEHFHFFHGHH####################\n-@ERR030881.32582 HWI-BRUNOP16X_0001:2:1:12474:6471#0/2\n-GGGACAGGGAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGA\n-+\n-FF8FFBFFFFFDF@FCD>CFF@@F:HEHEHHHHBHHHHHF==<>5?DDA;\n-@ERR030881.33730 HWI-BRUNOP16X_0001:2:1:14154:6628#0/2\n-GTGAGGGTGGGCGACCCAGGATTCCCCCTCCCCTTCCCAAATAAAGATGA\n-+\n-BEFDB44(4411445DA?ADHHHHIFDDC>:::5@DDDC?HHHDEBFFB>\n-@ERR030881.35226 HWI-BRUNOP16X_0001:2:1:3903:6867#0/2\n-CAGAGCGTAAGAAATGGATCCATTGTTCCGAGAACGTGATCGCCCTCATC\n-+\n-HH@HHHFDHHHHHHHFHHHGHGHHHHHHHGGHHHHHFHHAHHHHHGHHGH\n-@ERR030881.38182 HWI-BRUNOP16X_0001:2:1:17495:7451#0/2\n-CCTCTCCCGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAG\n-+\n-GG/GGHHHHHHHHHHHHHHHHHHHDHHHHHFDHHHHHH@HHEHHHHHHHH\n-@ERR030881.41234 HWI-BRUNOP16X_0001:2:1:14816:8065#0/2\n-GGCAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGAGAAAGG\n-+\n-FFGHH55,5514441>><<BHHEHFF?9F4FFFBFHHHHHHHHGHHFF4H\n-@ERR030881.55301 HWI-BRUNOP16X_0001:2:1:7892:11256#0/2\n-CTTCGCAAATTTGTCCCAGGGATGGATCGCCTGTGCTGCCTTCGCCCGCC\n-+\n-D@5AA4453451444GGGFDHH@GEA;DDD=:=+:D@DFDEDHHB#####\n-@ERR030881.57346 HWI-BRUNOP16X_0001:2:1:20039:11573#0/2\n-CCTGTCCAGAGTCTGAGGGGGGAGGCCAGGCCCTGCCTTGGGGTCTGAGG\n-+\n-##################################################\n-@ERR030881.57608 HWI-BRUNOP16X_0001:2:1:16788:11614#0/2\n-GGGGGGCGCCGCAGCTGCGCGGCCGCTCCCTCCTAGCCGGCCCTTGAGGG\n-+\n-HHHHHHHEGHIHHHHHHDHF@@<A?FFE@FGGGAG4====HHHHHHHEHB\n-@ERR030881.58998 HWI-BRUNOP16X_0001:2:1:14252:11816#0/2\n-CTGAATCCCTTGCCCAGAGGA"..b':6601:197274#0/2\n-CCGCTCCCGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGH\n-@ERR030881.74446016 HWI-BRUNOP16X_0001:2:68:6384:197508#0/2\n-GGCCCTGCCCTTGACCCCACTACCCGTGGGGCTGCAGCCGCCTTCGCTGC\n-+\n-HHHHHHHHHHHHHHHHHHIHHGHHH>A??@FHHHFHHFHDHH=HHB>4FF\n-@ERR030881.74446277 HWI-BRUNOP16X_0001:2:68:20062:197534#0/2\n-CTTTATTTGGGAAGGGGAGGGGGAATCCTGGGTCGCCCACCCTCACCCTG\n-+\n-HHHHHHHGGHHHHHHHHFHGGGGGHHHHHHHGHHHHFHHEH9BHEDD###\n-@ERR030881.74446743 HWI-BRUNOP16X_0001:2:68:3752:197585#0/2\n-CGGCCGGCTGCATCCCACACCAGCCTGAGCCCCAGACGGTCAGTCAGTGC\n-+\n-HHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHH\n-@ERR030881.74446915 HWI-BRUNOP16X_0001:2:68:8353:197599#0/2\n-CGAGGGGTCCAGAGTGGAGAGAGCCCCGAGCAGGAGTGCATCTCCCTCGC\n-+\n-HHHHHHHFHHHHHHHFHHHHHHHHHHHHHHHGHHHHHHHHHHIHFHHHGH\n-@ERR030881.74447547 HWI-BRUNOP16X_0001:2:68:9591:197654#0/2\n-GGCTGCAGATTCCATTCAGCAGGCCCGAGAGCAAGCACCACGCTAGCCTG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHG\n-@ERR030881.74449534 HWI-BRUNOP16X_0001:2:68:1488:197840#0/2\n-CAAGACTGCAACTTCAGATGCTCCGCACGCTGGAGATGCTGGACAGGGGC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFFHHHHHHHEHEHE\n-@ERR030881.74453424 HWI-BRUNOP16X_0001:2:68:5325:198191#0/2\n-CTTCCTTGGAGAGGTGGGCCTGGGAACCCAGCGCGGACAGCGAGAGGAGG\n-+\n-HHGHHHHHHHHHHHHHHHHHHGHHGHHHHHHHHHHHHHHHHHHHEHHHHG\n-@ERR030881.74454854 HWI-BRUNOP16X_0001:2:68:18716:198301#0/2\n-GGAATGTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCA\n-+\n-HF@GHD?>DA=<>;=444444245444445>>@>;BECBF@?A<>@AAA8\n-@ERR030881.74455894 HWI-BRUNOP16X_0001:2:68:18831:198398#0/2\n-GGACTGAGGACGACTCCTTGGACTGGAAAATGCTGGCCCCGTACGGCGTC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHHHHIH\n-@ERR030881.74457151 HWI-BRUNOP16X_0001:2:68:9093:198528#0/2\n-GGAACCTTCTCCGGATTGGGTTCATGAGCATTTTTGTGGGTGTGTATGTG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHFH\n-@ERR030881.74458067 HWI-BRUNOP16X_0001:2:68:15716:198600#0/2\n-GGCCGTCTTTGACCTGCTCCTGGCTGTTGGCATTGCTGCCTACCCTGGCA\n-+\n-55555<@@@@===<655244A??DAC:C?#####################\n-@ERR030881.74460390 HWI-BRUNOP16X_0001:2:68:15056:198815#0/2\n-GGTGAGGCCAGCACCTTGTCCATTTGGGACCTGGCGGCGCCCACCCCCCG\n-+\n-5-5449=;==BFFBFDBFDDC>?>>D?DDDHHHHHBFFC@44244<<<<<\n-@ERR030881.74460430 HWI-BRUNOP16X_0001:2:68:19789:198814#0/2\n-ATGATGTTTCCACAAAGCAGGCATTCGGGCTCCTCGCACCTACCCCAGCA\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74460883 HWI-BRUNOP16X_0001:2:68:19795:198864#0/2\n-TGCTGCGGGTGTCTCCGGCTGGGCATGCGGGGGCCCGGGGACTGCCTGGC\n-+\n-HHHHHHHHHHHHGHHHHFDEBDDBB5552*DDBBFHHHHH@FDF######\n-@ERR030881.74463349 HWI-BRUNOP16X_0001:2:68:7211:199081#0/2\n-CTGGTCTCCCATCTGACCCTCCAGGCCTTAGCTTGCCTCACATGTCAGGG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFH\n-@ERR030881.74463429 HWI-BRUNOP16X_0001:2:68:16435:199090#0/2\n-GGACCTGGGCACAAATCCCGTTCAGCCTTTTGACGATCTCAGCCTGTTTG\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n-@ERR030881.74466171 HWI-BRUNOP16X_0001:2:68:1844:199339#0/2\n-GGTGGGGGTCGTGGAGTGGGGGAGGGAGGCCAGCCGGGCTCCAGAGGGGT\n-+\n-HHHHHHGGGHGBGEFHHHFHHG9GGC;HHEHHHCHFG@FFAA;=9DD;C7\n-@ERR030881.74466232 HWI-BRUNOP16X_0001:2:68:10444:199339#0/2\n-CCGTTTTGAACATGTGTAACCGACAGTCTGCCTGGGCCACAGCCCTCTCA\n-+\n-HHHHHHHHHHHHHHHHHHHHHHHHIHHHHHGHDHGHHHBHCFFFFHHHHH\n-@ERR030881.74466444 HWI-BRUNOP16X_0001:2:68:18815:199349#0/2\n-GGAAGGGCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTTGGGACCT\n-+\n-HHHHIHIHHHHHGHHHHHHHFHDHFGHHHHHEHEHHHHHHHHHHHHGHHH\n-@ERR030881.74468879 HWI-BRUNOP16X_0001:2:68:9428:199583#0/2\n-GGAGGCTGAAGTGCTGGACAGCCACGTAGGCCATGCCGAGGTAGGCAGCA\n-+\n-HFHHHHHHHHHHIHGHHHHHHHHHHHHHHHHHEHHHHGGHH?FHHHHHGH\n-@ERR030881.74470889 HWI-BRUNOP16X_0001:2:68:4971:199775#0/2\n-GACATATTTGAGAGACACTGGGGAGACAGAATCGACCTGACCTTGCTGAC\n-+\n-HHHHHHHHHHHHHHHHHHHHHHH@HHHEHHHFHHHHHGHAHFBEHHGFBG\n-@ERR030881.74471439 HWI-BRUNOP16X_0001:2:68:16981:199816#0/2\n-GTGACACTGCATTGCTGCTGCCAGCACCCCTTGTTAGGGTTTGTAATTGC\n-+\n-F8HHHFGGG8DC>A>ADD1?##############################\n-@ERR030881.74471978 HWI-BRUNOP16X_0001:2:68:9605:199866#0/2\n-CTTGTCTTCCTTGGAGAGGTGGGCCTGGGAACCCAGCGCGGACAGCGAGA\n-+\n-HHHHHHIHHHHHDHHGHHHGHHHHHHHHHHHHFGHHHHFHHDHHHCHHHH\n' |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/test-data/output1.txt --- a/SpliceTrap/test-data/output1.txt Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| b'@@ -1,9901 +0,0 @@\n-na\t0.0010\tchr10\t100260965,100261028\t100261028,100261044\t100261978,100262063\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100273277,100275490\t100275490,100275493\t100280123,100280228\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100286096,100286213\t100286213,100286332\t100286604,100286712\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100329210,100329324\t100329324,100329346\t100329868,100330486\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100523728,100523929\t100523929,100524139\t100526398,100526554\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100827008,100827095\t100827542,100827561\t100827561,100829941\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t100981167,100981229\t100983304,100984489\t100984489,100985616\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101062541,101063103\t101064205,101064260\t101064260,101064421\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101134174,101134376\t101136667,101136690\t101136690,101137789\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101600737,101600862\t101601169,101601202\t101601202,101601336\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101605139,101605212\t101608834,101608866\t101608866,101608937\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101601202,101601336\t101608834,101608870\t101608870,101608937\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775129,101775216\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775739,101775776\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101828626,101828696\t101828696,101828717\t101829074,101829199\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t101829843,101829897\t101829897,101829942\t101831071,101831167\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102137849,102138038\t102138618,102138719\t102138719,102138765\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102148371,102148521\t102148627,102148633\t102148633,102148694\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102157018,102157074\t102157185,102157188\t102157188,102157328\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102358505,102358729\t102359266,102359269\t102359269,102359435\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102402047,102402159\t102402251,102402254\t102402254,102402529\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102416384,102416631\t102416631,102417121\t102418700,102418759\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t102656105,102656137\t102656244,102656269\t102656269,102656385\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t103451082,103451182\t103451182,103451285\t103452155,103452405\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t106573662,106577285\t106577285,106577555\t106579368,106579474\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t106573662,106577285\t106577285,106577555\t106579090,106579251\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t110875965,110876070\t110881232,110881274\t110881274,110881535\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t112182793,112182887\t112182887,112182891\t112183692,112183779\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t112443514,112443594\t112443594,112443606\t112445169,112445650\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t112446987,112447467\t112448377,112448381\t112448381,112448489\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t113146010,113146097\t113150982,113150997\t113150997,113151123\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t11320856,11320898\t11321188,11321206\t11321206,11321386\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t11314138,11314258\t11321188,11321206\t11321206,11321386\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t113588823,113589079\t113589079,113589082\t113589665,113589797\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t113679668,113679978\t113691963,113691996\t113691996,113692070\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t113697493,113697603\t113721000,113721031\t113721031,113721168\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t114129890,114129971\t114129971,114130669\t114131146,114131336\t-\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.0010\tchr10\t114447879,114448'..b'1387,94571429\t94573959,94574187\t94574553,94574736\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94573959,94574187\t94574553,94574736\t94576661,94576709\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94573959,94574187\t94574553,94574736\t94576661,94576805\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94581325,94581522\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94577081,94577096\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94581325,94581522\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94576661,94576709\t94581325,94581522\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94577081,94577096\t94581325,94581522\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94581325,94581522\t94582962,94583059\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94581325,94581522\t94582962,94583059\t94583987,94584125\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94582962,94583059\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94583987,94584125\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94576661,94576805\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94576661,94576709\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94588228,94588390\t94590412,94590552\t94590637,94590776\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94590412,94590552\t94590637,94590776\t94592228,94592312\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94590637,94590776\t94592228,94592312\t94592394,94592514\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94592228,94592312\t94592394,94592514\t94593498,94593615\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94592394,94592514\t94593498,94593615\t94594694,94594854\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94593498,94593615\t94594694,94594854\t94596859,94596956\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94594694,94594854\t94596859,94596956\t94597034,94597111\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94596859,94596956\t94597034,94597111\t94601527,94602099\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94683493,94683987\t94687769,94687932\t94688124,94688274\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94687769,94687932\t94688124,94688274\t94694916,94695077\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94706783,94706960\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94720395,94720537\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94706783,94706960\t94720395,94720537\t94724345,94724533\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94694916,94695077\t94720395,94720537\t94724345,94724533\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94720395,94720537\t94724345,94724533\t94733296,94733438\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94724345,94724533\t94733296,94733438\t94735262,94736191\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94762680,94762873\t94775057,94775220\t94775389,94775539\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94775057,94775220\t94775389,94775539\t94780498,94780659\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94775389,94775539\t94780498,94780659\t94781820,94781997\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94780498,94780659\t94781820,94781997\t94820495,94820637\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94781820,94781997\t94820495,94820637\t94842836,94843024\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-na\t0.9990\tchr10\t94820495,94820637\t94842836,94843024\t94849916,94850058\t+\t0.0000\t0.0000\t0.0000\t0.0000\n-\t0.9990\tchr10\t94842836,94843024\t94849916,94850058\t94852732,94853206\t+\t0.0000\t\t\t\n' |
| b |
| diff -r 3fc5784c1ce0 -r 2d7ec1cb4139 SpliceTrap/test-data/output2.txt --- a/SpliceTrap/test-data/output2.txt Thu Sep 07 14:12:28 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| b'@@ -1,9901 +0,0 @@\n-AA-AA-10-100261028-100261044.0\t0.0010\t0.0010\tchr10\t100260965,100261028\t100261028,100261044\t100261978,100262063\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t63\t16\t85\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100275490-100275493.0\t0.0010\t0.0010\tchr10\t100273277,100275490\t100275490,100275493\t100280123,100280228\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t2213\t3\t105\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100286213-100286332.0\t0.0010\t0.0010\tchr10\t100286096,100286213\t100286213,100286332\t100286604,100286712\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t117\t119\t108\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100329324-100329346.0\t0.0010\t0.0010\tchr10\t100329210,100329324\t100329324,100329346\t100329868,100330486\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t114\t22\t618\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100523929-100524139.0\t0.0010\t0.0010\tchr10\t100523728,100523929\t100523929,100524139\t100526398,100526554\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t201\t210\t156\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100827542-100827561.0\t0.0010\t0.0010\tchr10\t100827008,100827095\t100827542,100827561\t100827561,100829941\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t87\t19\t2380\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-100983304-100984489.0\t0.0010\t0.0010\tchr10\t100981167,100981229\t100983304,100984489\t100984489,100985616\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t62\t1185\t1127\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101064205-101064260.0\t0.0010\t0.0010\tchr10\t101062541,101063103\t101064205,101064260\t101064260,101064421\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t562\t55\t161\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101136667-101136690.0\t0.0010\t0.0010\tchr10\t101134174,101134376\t101136667,101136690\t101136690,101137789\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t202\t23\t1099\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101601169-101601202.0\t0.0010\t0.0010\tchr10\t101600737,101600862\t101601169,101601202\t101601202,101601336\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t125\t33\t134\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101608834-101608866.0\t0.0010\t0.0010\tchr10\t101605139,101605212\t101608834,101608866\t101608866,101608937\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t73\t32\t71\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101608834-101608870.0\t0.0010\t0.0010\tchr10\t101601202,101601336\t101608834,101608870\t101608870,101608937\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t134\t36\t67\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101774879-101774912.0\t0.0010\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775129,101775216\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t148\t33\t87\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101774879-101774912.1\t0.0010\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775739,101775776\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t148\t33\t37\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101828696-101828717.0\t0.0010\t0.0010\tchr10\t101828626,101828696\t101828696,101828717\t101829074,101829199\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t70\t21\t125\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-101829897-101829942.0\t0.0010\t0.0010\tchr10\t101829843,101829897\t101829897,101829942\t101831071,101831167\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t54\t45\t96\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-102138618-102138719.0\t0.0010\t0.0010\tchr10\t102137849,102138038\t102138618,102138719\t102138719,102138765\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t189\t101\t46\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-102148627-102148633.0\t0.0010\t0.0010\tchr10\t102148371,102148521\t102148627,102148633\t102148633,102148694\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t6\t61\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-102157185-102157188.0\t0.0010\t0.0010\tchr10\t102157018,102157074\t102157185,102157188\t102157188,102157328\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t56\t3\t140\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-102359266-102359269.0\t0.0010\t0.0010\tchr10\t102358505,102358729\t102359266,102359269\t102359269,102359435\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t224\t3\t166\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-102402251-102402254.0\t0.0010\t0.0010\tchr10\t102402047,102402159\t102402251,102402254\t102402254,102402529\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t112\t3\t275\texon1=no\texon2=no\texon3=no\tna\n-AA-AA-10-102416631-10'..b'40\t139\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94590637-94590776.0\t0.0010\t0.9990\tchr10\t94590412,94590552\t94590637,94590776\t94592228,94592312\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t140\t139\t84\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94592228-94592312.0\t0.0010\t0.9990\tchr10\t94590637,94590776\t94592228,94592312\t94592394,94592514\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t139\t84\t120\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94592394-94592514.0\t0.0010\t0.9990\tchr10\t94592228,94592312\t94592394,94592514\t94593498,94593615\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t84\t120\t117\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94593498-94593615.0\t0.0010\t0.9990\tchr10\t94592394,94592514\t94593498,94593615\t94594694,94594854\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t120\t117\t160\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94594694-94594854.0\t0.0010\t0.9990\tchr10\t94593498,94593615\t94594694,94594854\t94596859,94596956\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t117\t160\t97\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94596859-94596956.0\t0.0010\t0.9990\tchr10\t94594694,94594854\t94596859,94596956\t94597034,94597111\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t160\t97\t77\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94597034-94597111.0\t0.0010\t0.9990\tchr10\t94596859,94596956\t94597034,94597111\t94601527,94602099\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t97\t77\t572\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94687769-94687932.0\t0.0010\t0.9990\tchr10\t94683493,94683987\t94687769,94687932\t94688124,94688274\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t494\t163\t150\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94688124-94688274.0\t0.0010\t0.9990\tchr10\t94687769,94687932\t94688124,94688274\t94694916,94695077\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t163\t150\t161\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94694916-94695077.0\t0.0010\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94706783,94706960\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t177\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94694916-94695077.1\t0.0010\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94720395,94720537\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t142\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94720395-94720537.0\t0.0010\t0.9990\tchr10\t94706783,94706960\t94720395,94720537\t94724345,94724533\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t177\t142\t188\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94720395-94720537.1\t0.0010\t0.9990\tchr10\t94694916,94695077\t94720395,94720537\t94724345,94724533\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t161\t142\t188\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94724345-94724533.0\t0.0010\t0.9990\tchr10\t94720395,94720537\t94724345,94724533\t94733296,94733438\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t142\t188\t142\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94733296-94733438.0\t0.0010\t0.9990\tchr10\t94724345,94724533\t94733296,94733438\t94735262,94736191\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t188\t142\t929\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94775057-94775220.0\t0.0010\t0.9990\tchr10\t94762680,94762873\t94775057,94775220\t94775389,94775539\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t193\t163\t150\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94775389-94775539.0\t0.0010\t0.9990\tchr10\t94775057,94775220\t94775389,94775539\t94780498,94780659\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t163\t150\t161\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94780498-94780659.0\t0.0010\t0.9990\tchr10\t94775389,94775539\t94780498,94780659\t94781820,94781997\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t177\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94781820-94781997.0\t0.0010\t0.9990\tchr10\t94780498,94780659\t94781820,94781997\t94820495,94820637\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t161\t177\t142\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94820495-94820637.0\t0.0010\t0.9990\tchr10\t94781820,94781997\t94820495,94820637\t94842836,94843024\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t177\t142\t188\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94842836-94843024.0\t0.0010\t0.9990\tchr10\t94820495,94820637\t94842836,94843024\t94849916,94850058\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t142\t188\t142\texon1=no\texon2=no\texon3=no\tna\n-CA-CS-10-94849916-94850058.0\t0.0010\t0.9990\tchr10\t94842836,94843024\t94849916,94850058\t94852732,94853206\t+\t0\t0\t0\t0.0000\t\n\\ No newline at end of file\n' |