Previous changeset 0:d4ca551ca300 (2017-09-07) Next changeset 2:01169764d7ca (2017-09-07) |
Commit message:
planemo upload |
added:
PostAnalysis SpliceChange SpliceTrap.pl TXdbgen bin/ApplyCutoff.jie.pl bin/Pair_estimate_c bin/PostAnalysis bin/PostAnalysis.pl bin/SpliceChange bin/SpliceChange.pl bin/SpliceTrap bin/SpliceTrap.pl bin/SpliceTrap_measure.pl bin/TXdbgen bin/TXdbgen.pl bin/apply_cutoff.sh bin/batch_para_cov10p_fit.sh bin/batchqsub.pl bin/batchqsub.pl_orig bin/beta_fit.R bin/bowtie2eland.pl bin/calc_pval.R bin/downloaddb.pl bin/get.frag.size.pl bin/get.hist.pl bin/get_bed_fa_j.pl bin/get_event_dist_fit.pl bin/gtf2bed.pl bin/mapping_bowtie.sh bin/mapping_rmap.sh bin/mark.mt.4eland.pl bin/rmap2eland.pl bin/scan_nomt.pl bin/scanbed2txdb.pl bin/splitdb.sh bin/vslz.pl cutoffs/cutoff.pair.06.txt cutoffs/cutoff.pair.07.txt cutoffs/cutoff.pair.08.txt refGenes.bed splice_trap.xml src/Makefile src/splicetrap.estimate.cpp test-data/input1.fastq test-data/input2.fastq test-data/output1.txt test-data/output2.txt |
removed:
SpliceTrap.tar.gz |
b |
diff -r d4ca551ca300 -r adc0f7765d85 PostAnalysis --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PostAnalysis Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,66 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# this script is a wrapup for Post analysis based on the ratio file output + +use strict; +use Getopt::Long; +my $RatioFile=""; +my $OutputFile = ""; +my $JunctionCut=5; +my $CutoffLevel="M"; +my $noIRM = 0; +my $noIRMstr=""; + +GetOptions ( + "i:s"=>\$RatioFile, + "o:s"=>\$OutputFile, + "c:s"=>\$CutoffLevel, + "noIRM|noirm"=>\$noIRM, + "j:i"=>\$JunctionCut +); + +my $InputParaDes=" Usage of the script: + -i input file (.ratio file) + -o output file + -c Cutoff Level:H/[M]/L + Means High, Middle or Low + -j Junction reads per junction requirement for each exon-isoform [5] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($RatioFile eq "") +{ + print $InputParaDes; + exit; +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} +if($noIRM) +{ + $noIRMstr= "noirm"; +} + + +system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut $noIRMstr >$OutputFile.raw"); + +open(rawfile, "$OutputFile.raw"); +open(outfile, ">$OutputFile"); +while(my $line=<rawfile>) +{ + chomp($line); + my @a=split("\t",$line); + if($noIRM) + { + print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } + else + { + print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } +} +close(outfile); +close(rawfile); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 SpliceChange --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SpliceChange Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,176 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# compare two outputs from SpliceTrap +use strict; + +# the information needed +# inclusion ratio input file +# filtered out or not input file +# minimal inclusion ratio at least 0.1 for one condition +# minimal splicing changes parameter +# orignial pipeline written by Martin Akerman +# re-organized and re-written by Jie Wu + +use FileHandle; + +use Getopt::Long; + +my @programs = ('grep','mkdir','R','paste','awk','sort'); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); +} + + +my $InputFileName1 = ""; +my $InputFileName2 = ""; +my $OutputFileName = ""; +my $minchange = 0.3; +my $mininc = 0.1; +my $noIRM = 0; + + +GetOptions ( + "1:s"=>\$InputFileName1, + "2:s"=>\$InputFileName2, + "o:s"=>\$OutputFileName, + "noIRM|noirm"=>\$noIRM, + "m:f"=>\$mininc, + "c:f"=>\$minchange +); + +my $InputParaDes=" Usage of the script: + -1 input file 1, output from SpliceTrap, *.raw file in the output folder + -2 input file 2. see above. + -o output file prefix. + -c minimal change required, [default:0.3] + -m minimal inclusion ratio for at least one condition. [defualt:0.1] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") +{ + print $InputParaDes; + exit; +} + + + +if(-d "$OutputFileName.cache" ) +{ + print "Aborted! output cache folder exists: $OutputFileName.cache \n"; + exit; +} +else +{ + system("mkdir $OutputFileName.cache"); +} + +# +my %ir1; # records ir from file1 +my %ir2; # records ir from file2 +# only records trios above the cutoffs + +open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; +while(my $line=<input1>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir1{$a[0]} = $a[1]; + } + else + { + $ir1{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; +close(input1); + +open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; +while(my $line=<input2>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir2{$a[0]} = $a[1]; + } + else + { + $ir2{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; + + +close(input2); + + +## +my %mean; +my %sd; + +my %num; + +my %filehandles; + +my @types = ("CA", "IR", "AD","AA"); + +foreach my $type (@types) +{ + my $fh = new FileHandle; + open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; + $filehandles{$type} = $fh; +} + + +foreach my $key (keys %ir1) +{ + if(exists $ir2{$key}) + { + if(($ir1{$key} + $ir2{$key}) > 0) + { + #find the type + my $type = substr($key, 0, 2); + $type = "CA" if $type eq "CS"; + $num{$type}++; + + my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); + $mean{$type} = $mean{$type} + $change; + $sd{$type} = $change*$change + $sd{$type}; + + $change = sprintf("%.4f",$change); + + my $fout = $filehandles{$type}; + print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; + } + } +} + +foreach my $type (keys %filehandles) +{ + close($filehandles{$type}); + if($num{$type} == 0) + { + warn "no AS events passed filters for both files\n"; + next; + } + $mean{$type} = $mean{$type}/$num{$type}; + $sd{$type} = sqrt($sd{$type}/$num{$type}); + system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); + system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); + print "$num{$type} $type events processed...\n"; + #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; + +} +system("rm $OutputFileName.cache -rf"); + + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 SpliceTrap.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SpliceTrap.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
b'@@ -0,0 +1,285 @@\n+#!/usr/bin/perl\n+# Author: wuj@cshl.edu\n+# Modified: Baekdoo Kim (baegi7942@gmail.com)\n+use strict;\n+use Getopt::Long;\n+use Data::Dumper;\n+####################\n+use Cwd;\n+my $PROG = $0;\n+my $CUR_DIR = Cwd::abs_path(Cwd::cwd());\n+my $PROG_ABS_PATH = Cwd::abs_path($PROG);\n+#my $SrcFolder=`dirname $PROG_ABS_PATH`;\n+#chomp($SrcFolder);\n+#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini";\n+#my $SrcFolder=$config{SrcFolder};\n+\n+my @programs = (\'R\',\'echo\',\'cat\',\'bash\',\'perl\',\'ln\',\'mkdir\',\'paste\',\'grep\',\'sort\',\'basename\',\'awk\',\'wc\',\'mv\',\'cd\',\'rm\',\'split\',\'head\' );\n+foreach my $program (@programs)\n+{\n+ die ("CHECK: $program not found\\n") if(system("hash $program >/dev/null"));\n+\n+}\n+\n+####################\n+my $SrcFolder="";\n+my $MapSoftware="bowtie";\n+my $DatabasePrefix="hg38";\n+my $ReadFileFormat="";\n+my $ReadFile1Name="";\n+my $ReadFile2Name="";\n+my $CutoffLevel="M";\n+my $Outputfolder=$CUR_DIR;\n+my $OutputPrefix="Result";\n+#my $CutoffOnly=0;\n+my $ReadSize=36;\n+my $JunctionCut=5;\n+my $onGalaxy_raw="";\n+my $onGalaxy_txt="";\n+my $BowtieThreads=1;\n+my $noIRMstr="";\n+my $noIRM = 0;\n+\n+my $num_args = $#ARGV;\n+$onGalaxy_raw = $ARGV[$num_args-1];\n+$onGalaxy_txt = $ARGV[$num_args];\n+\n+GetOptions (\n+\t"l:s"=>\\$SrcFolder,\n+ "m:s"=>\\$MapSoftware,\n+ "d:s"=>\\$DatabasePrefix,\n+# "f:s"=>\\$ReadFileFormat,\n+ "1:s"=>\\$ReadFile1Name,\n+ "2:s"=>\\$ReadFile2Name,\n+ "c:s"=>\\$CutoffLevel,\n+ "outdir:s"=>\\$Outputfolder,\n+ "o:s"=>\\$OutputPrefix,\n+ "j:i"=>\\$JunctionCut,\n+ "s:i"=>\\$ReadSize,\n+ "p:i"=>\\$BowtieThreads,\n+ "noIRM|noirm"=>\\$noIRM\n+# "local:s"=>\\$local,\n+# "rerun"=>\\$CutoffOnly\n+);\n+#-O for galaxy output\n+\n+\n+my $InputParaDes=" Usage of the script:\n+\t-l\tBase Location (required)\n+ -m Mapping software: [bowtie]/rmap\n+ -d Database prefix: [hg18]/mm9/rn4/userdefined\n+ -1 Read File 1\n+ -2 Read File 2\n+ -c Cutoff Level:H/[M]/L\n+ Means High, Middle or Low\n+ -j Junction reads requirement per junction for each exon-isoform [5]\n+ -o Output prefix {Result}\n+ -s Read Size [36]\n+ --outdir Output folder [./]\n+ -p Bowtie parameter, threads number, only use this when you don\'t use qsub [1]\n+ --noIRM Skip the IRM correction step\n+ \n+ This is a quick help, please refer to the README file for details.\n+";\n+\n+\n+if($SrcFolder eq "") {\n+\tprint "[CHECK] - Please provide the location of the script (option \'-l\')\\n\\n";\n+\texit;\n+}\n+\n+if($ReadFile2Name eq "")\n+{\n+ $ReadFile2Name = $ReadFile1Name;\n+ #trigger singled end mode\n+}\n+\n+if($ReadFile1Name eq "" or $ReadFile2Name eq "" )\n+{\n+ print $InputParaDes;\n+ exit;\n+}\n+\n+if($BowtieThreads < 1)\n+{\n+ print $InputParaDes;\n+ exit;\n+}\n+\n+if (! -e "$SrcFolder/db/$DatabasePrefix/parallel")\n+{\n+ print "CHECK: Error, the database you specified is not properly installed.\\n";\n+ #print $InputParaDes;\n+ exit;\n+\n+}\n+\n+if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L")\n+{\n+ print $InputParaDes;\n+ exit;\n+}\n+\n+\n+$ReadFile1Name = Cwd::abs_path($ReadFile1Name);\n+$ReadFile2Name = Cwd::abs_path($ReadFile2Name);\n+\n+#check the files\n+open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\\n");\n+my $checkoneline = <check>;\n+if(substr($checkoneline,0,1) eq ">")\n+{\n+ $ReadFileFormat = "fasta";\n+}\n+elsif(substr($checkoneline,0,1) eq "@")\n+{\n+ $ReadFileFormat = "fastq";\n+}\n+else\n+{\n+ die("CHECK: ERROR:Please check $ReadFile1Name\\n");\n+}\n+close(check);\n+\n+open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\\n");\n+my $checkoneline = <check>;\n+if(substr($checkoneline,0,1) eq ">")\n+{\n+ die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\\n") if ($ReadFileFormat ne "fasta");\n+}\n+elsif(substr($checkoneline,0,1) eq "'..b'print "CHECK: checking rmap...\\n";\n+ if(system("type rmap &>/dev/null") ==0 )\n+ {\n+ print "CHECK: rmap found, continue\\n";\n+ }\n+ else\n+ {\n+ die "CHECK: No rmap found in PATH, EXIT!\\n";\n+ }\n+}\n+else\n+{\n+ die "CHECK: option -m only takes rmap or bowtie as inputs\\n";\n+}\n+\n+if($ReadSize == 0)\n+{\n+ die "CHECK: Please check option -s Read size\\n";\n+}\n+\n+if($noIRM)\n+{\n+ $noIRMstr= "noirm";\n+}\n+\n+#write more checks later\n+print "PARAMETERS:\\tMapping software: ",$MapSoftware,"\\n";\n+print "PARAMETERS:\\tDatabase prefix: ",$DatabasePrefix,"\\n";\n+print "PARAMETERS:\\tRead end 1: ",$ReadFile1Name,"\\n";\n+print "PARAMETERS:\\tRead end 2: ",$ReadFile2Name,"\\n" if($ReadFile2Name ne $ReadFile1Name);\n+print "PARAMETERS:\\tGalaxy_raw: ",$onGalaxy_raw,"\\n"; #if($onGalaxy_raw ne "");\n+print "PARAMETERS:\\tGalaxy_txt: ",$onGalaxy_txt,"\\n"; #if($onGalaxy_txt ne "");\n+print "PARAMETERS:\\tCutoff level: ",$CutoffLevel,"\\n";\n+print "PARAMETERS:\\tJunction reads.min:",$JunctionCut,"\\n";\n+print "PARAMETERS:\\tOutput folder: ",$Outputfolder,"\\n";\n+print "PARAMETERS:\\tOutput prefix: ",$OutputPrefix,"\\n";\n+print "PARAMETERS:\\tRead size: ",$ReadSize,"\\n";\n+print "PARAMETERS:\\tBowtie threads #: ",$BowtieThreads,"\\n";\n+print "PARAMETERS:\\tNo IRM.\\n" if ($noIRM);\n+\n+if($MapSoftware eq "bowtie")\n+{\n+ print "=================STAGE 1 MAPPING===================\\n";\n+ system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads");\n+ system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name);\n+ print "=================STAGE 2 ESTIMATION================\\n";\n+ # ratio, log, nums\n+ system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;\n+ print "=================STAGE 3 CUTOFF====================\\n";\n+ # raw\n+ system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");\n+\n+\n+}\n+\n+if($MapSoftware eq "rmap")\n+{\n+ print "=================STAGE 1 MAPPING===================\\n";\n+\n+ system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ;\n+ system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name);\n+ print "=================STAGE 2 ESTIMATION================\\n";\n+\n+ system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;\n+ print "=================STAGE 3 CUTOFF====================\\n";\n+ system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");\n+\n+\n+}\n+\n+#print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\\n";\n+\n+if($onGalaxy_raw ne "" && $onGalaxy_txt ne "")\n+{\n+ print "OUTPUTFILE:$OutputPrefix.raw\\n";\n+ system("grep -v na $Outputfolder/$OutputPrefix.raw >$onGalaxy_raw");\n+ print "OUTPUTFILE:$OutputPrefix.txt\\n";\n+ system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy_txt");\n+}\n+\n+print "============Clean up\\n";\n+system("rm -r $Outputfolder/$OutputPrefix.*");\n+\n+sub random_sessid {\n+ #my @chars = (0..9,a..z,A..Z);\n+ my @chars = (\'a\'..\'z\',\'A\'..\'Z\');\n+ my $len = 10;\n+ my $string = join \'\', map {$chars[rand(@chars)]} (1..$len);\n+ return $string;\n+} \n' |
b |
diff -r d4ca551ca300 -r adc0f7765d85 SpliceTrap.tar.gz |
b |
Binary file SpliceTrap.tar.gz has changed |
b |
diff -r d4ca551ca300 -r adc0f7765d85 TXdbgen --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TXdbgen Thu Sep 07 15:06:58 2017 -0400 |
b |
@@ -0,0 +1,97 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# this script is to generate TXdb database files from bed/gtf file + +use strict; +use Cwd; +use Getopt::Long; + +my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo'); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); + +} + + +my $genomedir = ""; + +my $annofilename = ""; +my $txdbname = "userdefined"; +my $knownonly = 0; +my $gtfinput = 0; + +GetOptions ( + "g:s"=>\$genomedir, + "a:s"=>\$annofilename, + "n:s"=>\$txdbname, + "gtf"=>\$gtfinput, + "knownonly"=>\$knownonly +); + +my $InputParaDes=" Usage of the script: + -g genome fasta file location + -a annotation file (bed/gtf) + -n txdb name + --gtf specify this if annotation file is in gtf format +"; + +if($genomedir eq "" or $annofilename eq "") +{ + print $InputParaDes; + exit; +} + +$genomedir = Cwd::abs_path($genomedir); +$annofilename = Cwd::abs_path($annofilename); + +my $annofilebase = `basename $annofilename`; +chomp($annofilebase); +#need a cache folder to avoid mess + +my $cachefolder = $annofilebase.".cache"; + +if (! -e $cachefolder) +{ + mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n"; +} +if($gtfinput) +{ + print "TXDBGEN: converting gtf file into bed format\n"; + system ("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed"); + $annofilename = "$cachefolder/$annofilebase.bed"; +} + + +print "TXDBGEN: scan $annofilename for AS events...\n"; +system("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp"); +print "TXDBGEN: fetch sequences from $genomedir...\n"; +system("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort"); +#get fasta file list +system("ls $genomedir/*.fa >$cachefolder/chr.list"); + +system("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta"); + +print "TXDBGEN: generate files for parallel computing...\n"; +if (! -e "$cachefolder/parallel") +{ + mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n"; +} +system("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed"); +system("rm $cachefolder/out.bed"); +system("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi"); +system("rm $cachefolder/TXdb.tmp.evi"); +system("bash $SrcFolder/splitdb.sh $cachefolder/parallel"); +print "TXDBGEN: build Bowtie index...\n"; + +if (! -e "$cachefolder/btw") +{ + mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n"; +} +system("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb"); +system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list"); +print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n"; + +system("mv $cachefolder $SrcFolder/../db/$txdbname"); +print "TXDBGEN: Done!\n"; + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/ApplyCutoff.jie.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/ApplyCutoff.jie.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,108 @@ +#apply our cuttoff hash table on the IR calculated by Jie +#Modified from Martin's code +use strict; + +use Cwd; +my $PROG = $0; +my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); +my $PROG_ABS_PATH = Cwd::abs_path($PROG); +my $SrcFolder=`dirname $PROG_ABS_PATH`; +chomp($SrcFolder); + +my %cutoff; +my @Exlen; + +my $cutoff_level=$ARGV[1]; +my $JunctionCut = $ARGV[2]; +my $noirm = $ARGV[3]; + +my $cutoff_level_index=7; + + $cutoff_level_index=8 if $cutoff_level eq "H"; +$cutoff_level_index=6 if $cutoff_level eq "L"; + +open(CUT,"$SrcFolder/../cutoffs/cutoff.pair.0".$cutoff_level_index.".txt") || die "cutoff file not found $!\n"; + +while(<CUT>){ + chomp; + my @a=split(/\t/,$_); + push @Exlen,$a[0]; + $cutoff{$a[0]}=$a[1]; +} +close(CUT); + +open(IN,$ARGV[0]); + +while(<IN>){ + chomp; + my @a=split(/\t/,$_); + my $Ez='Ez=yes'; + my $print=$_; + if($a[0]=~m/#/g){next} + my $eventid=substr($a[0],0,2); + my $bir =$a[2]; + $bir =$a[1] if($noirm eq "noirm"); + my $j12 = $a[8]; + my $j23 = $a[9]; + my $j13 = $a[10]; + my $cov1=$a[11]; + my $cov2=$a[12]; + my $cov3=$a[13]; + my $siz1=$a[15]; + my $siz2=$a[16]; + my $siz3=$a[17]; + + + my $stat1='exon1='.cutoff($siz1,$cov1,\@Exlen,%cutoff); + my $stat2='exon2='.cutoff($siz2,$cov2,\@Exlen,%cutoff); + my $stat3='exon3='.cutoff($siz3,$cov3,\@Exlen,%cutoff); + if($stat1 eq "exon1=yes" and $stat3 eq "exon3=yes") + { + #$Ez="passed"; + $Ez=$eventid if $eventid eq "AA"; + $Ez=$eventid if $eventid eq "AD"; + $Ez=$eventid if $eventid eq "IR"; + if ($eventid eq "CS" or $eventid eq "CA" or $eventid eq "ME") + { + if($bir >0.9) + { + $Ez = "CS"; + } + else + { + $Ez = "CA"; + } + } + + } + else + { + #$Ez="declined"; + $Ez = "na"; + } + if( ($j12<$JunctionCut or $j23<$JunctionCut) and $j13 <$JunctionCut) + { + $Ez = "na"; + } + print $print,"\t",$stat1,"\t",$stat2,"\t",$stat3,"\t",$Ez,"\n"; +} +close(IN); +#################################################################### + +sub cutoff{ + my($s,$c,$E,%cutoff)=@_; + my @Exlen=@$E; + if($c eq 'NA'){return('NA')} + my $range=$Exlen[$#Exlen]; + foreach my $l(@Exlen){if($s<$l){$range=$l;last}} + if($c<$cutoff{$range}){return('no')} + return('yes') +} + + +sub contain{ + my ($a,@a)=@_; + foreach(@a){if($a eq $_){return(1)}} + return(0) +} + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/Pair_estimate_c |
b |
Binary file bin/Pair_estimate_c has changed |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/PostAnalysis --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/PostAnalysis Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,66 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# this script is a wrapup for Post analysis based on the ratio file output + +use strict; +use Getopt::Long; +my $RatioFile=""; +my $OutputFile = ""; +my $JunctionCut=5; +my $CutoffLevel="M"; +my $noIRM = 0; +my $noIRMstr=""; + +GetOptions ( + "i:s"=>\$RatioFile, + "o:s"=>\$OutputFile, + "c:s"=>\$CutoffLevel, + "noIRM|noirm"=>\$noIRM, + "j:i"=>\$JunctionCut +); + +my $InputParaDes=" Usage of the script: + -i input file (.ratio file) + -o output file + -c Cutoff Level:H/[M]/L + Means High, Middle or Low + -j Junction reads per junction requirement for each exon-isoform [5] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($RatioFile eq "") +{ + print $InputParaDes; + exit; +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} +if($noIRM) +{ + $noIRMstr= "noirm"; +} + + +system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut $noIRMstr >$OutputFile.raw"); + +open(rawfile, "$OutputFile.raw"); +open(outfile, ">$OutputFile"); +while(my $line=<rawfile>) +{ + chomp($line); + my @a=split("\t",$line); + if($noIRM) + { + print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } + else + { + print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } +} +close(outfile); +close(rawfile); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/PostAnalysis.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/PostAnalysis.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,64 @@ +# this script is a wrapup for Post analysis based on the ratio file output + +use strict; +use Getopt::Long; +my $RatioFile=""; +my $OutputFile = ""; +my $JunctionCut=5; +my $CutoffLevel="M"; +my $noIRM = 0; +my $noIRMstr=""; + +GetOptions ( + "i:s"=>\$RatioFile, + "o:s"=>\$OutputFile, + "c:s"=>\$CutoffLevel, + "noIRM|noirm"=>\$noIRM, + "j:i"=>\$JunctionCut +); + +my $InputParaDes=" Usage of the script: + -i input file (.ratio file) + -o output file + -c Cutoff Level:H/[M]/L + Means High, Middle or Low + -j Junction reads per junction requirement for each exon-isoform [5] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($RatioFile eq "") +{ + print $InputParaDes; + exit; +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} +if($noIRM) +{ + $noIRMstr= "noirm"; +} + + +system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut $noIRMstr >$OutputFile.raw"); + +open(rawfile, "$OutputFile.raw"); +open(outfile, ">$OutputFile"); +while(my $line=<rawfile>) +{ + chomp($line); + my @a=split("\t",$line); + if($noIRM) + { + print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } + else + { + print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } +} +close(outfile); +close(rawfile); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/SpliceChange --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/SpliceChange Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,176 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# compare two outputs from SpliceTrap +use strict; + +# the information needed +# inclusion ratio input file +# filtered out or not input file +# minimal inclusion ratio at least 0.1 for one condition +# minimal splicing changes parameter +# orignial pipeline written by Martin Akerman +# re-organized and re-written by Jie Wu + +use FileHandle; + +use Getopt::Long; + +my @programs = ('grep','mkdir','R','paste','awk','sort'); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); +} + + +my $InputFileName1 = ""; +my $InputFileName2 = ""; +my $OutputFileName = ""; +my $minchange = 0.3; +my $mininc = 0.1; +my $noIRM = 0; + + +GetOptions ( + "1:s"=>\$InputFileName1, + "2:s"=>\$InputFileName2, + "o:s"=>\$OutputFileName, + "noIRM|noirm"=>\$noIRM, + "m:f"=>\$mininc, + "c:f"=>\$minchange +); + +my $InputParaDes=" Usage of the script: + -1 input file 1, output from SpliceTrap, *.raw file in the output folder + -2 input file 2. see above. + -o output file prefix. + -c minimal change required, [default:0.3] + -m minimal inclusion ratio for at least one condition. [defualt:0.1] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") +{ + print $InputParaDes; + exit; +} + + + +if(-d "$OutputFileName.cache" ) +{ + print "Aborted! output cache folder exists: $OutputFileName.cache \n"; + exit; +} +else +{ + system("mkdir $OutputFileName.cache"); +} + +# +my %ir1; # records ir from file1 +my %ir2; # records ir from file2 +# only records trios above the cutoffs + +open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; +while(my $line=<input1>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir1{$a[0]} = $a[1]; + } + else + { + $ir1{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; +close(input1); + +open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; +while(my $line=<input2>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir2{$a[0]} = $a[1]; + } + else + { + $ir2{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; + + +close(input2); + + +## +my %mean; +my %sd; + +my %num; + +my %filehandles; + +my @types = ("CA", "IR", "AD","AA"); + +foreach my $type (@types) +{ + my $fh = new FileHandle; + open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; + $filehandles{$type} = $fh; +} + + +foreach my $key (keys %ir1) +{ + if(exists $ir2{$key}) + { + if(($ir1{$key} + $ir2{$key}) > 0) + { + #find the type + my $type = substr($key, 0, 2); + $type = "CA" if $type eq "CS"; + $num{$type}++; + + my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); + $mean{$type} = $mean{$type} + $change; + $sd{$type} = $change*$change + $sd{$type}; + + $change = sprintf("%.4f",$change); + + my $fout = $filehandles{$type}; + print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; + } + } +} + +foreach my $type (keys %filehandles) +{ + close($filehandles{$type}); + if($num{$type} == 0) + { + warn "no AS events passed filters for both files\n"; + next; + } + $mean{$type} = $mean{$type}/$num{$type}; + $sd{$type} = sqrt($sd{$type}/$num{$type}); + system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); + system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); + print "$num{$type} $type events processed...\n"; + #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; + +} +system("rm $OutputFileName.cache -rf"); + + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/SpliceChange.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/SpliceChange.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,174 @@ +# compare two outputs from SpliceTrap +use strict; + +# the information needed +# inclusion ratio input file +# filtered out or not input file +# minimal inclusion ratio at least 0.1 for one condition +# minimal splicing changes parameter +# orignial pipeline written by Martin Akerman +# re-organized and re-written by Jie Wu + +use FileHandle; + +use Getopt::Long; + +my @programs = ('grep','mkdir','R','paste','awk','sort'); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); +} + + +my $InputFileName1 = ""; +my $InputFileName2 = ""; +my $OutputFileName = ""; +my $minchange = 0.3; +my $mininc = 0.1; +my $noIRM = 0; + + +GetOptions ( + "1:s"=>\$InputFileName1, + "2:s"=>\$InputFileName2, + "o:s"=>\$OutputFileName, + "noIRM|noirm"=>\$noIRM, + "m:f"=>\$mininc, + "c:f"=>\$minchange +); + +my $InputParaDes=" Usage of the script: + -1 input file 1, output from SpliceTrap, *.raw file in the output folder + -2 input file 2. see above. + -o output file prefix. + -c minimal change required, [default:0.3] + -m minimal inclusion ratio for at least one condition. [defualt:0.1] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") +{ + print $InputParaDes; + exit; +} + + + +if(-d "$OutputFileName.cache" ) +{ + print "Aborted! output cache folder exists: $OutputFileName.cache \n"; + exit; +} +else +{ + system("mkdir $OutputFileName.cache"); +} + +# +my %ir1; # records ir from file1 +my %ir2; # records ir from file2 +# only records trios above the cutoffs + +open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; +while(my $line=<input1>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir1{$a[0]} = $a[1]; + } + else + { + $ir1{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; +close(input1); + +open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; +while(my $line=<input2>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir2{$a[0]} = $a[1]; + } + else + { + $ir2{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; + + +close(input2); + + +## +my %mean; +my %sd; + +my %num; + +my %filehandles; + +my @types = ("CA", "IR", "AD","AA"); + +foreach my $type (@types) +{ + my $fh = new FileHandle; + open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; + $filehandles{$type} = $fh; +} + + +foreach my $key (keys %ir1) +{ + if(exists $ir2{$key}) + { + if(($ir1{$key} + $ir2{$key}) > 0) + { + #find the type + my $type = substr($key, 0, 2); + $type = "CA" if $type eq "CS"; + $num{$type}++; + + my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); + $mean{$type} = $mean{$type} + $change; + $sd{$type} = $change*$change + $sd{$type}; + + $change = sprintf("%.4f",$change); + + my $fout = $filehandles{$type}; + print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; + } + } +} + +foreach my $type (keys %filehandles) +{ + close($filehandles{$type}); + if($num{$type} == 0) + { + warn "no AS events passed filters for both files\n"; + next; + } + $mean{$type} = $mean{$type}/$num{$type}; + $sd{$type} = sqrt($sd{$type}/$num{$type}); + system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); + system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); + print "$num{$type} $type events processed...\n"; + #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; + +} +system("rm $OutputFileName.cache -rf"); + + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/SpliceTrap --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/SpliceTrap Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,263 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# Author: wuj@cshl.edu +use strict; +use Getopt::Long; +#################### +use Cwd; +my $PROG = $0; +my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); +my $PROG_ABS_PATH = Cwd::abs_path($PROG); +#my $SrcFolder=`dirname $PROG_ABS_PATH`; +#chomp($SrcFolder); +#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; +#my $SrcFolder=$config{SrcFolder}; + +my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' ); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); + +} + +#################### +my $MapSoftware="bowtie"; +my $DatabasePrefix="hg18"; +my $ReadFileFormat=""; +my $ReadFile1Name=""; +my $ReadFile2Name=""; +my $CutoffLevel="M"; +my $Outputfolder=$CUR_DIR; +my $OutputPrefix="Result"; +#my $CutoffOnly=0; +my $ReadSize=36; +my $JunctionCut=5; +my $onGalaxy=""; +my $BowtieThreads=1; +my $noIRMstr=""; +my $noIRM = 0; + +GetOptions ( + "m:s"=>\$MapSoftware, + "d:s"=>\$DatabasePrefix, +# "f:s"=>\$ReadFileFormat, + "1:s"=>\$ReadFile1Name, + "2:s"=>\$ReadFile2Name, + "c:s"=>\$CutoffLevel, + "outdir:s"=>\$Outputfolder, + "o:s"=>\$OutputPrefix, + "j:i"=>\$JunctionCut, + "s:i"=>\$ReadSize, + "p:i"=>\$BowtieThreads, + "noIRM|noirm"=>\$noIRM, + "g:s"=>\$onGalaxy +# "local:s"=>\$local, +# "rerun"=>\$CutoffOnly +); +#-O for galaxy output + + +my $InputParaDes=" Usage of the script: + -m Mapping software: [bowtie]/rmap + -d Database prefix: [hg18]/mm9/rn4/userdefined + -1 Read File 1 + -2 Read File 2 + -c Cutoff Level:H/[M]/L + Means High, Middle or Low + -j Junction reads requirement per junction for each exon-isoform [5] + -o Output prefix {Result} + -s Read Size [36] + --outdir Output folder [./] + -p Bowtie parameter, threads number, only use this when you don't use qsub [1] + --noIRM Skip the IRM correction step + + This is a quick help, please refer to the README file for details. +"; + +if($ReadFile2Name eq "") +{ + $ReadFile2Name = $ReadFile1Name; + #trigger singled end mode +} + +if($ReadFile1Name eq "" or $ReadFile2Name eq "" ) +{ + print $InputParaDes; + exit; +} + +if($BowtieThreads < 1) +{ + print $InputParaDes; + exit; +} + +if (! -e "$SrcFolder/../db/$DatabasePrefix/parallel") +{ + print "CHECK: Error, the database you specified is not properly installed.\n"; + #print $InputParaDes; + exit; + +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} + +$ReadFile1Name = Cwd::abs_path($ReadFile1Name); +$ReadFile2Name = Cwd::abs_path($ReadFile2Name); + +#check the files +open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n"); +my $checkoneline = <check>; +if(substr($checkoneline,0,1) eq ">") +{ + $ReadFileFormat = "fasta"; +} +elsif(substr($checkoneline,0,1) eq "@") +{ + $ReadFileFormat = "fastq"; +} +else +{ + die("CHECK: ERROR:Please check $ReadFile1Name\n"); +} +close(check); + +open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n"); +my $checkoneline = <check>; +if(substr($checkoneline,0,1) eq ">") +{ + die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta"); +} +elsif(substr($checkoneline,0,1) eq "@") +{ + die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq"); +} +else +{ + die("CHECK: ERROR:Please check $ReadFile2Name\n"); +} +close(check); + +$Outputfolder= Cwd::abs_path($Outputfolder); +if($Outputfolder eq "/tmp") +{ + while(-e $Outputfolder) + { + my $random_foldername = random_sessid(); + $Outputfolder = "/tmp/".$random_foldername; + } +} + + +if(! -e $Outputfolder) +{ + mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n"; +} +if(! -d $Outputfolder) +{ + die "CHECK: $Outputfolder is not a folder\n"; +} + +if($MapSoftware eq "bowtie") +{ + print "CHECK: whether bowtie installed and in PATH\n"; + my $bowtiechecker=`bowtie --version`; + if($bowtiechecker ne "") + { + print "CHECK: bowtie found, information below:\n"; + print $bowtiechecker,"\n"; + } + else + { + die "CHECK: No bowtie found in PATH, EXIT!\n"; + } +} +elsif($MapSoftware eq "rmap") +{ + print "CHECK: checking rmap...\n"; + if(system("type rmap &>/dev/null") ==0 ) + { + print "CHECK: rmap found, continue\n"; + } + else + { + die "CHECK: No rmap found in PATH, EXIT!\n"; + } +} +else +{ + die "CHECK: option -m only takes rmap or bowtie as inputs\n"; +} + +if($ReadSize == 0) +{ + die "CHECK: Please check option -s Read size\n"; +} + +if($noIRM) +{ + $noIRMstr= "noirm"; +} + +#write more checks later +print "PARAMETERS:\tMapping software: ",$MapSoftware,"\n"; +print "PARAMETERS:\tDatabase prefix: ",$DatabasePrefix,"\n"; +print "PARAMETERS:\tRead end 1: ",$ReadFile1Name,"\n"; +print "PARAMETERS:\tRead end 2: ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name); +print "PARAMETERS:\tCutoff level: ",$CutoffLevel,"\n"; +print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n"; +print "PARAMETERS:\tOutput folder: ",$Outputfolder,"\n"; +print "PARAMETERS:\tOutput prefix: ",$OutputPrefix,"\n"; +print "PARAMETERS:\tRead size: ",$ReadSize,"\n"; +print "PARAMETERS:\tBowtie threads #: ",$BowtieThreads,"\n"; +print "PARAMETERS:\tNo IRM.\n" if ($noIRM); + +if($MapSoftware eq "bowtie") +{ + print "=================STAGE 1 MAPPING===================\n"; + system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads"); + system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name); + print "=================STAGE 2 ESTIMATION================\n"; + + system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; + print "=================STAGE 3 CUTOFF====================\n"; + system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); + + +} + +if($MapSoftware eq "rmap") +{ + print "=================STAGE 1 MAPPING===================\n"; + + system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ; + system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name); + print "=================STAGE 2 ESTIMATION================\n"; + + system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; + print "=================STAGE 3 CUTOFF====================\n"; + system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); + + +} + +print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n"; + +if($onGalaxy ne "") +{ + system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy"); +} + +sub random_sessid +{ + #my @chars = (0..9,a..z,A..Z); + my @chars = ('a'..'z','A'..'Z'); + my $len = 10; + my $string = join '', map {$chars[rand(@chars)]} (1..$len); + return $string; + } + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/SpliceTrap.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/SpliceTrap.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,261 @@ +# Author: wuj@cshl.edu +use strict; +use Getopt::Long; +#################### +use Cwd; +my $PROG = $0; +my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); +my $PROG_ABS_PATH = Cwd::abs_path($PROG); +#my $SrcFolder=`dirname $PROG_ABS_PATH`; +#chomp($SrcFolder); +#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; +#my $SrcFolder=$config{SrcFolder}; + +my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' ); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); + +} + +#################### +my $MapSoftware="bowtie"; +my $DatabasePrefix="hg18"; +my $ReadFileFormat=""; +my $ReadFile1Name=""; +my $ReadFile2Name=""; +my $CutoffLevel="M"; +my $Outputfolder=$CUR_DIR; +my $OutputPrefix="Result"; +#my $CutoffOnly=0; +my $ReadSize=36; +my $JunctionCut=5; +my $onGalaxy=""; +my $BowtieThreads=1; +my $noIRMstr=""; +my $noIRM = 0; + +GetOptions ( + "m:s"=>\$MapSoftware, + "d:s"=>\$DatabasePrefix, +# "f:s"=>\$ReadFileFormat, + "1:s"=>\$ReadFile1Name, + "2:s"=>\$ReadFile2Name, + "c:s"=>\$CutoffLevel, + "outdir:s"=>\$Outputfolder, + "o:s"=>\$OutputPrefix, + "j:i"=>\$JunctionCut, + "s:i"=>\$ReadSize, + "p:i"=>\$BowtieThreads, + "noIRM|noirm"=>\$noIRM, + "g:s"=>\$onGalaxy +# "local:s"=>\$local, +# "rerun"=>\$CutoffOnly +); +#-O for galaxy output + + +my $InputParaDes=" Usage of the script: + -m Mapping software: [bowtie]/rmap + -d Database prefix: [hg18]/mm9/rn4/userdefined + -1 Read File 1 + -2 Read File 2 + -c Cutoff Level:H/[M]/L + Means High, Middle or Low + -j Junction reads requirement per junction for each exon-isoform [5] + -o Output prefix {Result} + -s Read Size [36] + --outdir Output folder [./] + -p Bowtie parameter, threads number, only use this when you don't use qsub [1] + --noIRM Skip the IRM correction step + + This is a quick help, please refer to the README file for details. +"; + +if($ReadFile2Name eq "") +{ + $ReadFile2Name = $ReadFile1Name; + #trigger singled end mode +} + +if($ReadFile1Name eq "" or $ReadFile2Name eq "" ) +{ + print $InputParaDes; + exit; +} + +if($BowtieThreads < 1) +{ + print $InputParaDes; + exit; +} + +if (! -e "$SrcFolder/../db/$DatabasePrefix/parallel") +{ + print "CHECK: Error, the database you specified is not properly installed.\n"; + #print $InputParaDes; + exit; + +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} + +$ReadFile1Name = Cwd::abs_path($ReadFile1Name); +$ReadFile2Name = Cwd::abs_path($ReadFile2Name); + +#check the files +open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n"); +my $checkoneline = <check>; +if(substr($checkoneline,0,1) eq ">") +{ + $ReadFileFormat = "fasta"; +} +elsif(substr($checkoneline,0,1) eq "@") +{ + $ReadFileFormat = "fastq"; +} +else +{ + die("CHECK: ERROR:Please check $ReadFile1Name\n"); +} +close(check); + +open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n"); +my $checkoneline = <check>; +if(substr($checkoneline,0,1) eq ">") +{ + die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta"); +} +elsif(substr($checkoneline,0,1) eq "@") +{ + die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq"); +} +else +{ + die("CHECK: ERROR:Please check $ReadFile2Name\n"); +} +close(check); + +$Outputfolder= Cwd::abs_path($Outputfolder); +if($Outputfolder eq "/tmp") +{ + while(-e $Outputfolder) + { + my $random_foldername = random_sessid(); + $Outputfolder = "/tmp/".$random_foldername; + } +} + + +if(! -e $Outputfolder) +{ + mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n"; +} +if(! -d $Outputfolder) +{ + die "CHECK: $Outputfolder is not a folder\n"; +} + +if($MapSoftware eq "bowtie") +{ + print "CHECK: whether bowtie installed and in PATH\n"; + my $bowtiechecker=`bowtie --version`; + if($bowtiechecker ne "") + { + print "CHECK: bowtie found, information below:\n"; + print $bowtiechecker,"\n"; + } + else + { + die "CHECK: No bowtie found in PATH, EXIT!\n"; + } +} +elsif($MapSoftware eq "rmap") +{ + print "CHECK: checking rmap...\n"; + if(system("type rmap &>/dev/null") ==0 ) + { + print "CHECK: rmap found, continue\n"; + } + else + { + die "CHECK: No rmap found in PATH, EXIT!\n"; + } +} +else +{ + die "CHECK: option -m only takes rmap or bowtie as inputs\n"; +} + +if($ReadSize == 0) +{ + die "CHECK: Please check option -s Read size\n"; +} + +if($noIRM) +{ + $noIRMstr= "noirm"; +} + +#write more checks later +print "PARAMETERS:\tMapping software: ",$MapSoftware,"\n"; +print "PARAMETERS:\tDatabase prefix: ",$DatabasePrefix,"\n"; +print "PARAMETERS:\tRead end 1: ",$ReadFile1Name,"\n"; +print "PARAMETERS:\tRead end 2: ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name); +print "PARAMETERS:\tCutoff level: ",$CutoffLevel,"\n"; +print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n"; +print "PARAMETERS:\tOutput folder: ",$Outputfolder,"\n"; +print "PARAMETERS:\tOutput prefix: ",$OutputPrefix,"\n"; +print "PARAMETERS:\tRead size: ",$ReadSize,"\n"; +print "PARAMETERS:\tBowtie threads #: ",$BowtieThreads,"\n"; +print "PARAMETERS:\tNo IRM.\n" if ($noIRM); + +if($MapSoftware eq "bowtie") +{ + print "=================STAGE 1 MAPPING===================\n"; + system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads"); + system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name); + print "=================STAGE 2 ESTIMATION================\n"; + + system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; + print "=================STAGE 3 CUTOFF====================\n"; + system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); + + +} + +if($MapSoftware eq "rmap") +{ + print "=================STAGE 1 MAPPING===================\n"; + + system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ; + system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name); + print "=================STAGE 2 ESTIMATION================\n"; + + system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; + print "=================STAGE 3 CUTOFF====================\n"; + system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); + + +} + +print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n"; + +if($onGalaxy ne "") +{ + system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy"); +} + +sub random_sessid +{ + #my @chars = (0..9,a..z,A..Z); + my @chars = ('a'..'z','A'..'Z'); + my $len = 10; + my $string = join '', map {$chars[rand(@chars)]} (1..$len); + return $string; + } + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/SpliceTrap_measure.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/SpliceTrap_measure.pl Thu Sep 07 15:06:58 2017 -0400 |
b |
@@ -0,0 +1,111 @@ +# Author: wuj@cshl.edu +use strict; +use Getopt::Long; +#################### +my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; +my $SrcFolder=$config{SrcFolder}; +#my $SrcFolder="/data/zhang/wuj/tools/SpliceTrap.0.8"; +#################### +my $MapSoftware="eland"; +my $ReadFileFormat=""; +my $ReadFile1Name=""; +my $ReadFile2Name=""; +my $CutoffLevel="H"; +my $OutputPrefix="Result"; +my $CutoffOnly=0; +my $ReadSize=36; + +GetOptions ( + "m:s"=>\$MapSoftware, + "f:s"=>\$ReadFileFormat, + "1:s"=>\$ReadFile1Name, + "2:s"=>\$ReadFile2Name, + "c:s"=>\$CutoffLevel, + "o:s"=>\$OutputPrefix, + "s:i"=>\$ReadSize, +# "local:s"=>\$local, + "rerun"=>\$CutoffOnly +); + + +my $InputParaDes=" Usage of the script (v0.82): + -m Mapping software: eland/bowtie/rmap + -f Read File Format: fasta/fastq + -1 Read File 1 + -2 Read File 2 + -c Cutoff Level:H/M/L + Means High, Middle or Low + -o Output prefix + -s Read Size 36 + --rerun Only run the last step, which is filtering +"; + +if($ReadFile1Name eq "" or $ReadFile2Name eq "" or $ReadFileFormat eq "") +{ + print $InputParaDes; + exit; +} + +if($ReadFileFormat ne "fastq" and $ReadFileFormat ne "fasta") +{ + print $InputParaDes; + exit; + +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} + +my $dirname1=`dirname $ReadFile1Name`; +my $dirname2=`dirname $ReadFile2Name`; +if($dirname1 ne ".") +{ + system("ln -s $ReadFile1Name ./"); +} +if($dirname2 ne ".") +{ + system("ln -s $ReadFile2Name ./"); +} + +$ReadFile1Name = `basename $ReadFile1Name`; +chomp($ReadFile1Name); +$ReadFile2Name = `basename $ReadFile2Name`; +chomp($ReadFile2Name); +my $start = time; +if($MapSoftware eq "bowtie") +{ + print "=================STAGE 1 MAPPING===================\n"; + system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat") if not $CutoffOnly; + system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat") if not $CutoffOnly; + print "STAGE 1 FINISHED IN ",time-$start," seconds\n"; + print "=================STAGE 2 ESTIMATION================\n"; + + system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize") if not $CutoffOnly; + print "STAGE 2 FINISHED IN ",time-$start," seconds\n"; + print "=================STAGE 3 CUTOFF====================\n"; + system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel"); + print "STAGE 3 FINISHED IN ",time-$start," seconds\n"; + + + +} + +if($MapSoftware eq "rmap") +{ + print "=================STAGE 1 MAPPING===================\n"; + + system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat") if not $CutoffOnly; + system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat") if not $CutoffOnly; + print "=================STAGE 2 ESTIMATION================\n"; + + system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize") if not $CutoffOnly; + print "=================STAGE 3 CUTOFF====================\n"; + system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel"); + + +} + +print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n"; |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/TXdbgen --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/TXdbgen Thu Sep 07 15:06:58 2017 -0400 |
b |
@@ -0,0 +1,97 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# this script is to generate TXdb database files from bed/gtf file + +use strict; +use Cwd; +use Getopt::Long; + +my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo'); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); + +} + + +my $genomedir = ""; + +my $annofilename = ""; +my $txdbname = "userdefined"; +my $knownonly = 0; +my $gtfinput = 0; + +GetOptions ( + "g:s"=>\$genomedir, + "a:s"=>\$annofilename, + "n:s"=>\$txdbname, + "gtf"=>\$gtfinput, + "knownonly"=>\$knownonly +); + +my $InputParaDes=" Usage of the script: + -g genome fasta file location + -a annotation file (bed/gtf) + -n txdb name + --gtf specify this if annotation file is in gtf format +"; + +if($genomedir eq "" or $annofilename eq "") +{ + print $InputParaDes; + exit; +} + +$genomedir = Cwd::abs_path($genomedir); +$annofilename = Cwd::abs_path($annofilename); + +my $annofilebase = `basename $annofilename`; +chomp($annofilebase); +#need a cache folder to avoid mess + +my $cachefolder = $annofilebase.".cache"; + +if (! -e $cachefolder) +{ + mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n"; +} +if($gtfinput) +{ + print "TXDBGEN: converting gtf file into bed format\n"; + system ("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed"); + $annofilename = "$cachefolder/$annofilebase.bed"; +} + + +print "TXDBGEN: scan $annofilename for AS events...\n"; +system("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp"); +print "TXDBGEN: fetch sequences from $genomedir...\n"; +system("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort"); +#get fasta file list +system("ls $genomedir/*.fa >$cachefolder/chr.list"); + +system("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta"); + +print "TXDBGEN: generate files for parallel computing...\n"; +if (! -e "$cachefolder/parallel") +{ + mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n"; +} +system("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed"); +system("rm $cachefolder/out.bed"); +system("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi"); +system("rm $cachefolder/TXdb.tmp.evi"); +system("bash $SrcFolder/splitdb.sh $cachefolder/parallel"); +print "TXDBGEN: build Bowtie index...\n"; + +if (! -e "$cachefolder/btw") +{ + mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n"; +} +system("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb"); +system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list"); +print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n"; + +system("mv $cachefolder $SrcFolder/../db/$txdbname"); +print "TXDBGEN: Done!\n"; + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/TXdbgen.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/TXdbgen.pl Thu Sep 07 15:06:58 2017 -0400 |
b |
@@ -0,0 +1,95 @@ +# this script is to generate TXdb database files from bed/gtf file + +use strict; +use Cwd; +use Getopt::Long; + +my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo'); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); + +} + + +my $genomedir = ""; + +my $annofilename = ""; +my $txdbname = "userdefined"; +my $knownonly = 0; +my $gtfinput = 0; + +GetOptions ( + "g:s"=>\$genomedir, + "a:s"=>\$annofilename, + "n:s"=>\$txdbname, + "gtf"=>\$gtfinput, + "knownonly"=>\$knownonly +); + +my $InputParaDes=" Usage of the script: + -g genome fasta file location + -a annotation file (bed/gtf) + -n txdb name + --gtf specify this if annotation file is in gtf format +"; + +if($genomedir eq "" or $annofilename eq "") +{ + print $InputParaDes; + exit; +} + +$genomedir = Cwd::abs_path($genomedir); +$annofilename = Cwd::abs_path($annofilename); + +my $annofilebase = `basename $annofilename`; +chomp($annofilebase); +#need a cache folder to avoid mess + +my $cachefolder = $annofilebase.".cache"; + +if (! -e $cachefolder) +{ + mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n"; +} +if($gtfinput) +{ + print "TXDBGEN: converting gtf file into bed format\n"; + system ("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed"); + $annofilename = "$cachefolder/$annofilebase.bed"; +} + + +print "TXDBGEN: scan $annofilename for AS events...\n"; +system("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp"); +print "TXDBGEN: fetch sequences from $genomedir...\n"; +system("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort"); +#get fasta file list +system("ls $genomedir/*.fa >$cachefolder/chr.list"); + +system("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta"); + +print "TXDBGEN: generate files for parallel computing...\n"; +if (! -e "$cachefolder/parallel") +{ + mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n"; +} +system("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed"); +system("rm $cachefolder/out.bed"); +system("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi"); +system("rm $cachefolder/TXdb.tmp.evi"); +system("bash $SrcFolder/splitdb.sh $cachefolder/parallel"); +print "TXDBGEN: build Bowtie index...\n"; + +if (! -e "$cachefolder/btw") +{ + mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n"; +} +system("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb"); +system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list"); +print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n"; + +system("mv $cachefolder $SrcFolder/../db/$txdbname"); +print "TXDBGEN: Done!\n"; + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/apply_cutoff.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/apply_cutoff.sh Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,36 @@ +#SrcFolder="/data/zhang/wuj/scripts/SpliceTrap.0.8"; + +outputname=$1; +CutoffLevel=$2; +Outputfolder=$3 +SrcFolder=$5 +JunctionCut=$4 +noIRM=$8 + +echo "CUTOFF: Entering cutoff step..."; +echo "CUTOFF: Cache folder: $outputname.filter" +mkdir $Outputfolder/$outputname.filter +cd $Outputfolder/$outputname.filter +ln -s ../$outputname.ratio +ln -s ../$outputname.nums +echo "CUTOFF: spliting file....and generating shell scripts..." +split -11000 $outputname.ratio + +for ratiofiles in x* +do + echo "perl $SrcFolder/ApplyCutoff.jie.pl $ratiofiles $CutoffLevel $JunctionCut $noIRM > $ratiofiles.out" >>filter.sh +done + +echo "CUTOFF: submit scripts..." +perl $SrcFolder/batchqsub.pl filter.sh +echo "CUTOFF: merging file...." +cat *.out >../$outputname.raw +cd ../ +#perl /data/zhang/wuj/tools/SpliceTrap.0.8/ApplyCutoff.jie.pl $outputname.ratio $outputname.nums 8 >$outputname.txt +if [ "$noIRM" ];then + awk '{printf $22"\t"$2"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$12"\t"$13"\t"$14"\t"$15"\n"}' $outputname.raw >$outputname.txt +else + awk '{printf $22"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$12"\t"$13"\t"$14"\t"$15"\n"}' $outputname.raw >$outputname.txt +fi +rm $outputname.filter -rf + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/batch_para_cov10p_fit.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/batch_para_cov10p_fit.sh Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,101 @@ +#!/bin/bash +ReadFile1Name=`basename $1` +ReadFile2Name=`basename $2` +outputname=$3 +readsize=$4 +DatabasePrefix=$5 +Outputfolder=$6 +SrcFolder=$7 +noIRM=$8 + +cd $Outputfolder +if [ $ReadFile1Name != $ReadFile2Name ];then + echo "ESTIMATE: Getting fragment size information from data..." + perl $SrcFolder/get.frag.size.pl $ReadFile1Name.nomt $ReadFile2Name.nomt $readsize + perl $SrcFolder/get.hist.pl $ReadFile1Name.nomt.fragsize -w=1 -c=1 +else + echo "ESTIMATE: Generating the other half of reads..." + readnum=`wc -l $ReadFile1Name.nomt |cut -f1 -d" "` + for (( i=0; i<$readnum; i++ )) + do + echo "NM" >>$ReadFile1Name.f.nomt + done + echo "#Width:1" >$ReadFile1Name.nomt.fragsize.hist +fi +echo "ESTIMATE: Creating cache folder.." +if [ $ReadFile1Name != $ReadFile2Name ];then + ReadFile2FinalName=$ReadFile2Name.nomt +else + ReadFile2FinalName=$ReadFile1Name.f.nomt +fi + +mkdir $ReadFile1Name.result +cd $ReadFile1Name.result +ln -s ../$ReadFile1Name.nomt ./ +ln -s ../$ReadFile2FinalName ./ +ln -s ../$ReadFile1Name.nomt.fragsize.hist ./ +echo "ESTIMATE: Split mapping results via chromosomes..." +perl $SrcFolder/scan_nomt.pl $ReadFile1Name.nomt $ReadFile2FinalName +loopi=0 +echo "ESTIMATE: Generating shell scripts for Loop $loopi..." +while read chrlist +do + chr=`echo $chrlist |tr -d "\n"` + for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.* + do + base=`basename $dbfile` + echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -s $readsize" >>r$loopi.sh + done +done <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list + +echo "ESTIMATE: Submit shell scripts for Loop $loopi..." +perl $SrcFolder/batchqsub.pl r$loopi.sh +echo "ESTIMATE: Loop $loopi done..." + +cat $ReadFile1Name.$loopi.*.ratio >$outputname.$loopi.ratio +cat $ReadFile1Name.$loopi.*.log >$outputname.$loopi.log +cat $ReadFile1Name.$loopi.*.nums >$outputname.$loopi.nums +rm $ReadFile1Name.$loopi.*.ratio +rm $ReadFile1Name.$loopi.*.log +rm $ReadFile1Name.$loopi.*.nums + + +if [ "$noIRM" ];then + echo "ESTIMATE: No IRM correction, skipped..." + mv $outputname.$loopi.ratio $outputname.ratio + mv $outputname.$loopi.log $outputname.log + mv $outputname.$loopi.nums $outputname.nums +else + + echo "ESTIMATE: derive IRMs from data..." + awk '{if ($15>=10) printf $0"\n"}' $outputname.$loopi.ratio >$outputname.mle + perl $SrcFolder/get_event_dist_fit.pl $outputname.mle -c=2 -w=0.001 + + loopi=1 + echo "ESTIMATE: Generating shell scripts for Loop $loopi..." + while read chrlist + do + chr=`echo $chrlist |tr -d "\n"` + for dbfile in $SrcFolder/../db/$DatabasePrefix/parallel/$chr.* + do + base=`basename $dbfile` + echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loopi.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr -b $outputname.mle.fit.hist -s $readsize" >>r$loopi.sh + done + done <$SrcFolder/../db/$DatabasePrefix/parallel/chr.list + echo "ESTIMATE: Submit shell scripts for Loop $loopi..." + +#perl $SrcFolder/qsub/batchqsub.pl r$loopi.sh $taskname + perl $SrcFolder/batchqsub.pl r$loopi.sh + echo "ESTIMATE: Loop $loopi done..." + cat $ReadFile1Name.$loopi.*.ratio >$outputname.ratio + cat $ReadFile1Name.$loopi.*.log >$outputname.log + cat $ReadFile1Name.$loopi.*.nums >$outputname.nums + rm $ReadFile1Name.$loopi.*.ratio + rm $ReadFile1Name.$loopi.*.log + rm $ReadFile1Name.$loopi.*.nums +fi + +mv $outputname.ratio $outputname.log $outputname.nums ../ +cd ../ +rm $ReadFile1Name.result -rf +rm $ReadFile1Name.nomt $ReadFile2FinalName |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/batchqsub.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/batchqsub.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,158 @@ + +# modified from Chenghai Xue's script + +#test if qsub works + +my $qsub_checker = 0; +if(system("hash qsub >/dev/null")) +{ + $qsub_checker = 0; +} +else +{ + $test_randname=random_sessid(); + system("mkdir $test_randname;"); + system("echo 'mkdir $test_randname/$test_randname' >$test_randname/$test_randname.sh"); + system("mkdir $test_randname/qsub_cache"); + system ("qsub -cwd -v TMPDIR=$test_randname/qsub_cache -V -e $test_randname/qsub_cache -o $test_randname/qsub_cache -N $test_randname $test_randname/$test_randname.sh"); + + $status=0; + $sec=5; + while(1) + { + $chkresult=`qstat |grep $test_randname |wc -l`; + chomp($chkresult); + if ($chkresult == 0) + { + $sec=10; + $status++; + last if ($status==3); + } + else + { + $status=0; + $sec=5; + } + print "QSTAT: $chkresult testing tasks running.....$taskname\n"; + sleep($sec); + } + print "QSUB: testing done\n"; + print "$test_randname/$test_randname\n"; + if(-d "$test_randname/$test_randname") + { + $qsub_checker=1; + print "QSUB: working well!\n"; + } +#$qsub_checker=`qsub </dev/null 2>&1|grep stdin|wc -l`; + system("rm $test_randname -rf"); +} +#$qsub_checker=0; +if($qsub_checker == 0) +{ + print "QSUB: No GRID qsub found\n"; + print "QSUB: if you are using PBS qsub, please wait for the next version! Thanks.\n"; + print "QSUB: Running in serial mode...\n"; + system("sh $ARGV[0]"); + exit; +} + +$performListFile = $ARGV[0]; +$taskname = ""; +$taskname = $ARGV[1]; +if (not $taskname) +{ + $taskname=random_sessid(); +} +#$outfullDir = $ARGV[2]; +$outfullDir ="qsub_cache"; + +# correct path +if(! (-d $outfullDir) ){ + system ("mkdir $outfullDir"); +} + +# create a temp cache +@temp = split("/", $0); +$prog = pop @temp; +$cache = $outfullDir."/".$prog."_".$taskname; +if(! (-d $cache) ){ + system ("mkdir $cache"); +} + +open (IN_1, "$performListFile") or die "can not open file $performListFile to read\n"; +@performList = (<IN_1>); +chomp @performList; +close IN_1 or die "can't close the input file : $!"; + + +$scriptListFile = $outfullDir."/".$taskname."_scripts.list"; +open (FSCRIPLIST, ">$scriptListFile"); +for($i=0; $i<@performList; $i++){ + $scriptFile = $outfullDir."/".$taskname."_script$i.sh"; + print FSCRIPLIST "$scriptFile\n"; + open (FOUT, ">$scriptFile"); + +# print FOUT "#!/usr/bin/sh\n"; + print FOUT "$performList[$i]\n"; + +# print OUT_1 "$outfile.map\n"; + close (FOUT); +} +close (FSCRIPLIST); + +open (IN_2, "$scriptListFile") or die "can not open file $scriptListFile to read\n"; +$basename=`basename $performListFile`; +chop($basename); +$taskname=$taskname."_".$basename; +#print $basename; +while(<IN_2>){ + $f = $_; + chomp $f; + @temp = split("/", $f); + $base = pop @temp; + + #use default queues +# print "/opt/n1ge6/bin/lx24-amd64/qsub -l virtual_free=1.7G -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f\n\n\n"; + #system ("qsub -l virtual_free=1.7G -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f"); + system ("qsub -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f"); + +} +close IN_2 or die "can't close the input file : $!"; + +#start to check stats of qsub tasks +####################################### +my $taskname_query=substr($taskname,0,10); + + +$status=0; +$sec=60; +while(1) +{ + $chkresult=`qstat |grep $taskname_query |wc -l`; + chomp($chkresult); + if ($chkresult == 0) + { + $sec=10; + $status++; + last if ($status==3); + } + else + { + $status=0; + $sec=60; + } + print "QSTAT: $chkresult tasks running.....$taskname\n"; + sleep($sec); +} +print "QSUB: done: $taskname \n"; +###################################### + +# +sub random_sessid +{ + #my @chars = (0..9,a..z,A..Z); + my @chars = ('a'..'z','A'..'Z'); + my $len = 10; + my $string = join '', map {$chars[rand(@chars)]} (1..$len); + return $string; +} |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/batchqsub.pl_orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/batchqsub.pl_orig Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,158 @@ + +# modified from Chenghai Xue's script + +#test if qsub works + +my $qsub_checker = 0; +if(system("hash qsub >/dev/null")) +{ + $qsub_checker = 0; +} +else +{ + $test_randname=random_sessid(); + system("mkdir $test_randname;"); + system("echo 'mkdir $test_randname/$test_randname' >$test_randname/$test_randname.sh"); + system("mkdir $test_randname/qsub_cache"); + system ("qsub -cwd -v TMPDIR=$test_randname/qsub_cache -V -e $test_randname/qsub_cache -o $test_randname/qsub_cache -N $test_randname $test_randname/$test_randname.sh"); + + $status=0; + $sec=5; + while(1) + { + $chkresult=`qstat |grep $test_randname |wc -l`; + chomp($chkresult); + if ($chkresult == 0) + { + $sec=10; + $status++; + last if ($status==3); + } + else + { + $status=0; + $sec=5; + } + print "QSTAT: $chkresult testing tasks running.....$taskname\n"; + sleep($sec); + } + print "QSUB: testing done\n"; + print "$test_randname/$test_randname\n"; + if(-d "$test_randname/$test_randname") + { + $qsub_checker=1; + print "QSUB: working well!\n"; + } +#$qsub_checker=`qsub </dev/null 2>&1|grep stdin|wc -l`; + system("rm $test_randname -rf"); +} +#$qsub_checker=0; +if($qsub_checker == 0) +{ + print "QSUB: No GRID qsub found\n"; + print "QSUB: if you are using PBS qsub, please wait for the next version! Thanks.\n"; + print "QSUB: Running in serial mode...\n"; + system("sh $ARGV[0]"); + exit; +} + +$performListFile = $ARGV[0]; +$taskname = ""; +$taskname = $ARGV[1]; +if (not $taskname) +{ + $taskname=random_sessid(); +} +#$outfullDir = $ARGV[2]; +$outfullDir ="qsub_cache"; + +# correct path +if(! (-d $outfullDir) ){ + system ("mkdir $outfullDir"); +} + +# create a temp cache +@temp = split("/", $0); +$prog = pop @temp; +$cache = $outfullDir."/".$prog."_".$taskname; +if(! (-d $cache) ){ + system ("mkdir $cache"); +} + +open (IN_1, "$performListFile") or die "can not open file $performListFile to read\n"; +@performList = (<IN_1>); +chomp @performList; +close IN_1 or die "can't close the input file : $!"; + + +$scriptListFile = $outfullDir."/".$taskname."_scripts.list"; +open (FSCRIPLIST, ">$scriptListFile"); +for($i=0; $i<@performList; $i++){ + $scriptFile = $outfullDir."/".$taskname."_script$i.sh"; + print FSCRIPLIST "$scriptFile\n"; + open (FOUT, ">$scriptFile"); + +# print FOUT "#!/usr/bin/sh\n"; + print FOUT "$performList[$i]\n"; + +# print OUT_1 "$outfile.map\n"; + close (FOUT); +} +close (FSCRIPLIST); + +open (IN_2, "$scriptListFile") or die "can not open file $scriptListFile to read\n"; +$basename=`basename $performListFile`; +chop($basename); +$taskname=$taskname."_".$basename; +#print $basename; +while(<IN_2>){ + $f = $_; + chomp $f; + @temp = split("/", $f); + $base = pop @temp; + + #use default queues +# print "/opt/n1ge6/bin/lx24-amd64/qsub -l virtual_free=1.7G -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f\n\n\n"; + #system ("qsub -l virtual_free=1.7G -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f"); + system ("qsub -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f"); + +} +close IN_2 or die "can't close the input file : $!"; + +#start to check stats of qsub tasks +####################################### +my $taskname_query=substr($taskname,0,10); + + +$status=0; +$sec=60; +while(1) +{ + $chkresult=`qstat |grep $taskname_query |wc -l`; + chomp($chkresult); + if ($chkresult == 0) + { + $sec=10; + $status++; + last if ($status==3); + } + else + { + $status=0; + $sec=60; + } + print "QSTAT: $chkresult tasks running.....$taskname\n"; + sleep($sec); +} +print "QSUB: done: $taskname \n"; +###################################### + +# +sub random_sessid +{ + #my @chars = (0..9,a..z,A..Z); + my @chars = ('a'..'z','A'..'Z'); + my $len = 10; + my $string = join '', map {$chars[rand(@chars)]} (1..$len); + return $string; +} |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/beta_fit.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/beta_fit.R Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,39 @@ +args = commandArgs(); +input_file=args[4]; +#input_file="control_a.0.1.flt.ratio.tmpca"; +#print (input_file); + + +library(MASS); + +p=array(0,dim=1000); + +for (i in 0:999) +{ + p[i]=0.001 +} + +if ( file.info(input_file)["size"]>0 ) +{ + +data=read.table(input_file); +col=1; +x=data[,col]; +x1=x; +if (length(x)>10) +{ + x1[x==0] <- .Machine$double.eps; + x1[x==1] <- (1-.Machine$double.eps); + xbar=mean(x1) + xvar=var(x1) + a <- (xbar*(1-xbar)/xvar - 1)*xbar + b <- (1-xbar)*a/xbar + (f=fitdistr(x1,"beta",list(shape1=a,shape2=b))) + for (i in 0:999) + { + p[i]=dbeta(i/1000,f[["estimate"]][["shape1"]],f[["estimate"]][["shape2"]]) + } +} + +} +write(p,file=paste(input_file,"fit",sep="."),ncolumns=1); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/bowtie2eland.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/bowtie2eland.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,175 @@ +use strict; + +my $bowtiefilename=$ARGV[0]; +my $readsfilename=$ARGV[1]; +my $elandfilename=$ARGV[2]; + +open(readsfile, $readsfilename); + +my $detectformat=`head -c 1 $readsfilename`; + +#my $firstletter=$detectformat; +#my $looplinenumbers=4; + +#$looplinenumbers=2 if ($detectformat eq ">"); +open(bowtiefile, $bowtiefilename); +open(elandfile, ">".$elandfilename); +my $readfilelinenum=0; +# hash the positions of the alignments for each read id +my %readposhash; +my $bowtiepos = tell (bowtiefile); +while (my $bowtieline=<bowtiefile>) +{ + my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline); + if (not exists $readposhash{$bowtiereadname} ) + { + $readposhash{$bowtiereadname} = $bowtiepos; + } + $bowtiepos = tell (bowtiefile); +} + +while(my $readline=<readsfile>) +{ + $readfilelinenum++; + if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1)) + { + chomp($readline); + my $readname=substr($readline, 1, length($readline)-1); + if( not exists $readposhash{$readname} ) + { + print elandfile $readname,"\tNA\tNM\n"; + next; + } + else + { + my @mapped_ids=(); + my @mapped_pos=(); + my @mapped_strand=(); + seek(bowtiefile, $readposhash{$readname}, 0); + while (my $bowtieline=<bowtiefile>) + { + my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline); + if($readname eq $bowtiereadname) + { + push(@mapped_ids, $mapped_id); + push(@mapped_pos,$pos); + push(@mapped_strand,$strand); + } + else + { + last; + } + + } + print elandfile $readname,"\t"; + print elandfile "NA\t"; + print elandfile scalar(@mapped_ids),":0:0\t"; + for(my $i=0;$i<@mapped_ids;$i++) + { + print elandfile "/",$mapped_ids[$i]; + print elandfile ":",$mapped_pos[$i]+1; + if($mapped_strand[$i] eq "+") + { + print elandfile "F0,"; + } + else + { + print elandfile "R0,"; + } + + } + print elandfile "\n"; + + } + } +} + +close(elandfile); +close(bowtiefile); +close(readsfile); + +exit; +while(my $bowtieline=<bowtiefile>) +{ + my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline); + while(my $readline=<readsfile>) + { + $readfilelinenum++; + if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1)) + #if($readline=~/^$detectformat/) + { + chomp($readline); + my $readname=substr($readline, 1, length($readline)-1); + + + if($readname ne $bowtiereadname) + { + print elandfile $readname,"\tNA\tNM\n"; + next; + } + else + { + my @mapped_ids=(); + my @mapped_pos=(); + my @mapped_strand=(); + push(@mapped_ids, $mapped_id); + push(@mapped_pos,$pos); + push(@mapped_strand,$strand); + while(1) + { + $bowtieline=<bowtiefile>; + my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline); + if( $bowtiereadname eq $readname ) + { + push(@mapped_ids, $mapped_id); + push(@mapped_pos,$pos); + push(@mapped_strand,$strand); + } + else + { + seek(bowtiefile, -1*length($bowtieline),1); + print elandfile $readname,"\t"; + print elandfile "NA\t"; + print elandfile scalar(@mapped_ids),":0:0\t"; + for(my $i=0;$i<@mapped_ids;$i++) + { + print elandfile "/",$mapped_ids[$i]; + print elandfile ":",$mapped_pos[$i]+1; + if($mapped_strand[$i] eq "+") + { + print elandfile "F0,"; + } + else + { + print elandfile "R0,"; + } + + } + print elandfile "\n"; + last; + } + } + last; + + } + } + } +} + +while(my $readline=<readsfile>) +{ + $readfilelinenum++; + if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1)) + #if($readline=~/^$detectformat/) + { + chomp($readline); + my $readname=substr($readline, 1, length($readline)-1); + print elandfile $readname,"\tNA\tNM\n"; + } +} + +close(elandfile); +close(bowtiefile); + + +close(readsfile); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/calc_pval.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/calc_pval.R Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,29 @@ +args = commandArgs(); +input_file=args[4]; +av=as.numeric(args[5]); +sd=as.numeric(args[6]); +nu=as.numeric(args[7]); + + +data=read.table(input_file); + +col=4; +x=data[,col]; +pup=pnorm(x, mean=av, sd=sd, lower.tail = FALSE); +adpup=p.adjust(pup,method="fdr"); +pdn=pnorm(x, mean=av, sd=sd, lower.tail = TRUE); +adpdn=p.adjust(pdn,method="fdr"); + +p=pup; + +size = length(x); + +for (i in 1:size) +{ + if(x[i]<0) + { + p[i]=pdn[i]; + } +} +write(p, file=paste(input_file,"p",sep="."),ncolumns=1); + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/downloaddb.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/downloaddb.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,54 @@ +use strict; +use Cwd; + +my %flags=( + "hg18"=>0, + "mm9"=>0, + "rn4"=>0, +); +my $PROG = $0; +my $PROG_ABS_PATH = Cwd::abs_path($PROG); +$PROG_ABS_PATH = `dirname $PROG_ABS_PATH`; +chomp($PROG_ABS_PATH); +print "\n\tPrepare to download databases from CSHL...\n"; + +print "\tWhich database(s) do you want to download?\n"; +print "\tChoose from\t "; +foreach my $key (keys %flags) +{ + print $key,"/"; +} +print "ALL \n\n\tseparated by blank...don't enter anything if you don't want to download!\n\nPlease enter:[NONE]"; + +my @dbnames = split(/\s+/,<>); +for (my $i=0;$i<@dbnames;$i++) +{ + if (uc($dbnames[$i]) eq "ALL" ) + { + foreach my $key (keys %flags) + { + $flags{$key}=1; + } + last; + } + + if( exists $flags{$dbnames[$i]}) + { + $flags{$dbnames[$i]} = 1; + } + #system ("wget http://rulai.cshl.edu/splicetrap/db/") +} + +foreach my $key (keys %flags) +{ + if ($flags{$key} ==1) + { + system ("wget http://rulai.cshl.edu/splicetrap/db/".$key.".tar.gz"); + print "untar the database file for $key...please wait..."; + system ("tar -ixzf $key.tar.gz"); + system ("rm $key.tar.gz"); + mkdir "$key/parallel"; + print "creating files for parallel computing...\n"; + system("bash $PROG_ABS_PATH/splitdb.sh $key/parallel") + } +} |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/get.frag.size.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/get.frag.size.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,55 @@ +#for the results from paired end +#The two inputs are the results from the two ends +use strict; +my $read_size = $ARGV[2]; +open(input1, $ARGV[0]); +open(input2, $ARGV[1]); +open(output, ">$ARGV[0].fragsize"); +#open(fusefile,">$ARGV[0].fuse"); + +#my $LongMarker="L"; +#my $ShortMarker="S"; + +while(my $line1=<input1>) +{ + my $line2=<input2>; + chomp($line1); + chomp($line2); + next if($line1=~/$\NM/ or $line2=~/$\NM/ or $line1=~/$\MT/ or $line2=~/$\MT/); + my @array1 = split("\t",$line1); + #my $read_size=length($array1[1]); + my @array2 = split("\t",$line2); + my $match1=$array1[3]; + my $match2=$array2[3]; +# my $marker=$LongMarker.$ShortMarker; + my @sizes=(); + #while($match1=~/\/(\S[^,]*\[[$marker]\])\S[^,]*:(\d*)[RF]/g) + while($match1=~/\/(\S[^,]*\[\w+\])\S[^,]*:(\d*)[RF]/g) + { + my $name=$1; + my $posa=$2; + #print $name,"\n"; + + if($match2=~/\Q$name\E\S[^,]*:(\d*)[RF]/) + { + #print "match\n"; + my $posb=$1; + push @sizes, abs($posb-$posa)+$read_size; + + } + + } + my %saw; + @saw{@sizes}=(); + my @keya= keys %saw; + #print scalar(@keya),"\n"; + if(scalar(@keya)==1) + { + print output $keya[0],"\n"; + } + +} +#close(fusefile); +close(output); +close(input2); +close(input1); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/get.hist.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/get.hist.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,49 @@ +use Getopt::Long; +use strict; + +my $InputFileName=$ARGV[0]; +my $OutputFileName = $ARGV[0].".hist"; +#$OutputFileName=$ARGV[1] if $ARGV[1] ne ""; +my $width=0.01; +my $verbose=1; +my $col=2; +my $start=0; +my $end=1.000; + + +GetOptions ( + 'w:f'=>\$width, + 'c:i'=>\$col, + 'start:f'=>\$start, + 'end:f'=>\$end, + 'v'=>\$verbose +); + +$width=$width*1; +#print "IRM: #Generate hist with delta width of $width \n"; +#print "IRM: #data source from col $col\n"; + +$col=$col-1; + +my @hist; +my $totalnum=0; + +open(Input, $InputFileName); +while(my $line=<Input>) +{ + next if($line=~/^#/); + chomp($line); + my @array=split(/\s/,$line); + $hist[int($array[$col]/$width)]++; + $totalnum++; + +} +close(Input); + +open(OutputFile, ">$OutputFileName"); +print OutputFile "#Width:$width\n"; +for(my $i=0;$i<@hist;$i++) +{ + print OutputFile $hist[$i]/$totalnum,"\n"; +} +close(OutputFile); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/get_bed_fa_j.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/get_bed_fa_j.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
b'@@ -0,0 +1,330 @@\n+# Adapted from Chenghai Xue\'s script\n+\n+$starttime=time();\n+\n+$input_file_1 = $ARGV[0];\t# exon junction file\n+$input_file_2 = $ARGV[1];\t# genome file list\n+$output_file_1 = $ARGV[2];\t# exon junction bed (might be less than input_file_1\n+$output_file_2 = $ARGV[3];\t# exon junction fa\n+#$leftLen = $ARGV[4];\n+#$rightLen = $ARGV[5];\n+\r\n+open(IN_1, "$input_file_1") or die "can\'t open the input file : $!";\n+open(IN_2, "$input_file_2") or die "can\'t open the input file : $!";\n+open OUT_1, ">$output_file_1" or die "Can not open output_file : $!";\r\n+open OUT_2, ">$output_file_2" or die "Can not open output_file : $!";\r\n+\n+@chromList = (<IN_2>);\n+chomp(@chromList);\n+$len_chromList = @chromList;\n+print "BED2FA: in $input_file_2, found $len_chromList chromosomes\\n";\n+foreach $one (@chromList){\n+\tif($one =~ /\\/(chr.[^\\/]*?)\\.*fa$/i){\n+\t\t$chr_hash{$1} = $one;\n+\t\t#print $1,"\\n";\t\n+\t}\n+}\n+@key_chr_hash = keys(%chr_hash);\r\n+$len_key_chr_hash = @key_chr_hash;\r\n+@sort_key_chr_hash = sort_chromNo(@key_chr_hash);\n+$len_sort_key_chr_hash = @sort_key_chr_hash;\n+#for($i=0; $i<$len_sort_key_chr_hash; $i++){\r\n+#\tprint "$sort_key_chr_hash[$i]\t$chr_hash{$sort_key_chr_hash[$i]}\\n";\r\n+#}\n+\n+$num_1=0;\n+$num_2=0;\n+$num_count_chrom=0;\n+my ($chrom, $chromStart, $chromEnd, $name, $score, $strand, $thickStart, $thickEnd, $itemRgb, $blockCount, $blockSizes, $blockStarts);\r\n+$current_chrom = "";\n+while(<IN_1>){\n+\t$num_1++;\r\n+\t$line = $_;\n+\tchomp $line;\n+\t#print $line,"\\n";\n+\t@cols = split ("\\t", $line);\n+ if(scalar(@cols)==12)\n+\t{\n+\t($chrom, $chromStart, $chromEnd, $name, $score, $strand, $thickStart, $thickEnd, $itemRgb, $blockCount, $blockSizes, $blockStarts) = @cols;\n+\t}\n+\tif(scalar(@cols)!=12)\n+\t{\n+\t\t($chrom, $chromStart, $chromEnd, $name, $score, $strand)=@cols;\n+\t\t$thickStart=$chromStart;\n+\t#\tprint $thickStart,"\\n";\n+\t\t$thickEnd = $chromEnd;\n+\t\t$blockCount=1;\n+\t\t$blockSizes=$chromEnd-$chromStart;\n+\t\t$blockStarts = 0;\t\n+\t}\n+\t$strand="+" if !$strand;\n+\t@a_blockSizes = split (/\\,/, $blockSizes);\n+\t@a_blockStarts = split (/\\,/, $blockStarts);\n+\tif($chrom ne $current_chrom){\n+\t\tif($num_1 != 1){\n+\t\t\tprint "$num_chr_1\t$num_chr_2\t$len_contigSeqStr\\n";\t\t\t\n+\t\t}\n+\t\tprint "BED2FA: $chrom:\t";\n+\t\t\n+\t\t$num_chr_1=0;\n+\t\t$num_chr_2=0;\t\t\n+\n+\t\tif(exists $chr_hash{$chrom}){\n+\t\t\t$num_count_chrom++;\n+\t\t\t$current_chrom = $chrom;\n+\t\t\t#print $current_chrom,"\\n";\n+#=pod\t\t\t\n+\t\t\t$chromFastaFile = $chr_hash{$chrom};\n+\t\t\t#print $chromFastaFile,"\\n";\n+\t\t\topen($fin, "<$chromFastaFile") or die "can\'t open the chrom file : $!";\n+\t\t\tlocal ($/) = undef;\n+\t\t\t$contigSeqStr = <$fin>;\n+\t\t\tclose ($fin);\n+\t\t\t#print $contigSeqStr,"mark\\t";\n+\t\t\t$contigSeqStr =~s/^\\>.*?\\n//g;\n+ #print $contigSeqStr,"mark2\\t";\n+\n+\t\t\t$contigSeqStr =~s/\\s|\\n//g;\n+ #print $contigSeqStr,"mark3\\n";\n+\n+\t\t\t$len_contigSeqStr = length $contigSeqStr;\n+#=cut\n+\t\t}\n+\t\telse{\n+\t\t\t$num_chr_1++;\n+\t\t\tnext;\n+\t\t}\n+\t}\n+\t$num_chr_1++;\n+\t\r\n+# modify from here................................\n+\tmy @Starts;\n+\tmy @Ends;\n+\tmy @JuncSeq;\n+\tmy $ssStrTag=1;\n+\tfor($i_wuj=0;$i_wuj<$blockCount;$i_wuj++)\n+\t{\n+\t\t$Starts[$i_wuj] = $chromStart + $a_blockStarts[$i_wuj];\n+\t\t$Ends[$i_wuj] = $Starts[$i_wuj] + $a_blockSizes[$i_wuj];\n+\t\t$JuncSeq[$i_wuj] = uc substr ($contigSeqStr,$Starts[$i_wuj], $a_blockSizes[$i_wuj]);\n+\t\tif($strand eq "-"){\n+\t\t $JuncSeq[$i_wuj] = uc string_reverse_complement(lc $JuncSeq[$i_wuj]);\n+\t\t}\n+\t}\t \n+ # for($i_wuj=0;$i_wuj<$blockCount-1;$i_wuj++)\n+#\t{\n+#\t $ssStr = uc substr ($contigSeqStr, $Ends[$i_wuj], 2) . substr ($contigSeqStr, $Starts[$i_wuj+1] - 2, 2);\n+#\t if($strand eq "-"){\n+ # $ssStr = uc string_reverse_complement(lc $ssStr);\n+ #$ssStr = $rc_ssStr;\n+#\t }\n+#\t\t$ssStrTag = 0 if ($ssStr ne "GTAG");\n+\t\t\n+ # }\n+#\tif($ssStrTag ==1){\n+ if(1){\n+\t\t$num_2++;\n+\t\t$num_chr_2++;\n+\t\tprint OUT_1 "$line\\n";\n+\t\t#print OUT_2 ">$name\\|$chrom\\|$chromStart\\|$chromEnd\\|$strand\\|$ssStr\\|$num_2\\n$junctionSeqStrLeft'..b'ret;\r\n+}\n+\r\n+sub sort_chromNo{\n+\tlocal(@chrom) = @_;\n+\tlocal($len_key_chr_hash, $i, @sort_chr_hash);\n+\tlocal(@digit_random, @words_random, @digit_other_1, @digit_other_2, @words_other_1, @words_other_2, @digit, @words);\n+\tlocal(@sort_digit, @sort_words, @sort_digit_random, @sort_words_random, @sort_digit_other, @sort_words_other);\n+\tlocal($len_digit, $len_words, $len_digit_random, $len_words_random, $len_digit_other, $len_words_other, $term);\n+\t\n+\t$len_key_chr_hash = @chrom;\n+\t# sort via chr number for printing result\r\n+\tfor($i=0; $i<$len_key_chr_hash; $i++){\n+\t\tif($key_chr_hash[$i] =~ /chr(\\d+)\\_random/){\r\n+\t\t\tpush(@digit_random, $1);\r\n+\t\t}\r\n+\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)\\_random/){\r\n+\t\t\tpush(@words_random, $1);\r\n+\t\t}\r\n+\t\telsif($key_chr_hash[$i] =~ /chr(\\d+)\\_([\\w\\d\\_]+)/){\r\n+\t\t\tpush(@digit_other_1, $1);\n+\t\t\tpush(@digit_other_2, $2);\r\n+\t\t}\r\n+\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)\\_([\\w\\d\\_]+)/){\r\n+\t\t\tpush(@words_other_1, $1);\n+\t\t\tpush(@words_other_2, $2);\r\n+\t\t}\r\n+\t\telsif($key_chr_hash[$i] =~ /chr(\\d+)/){\r\n+\t\t\tpush(@digit, $1);\r\n+\t\t}\r\n+\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)/){\r\n+\t\t\tpush(@words, $1);\r\n+\t\t}\r\n+\t\telse{\r\n+\t\t\tprint "BED2FA: There is unknown type of chromosomes: $key_chr_hash[$i]\\n";\r\n+\t\t}\r\n+\t}\r\n+\t@sort_digit = sort by_mostly_numeric @digit;\r\n+\t@sort_words = sort by_mostly_string @words;\n+\t@sort_digit_random = sort by_mostly_numeric @digit_random;\r\n+\t@sort_words_random = sort by_mostly_string @words_random;\n+\t@sort_digit_other = sort_2_array_number_string(\\@digit_other_1, \\@digit_other_2);\r\n+\t@sort_words_other = sort_2_array_string_string(\\@words_other_1, \\@words_other_2);\r\n+\t\r\n+\t$len_digit = @sort_digit;\r\n+\tfor($i=0; $i<$len_digit; $i++){\r\n+\t\t$term = "chr".$sort_digit[$i];\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\r\n+\t$len_words = @sort_words;\r\n+\tfor($i=0; $i<$len_words; $i++){\r\n+\t\t$term = "chr".$sort_words[$i];\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\n+\t$len_digit_random = @sort_digit_random;\r\n+\tfor($i=0; $i<$len_digit_random; $i++){\r\n+\t\t$term = "chr".$sort_digit_random[$i]."_random";\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\r\n+\t$len_words_random = @sort_words_random;\r\n+\tfor($i=0; $i<$len_words_random; $i++){\r\n+\t\t$term = "chr".$sort_words_random[$i]."_random";\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\t\r\n+\t$len_digit_other = @sort_digit_other;\r\n+\tfor($i=0; $i<$len_digit_other; $i=$i+2){\r\n+\t\t$term = "chr".$sort_digit_other[$i]."_".$sort_digit_other[$i+1];\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\r\n+\t$len_words_other = @sort_words_other;\r\n+\tfor($i=0; $i<$len_words_other; $i=$i+2){\r\n+\t\t$term = "chr".$sort_words_other[$i]."_".$sort_words_other[$i+1];\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\t\n+\t\n+\treturn @sort_chr_hash;\n+}\r\n+\n+sub sort_2_array_number_string{\r\n+\tlocal($a, $b) = @_;\r\n+\tlocal($len_a, $len_b, $i, %family, $one, $two);\r\n+\tlocal(@ret);\r\n+\t\r\n+\t$len_a = @$a;\r\n+\t$len_b = @$b;\r\n+\tif($len_a == $len_b){\r\n+\t\tfor($i=0; $i<$len_a; $i++){\r\n+\t\t\t$family{$$a[$i]}{$$b[$i]} = 0;\t\t\t\t\t\t\t\t\t\r\n+\t\t}\r\n+\t\tfor $one (sort by_mostly_numeric keys %family) {\r\n+\t\t\tfor $two (sort by_mostly_string keys %{ $family{$one} }) {\r\n+\t\t\t\t\tpush(@ret, $one);\r\n+\t\t\t\t\tpush(@ret, $two);\r\n+\t\t\t}\t\r\n+\t\t}\r\n+\t}\r\n+\telse{\t\t\r\n+\t\tprint "ERROR: Sort array is not same size\\n";\r\n+\t\tprint "a $len_a, b $len_b\\n";\r\n+\t}\t\r\n+\t\r\n+\treturn @ret;\r\n+}\n+\n+sub sort_2_array_string_string{\r\n+\tlocal($a, $b) = @_;\r\n+\tlocal($len_a, $len_b, $i, %family, $one, $two);\r\n+\tlocal(@ret);\r\n+\t\r\n+\t$len_a = @$a;\r\n+\t$len_b = @$b;\r\n+\tif($len_a == $len_b){\r\n+\t\tfor($i=0; $i<$len_a; $i++){\r\n+\t\t\t$family{$$a[$i]}{$$b[$i]} = 0;\t\t\t\t\t\t\t\t\t\r\n+\t\t}\r\n+\t\tfor $one (sort by_mostly_string keys %family) {\r\n+\t\t\tfor $two (sort by_mostly_string keys %{ $family{$one} }) {\r\n+\t\t\t\t\tpush(@ret, $one);\r\n+\t\t\t\t\tpush(@ret, $two);\r\n+\t\t\t}\t\r\n+\t\t}\r\n+\t}\r\n+\telse{\t\t\r\n+\t\tprint "ERROR: Sort array is not same size\\n";\r\n+\t\tprint "a $len_a, b $len_b\\n";\r\n+\t}\t\r\n+\t\r\n+\treturn @ret;\r\n+}\n+\n+sub by_mostly_numeric{\r\n+#\t( $a <=> $b ) || ( $a cmp $b );\r\n+\t( $a <=> $b );\r\n+}\r\n+\r\n+sub by_mostly_string{\r\n+#\t( $a <=> $b ) || ( $a cmp $b );\r\n+\t( $a cmp $b );\r\n+}\n+\r\n' |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/get_event_dist_fit.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/get_event_dist_fit.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,108 @@ +use Getopt::Long; +use strict; + +use Cwd; +my $PROG = $0; +my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); +my $PROG_ABS_PATH = Cwd::abs_path($PROG); +my $SrcFolder=`dirname $PROG_ABS_PATH`; +chomp($SrcFolder); + +#my $SrcFolder="/data/zhang/wuj/scripts/SpliceTrap.0.8/"; +my $InputFileName=$ARGV[0]; +my $OutputFileName = $ARGV[0].".hist"; +#$OutputFileName=$ARGV[1] if $ARGV[1] ne ""; +my $width=0.001; +my $verbose=1; +my $col=2; + +GetOptions ( + 'w:f'=>\$width, + 'c:i'=>\$col, + 'v'=>\$verbose +); + +$width=$width*1; +my $binnum=1/$width; +$col=$col-1; + +my @CAratios; +my @CSratios; +my @ADratios; +my @AAratios; +my @AIratios; +my @IRratios; + +open(Input, $InputFileName); +while(my $line=<Input>) +{ + next if($line=~/^#/); + chomp($line); + my @array=split(/\s/,$line); + next if($array[$col]<=0.001 or $array[$col]>=0.999); +# push( @CAratios,$array[$col]) if($array[0]=~/^C[AS]/ or $array[0]=~/^ME/); + push( @CAratios,$array[$col]) if($array[0]=~/^CA/ or $array[0]=~/^ME/); + push( @CSratios,$array[$col]) if($array[0]=~/^CS/); + push( @ADratios,$array[$col]) if($array[0]=~/^AD/); + push( @AAratios,$array[$col]) if($array[0]=~/^AA/); + push( @AIratios,$array[$col]) if($array[0]=~/^AI/); + push( @IRratios,$array[$col]) if($array[0]=~/^IR/); + +} + +close(Input); + +open(tmpFile, ">$InputFileName.tmpca"); +for(my $i=0;$i<@CAratios;$i++) +{ + print tmpFile $CAratios[$i],"\n"; +} +close(tmpFile); +open(tmpFile, ">$InputFileName.tmpcs"); +for(my $i=0;$i<@CSratios;$i++) +{ + print tmpFile $CSratios[$i],"\n"; +} +close(tmpFile); + + +open(tmpFile, ">$InputFileName.tmpad"); +for(my $i=0;$i<@ADratios;$i++) +{ + print tmpFile $ADratios[$i],"\n"; +} +close(tmpFile); + +open(tmpFile, ">$InputFileName.tmpaa"); +for(my $i=0;$i<@AAratios;$i++) +{ + print tmpFile $AAratios[$i],"\n"; +} +close(tmpFile); + +open(tmpFile, ">$InputFileName.tmpai"); +for(my $i=0;$i<@AIratios;$i++) +{ + print tmpFile $AIratios[$i],"\n"; +} +close(tmpFile); + +open(tmpFile, ">$InputFileName.tmpir"); +for(my $i=0;$i<@IRratios;$i++) +{ + print tmpFile $IRratios[$i],"\n"; +} +close(tmpFile); + +system("R --slave --args $InputFileName.tmpca <$SrcFolder/beta_fit.R"); +system("R --slave --args $InputFileName.tmpad <$SrcFolder/beta_fit.R"); +system("R --slave --args $InputFileName.tmpaa <$SrcFolder/beta_fit.R"); +#system("R --slave --args $InputFileName.tmpai <$SrcFolder/R/beta_fit.R"); +system("R --slave --args $InputFileName.tmpir <$SrcFolder/beta_fit.R"); +system("R --slave --args $InputFileName.tmpcs <$SrcFolder/beta_fit.R"); + +system("echo '#Width:$width' >$InputFileName.fit.hist"); +#system("paste $InputFileName.tmpca.fit $InputFileName.tmpad.fit $InputFileName.tmpaa.fit $InputFileName.tmpai.fit $InputFileName.tmpir.fit $InputFileName.tmpcs.fit >>$InputFileName.fit.hist"); + +system("paste $InputFileName.tmpca.fit $InputFileName.tmpad.fit $InputFileName.tmpaa.fit $InputFileName.tmpir.fit $InputFileName.tmpcs.fit >>$InputFileName.fit.hist"); +#system("rm $InputFileName.tmpca* $InputFileName.tmpad* $InputFileName.tmpaa* $InputFileName.tmpai* $InputFileName.tmpir* "); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/gtf2bed.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/gtf2bed.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,81 @@ +# rewrite on Sep 7th,2022 + +#part of package SpliceTrap + +#Jie Wu +use strict; + +my $inputfilename = $ARGV[0]; + +# input file is a gtf file, +# "transcript_id" is required for each line and should not be ambiguous. +# only the "exon" lines are used + +my %chr_hash; +my %strand_hash; +my %tx_exons; #tx_exons{$tx_id){$start} = $size; + +my $linenum = 0; + +open(input, $inputfilename); + +while(my $line=<input>) +{ + $linenum++; + my @a = split("\t",$line); + if ($a[2] eq "exon") + { + my $txid; + if($a[8]=~/transcript_id "(\S*?)"/) + { + $txid = $1; + } + else + { + die ("$inputfilename format error! No transcript_id in line $linenum \n"); + } + + if( exists $chr_hash{$txid} and $chr_hash{$txid} ne $a[0]) + { + warn ("$inputfilename: ambiguous transcript_id in line $linenum: $txid Skipped \n"); + next; + } + if( exists $strand_hash{$txid} and $strand_hash{$txid} ne $a[6]) + { + warn ("$inputfilename: ambiguous transcript_id in line $linenum: $txid Skipped\n"); + } + $chr_hash{$txid} = $a[0]; + $strand_hash{$txid} = $a[6]; + $tx_exons{$txid}{$a[3]} = $a[4] - $a[3] +1; + + } + +} + +foreach my $txid (keys %chr_hash) +{ + my @starts; + my @sizes; + foreach my $start (sort {$a<=>$b} (keys %{$tx_exons{$txid}} ) ) + { + push (@starts, $start); + push (@sizes, $tx_exons{$txid}{$start}); + } + my $exon_num = scalar(@sizes); + my $starts_str = ""; + for(my $i = 0; $i < $exon_num; $i++) + { + $starts_str = $starts_str.($starts[$i] - $starts[0]).","; + if($i>0) + { + warn "$txid, intron size..".($starts[$i]-$starts[$i-1])."\n" if ($starts[$i]-$starts[$i-1]>1000000); + } + } + my $sizes_str = join(",",@sizes); + my $end = $starts[$exon_num-1] + $sizes[$exon_num-1] -1; + print join("\t",$chr_hash{$txid}, $starts[0]-1, $end, $txid,"0",$strand_hash{$txid},$starts[0]-1, $end, "255,0,0",$exon_num,$sizes_str, $starts_str); + print "\n"; +} + + +close(input); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/mapping_bowtie.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/mapping_bowtie.sh Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,89 @@ +#!/bin/bash +#SrcFolder='/data/zhang/wuj/scripts/SpliceTrap.0.8' +InputFileName=$1 +faorfq=$2 +DatabasePrefix=$3 +Outputfolder=$4 +SrcFolder=$5 +Threads=$6 +DatabaseFolder=$SrcFolder'/../db/'$DatabasePrefix'/btw/TXdb' +TmpFolderName=`basename $1` +#fasta or fastq + +cd $Outputfolder; +#prepare the folder +if [ -d $TmpFolderName.result ];then + echo "MAPPING: !!!Error, there is already a folder named "$TmpFolderName".result !" + echo "MAPPING: !!!change the name of that folder first in case I erase them..." + exit +fi +echo "MAPPING: Start mapping $InputFileName...Creating cache folder $TmpFolderName.result" +mkdir $TmpFolderName".result" +mkdir $TmpFolderName".result"/cache + +cd $TmpFolderName".result" +cd cache +echo "MAPPING: Split to pieces ..." +split -l 1000000 $InputFileName +for name in x* +do + + if [ $faorfq == "fasta" ];then + add="-f" + fi +# if [ $name != $InputFileName ];then + echo "bowtie -p $Threads -a -v 2 $DatabaseFolder $name $add >$name.btw; perl $SrcFolder/bowtie2eland.pl $name.btw $name $name.eland;rm $name.btw ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>map.sh + echo $name >>checklist +# fi +done + +echo "MAPPING: submit scripts..." +perl $SrcFolder/batchqsub.pl map.sh + +tasknum=`wc -l map.sh |tr -d "\n"` +#checking.. + +echo "MAPPING: mapping $InputFileName to TXdb done...start to check.." +while [ 1 ] +do + if [ -f mapcheck.sh ];then + rm mapcheck.sh + fi + while read checklist + do + + name=`echo $checklist |tr -d "\n"` + echo "MAPPING: checking $name...." + readnum=`wc -l $name | cut -f1 -d" "` + if [ $faorfq == "fasta" ];then + readnum=`echo "$readnum/2"|bc` + else + readnum=`echo "$readnum/4"|bc` + fi + if [ -f $name.nomt ];then + bowtienum=`wc -l $name.nomt | cut -f1 -d" "` + else + bowtienum=0 + fi + if [ $bowtienum != $readnum ];then + echo "bowtie -p $Threads -a -v 2 $DatabaseFolder $name $add >$name.btw; perl $SrcFolder/bowtie2eland.pl $name.btw $name $name.eland;rm $name.btw ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>mapcheck.sh + + fi + done <checklist + if [ -f mapcheck.sh ];then + checktasknum=`wc -l mapcheck.sh |tr -d "\n"` + if [ $checktasknum == $tasknum ];then + echo "MAPPING: warning! none of the mapping tasks properly finished!" + fi + echo "MAPPING: resubmit TASKS...." + perl $SrcFolder/batchqsub.pl mapcheck.sh + else + break + fi +done +echo "MAPPING: Done.....merging files..." +cat *.nomt >$Outputfolder/$TmpFolderName.nomt +cd ../../ +rm $TmpFolderName.result -rf +#/data/zhang/wuj/tools/bowtie-0.12.3/bowtie -a $DatabaseFolderTXdb -f + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/mapping_rmap.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/mapping_rmap.sh Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,80 @@ +#/data/zhang/wuj/tools/bowtie-0.12.3/bowtie -a --best /data/zhang/wuj/database/hg18/AS/TXdb.2/btw/TXdb -f -t s_1_sequence.txtparta >s_1.map & +#SrcFolder='/data/zhang/wuj/scripts/SpliceTrap.0.8' +InputFileName=$1 +faorfq=$2 +DatabasePrefix=$3 +Outputfolder=$4 +SrcFolder=$5 +DatabaseFolder=$SrcFolder'/../db/'$DatabasePrefix'/TXdb.fasta' +TmpFolderName=`basename $1` + +cd $Outputfolder; +#prepare the folder +if [ -d $TmpFolderName.result ];then + echo "MAPPING: !!!Error, there is already a folder named "$TmpFolderName".result !" + echo "MAPPING: !!!change the name of that folder first in case I erase them..." + exit +fi +echo "MAPPING: Start to map $InputFileName....Creating cache folder $TmpFolderName.result" +mkdir $TmpFolderName".result" +cd $TmpFolderName".result" +mkdir cache stat +cd cache +echo "MAPPING: Split file..." +split -l 1000000 $InputFileName +echo "MAPPING: generating shell scirpts...." +for name in x* +do + + echo "rmap -M 100 -m 2 -c $DatabaseFolder -o $name.rmap $name; perl $SrcFolder/rmap2eland.pl $name.rmap $name $name.eland;rm $name.rmap ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>map.sh + echo $name >>checklist +done +tasknum=`wc -l map.sh |tr -d "\n"` +perl $SrcFolder/batchqsub.pl map.sh + +echo "MAPPING: map $InputFileName to TXdb done...start to check.." +while [ 1 ] +do + if [ -f mapcheck.sh ];then + rm mapcheck.sh + fi + while read checklist + do + + name=`echo $checklist |tr -d "\n"` + echo "MAPPING: checking $name...." + readnum=`wc -l $name | cut -f1 -d" "` + if [ $faorfq == "fasta" ];then + readnum=`echo "$readnum/2"|bc` + else + readnum=`echo "$readnum/4"|bc` + fi + if [ -f $name.nomt ];then + rmapnum=`wc -l $name.nomt | cut -f1 -d" "` + else + rmapnum=0 + fi + if [ $rmapnum != $readnum ];then + echo "rmap -M 100 -m 2 -c $DatabaseFolder -o $name.rmap $name; perl $SrcFolder/rmap2eland.pl $name.rmap $name $name.eland;rm $name.rmap ;perl $SrcFolder/mark.mt.4eland.pl $name.eland >$name.nomt;rm $name.eland">>mapcheck.sh + + fi + done <checklist + if [ -f mapcheck.sh ];then + checktasknum=`wc -l mapcheck.sh |tr -d "\n"` + if [ $checktasknum == $tasknum ];then + echo "MAPPING: warning! none of the mapping tasks properly finished!" + fi + echo "MAPPING: resubmiting TASKS...." + perl $SrcFolder/batchqsub.pl mapcheck.sh + else + break + fi +done +echo "MAPPING: Mapping is really done....merging files..." + + +cat *.nomt >$Outputfolder/$TmpFolderName.nomt +cd ../../ + +rm $TmpFolderName.result -rf + |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/mark.mt.4eland.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/mark.mt.4eland.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,56 @@ +# this file is to convert mult mapped reads to nm reads by simply marked it as NM reads. +# its for the convience of inclusion ratio computation, if one read can be mapped to mult positions in the genome, then it will be marked as NM +# later, can be used to add information for dealing with this mult reads, for example, the coverage in the region + +use strict; +my $inputfilename=$ARGV[0]; +my $LongMarker="L"; +my $ShortMarker="S"; + + +open(input, $inputfilename); +while(my $line=<input>) +{ + #print "new line\n"; + chomp($line); + my @array = split("\t",$line); + my $match=$array[3]; + if( $array[2] eq "NM" or $match eq "") + { + print $line,"\n"; + next; + } + + my $marker=$LongMarker.$ShortMarker; + my @genome_pos; + #while($match1=~/\/(\S[^,]*\[[$marker]\])\S[^,]*:(\d*)[RF]/g) + #this array is used to store the mapped position for this read + my @chr; + my @start; + my @end; + while($match=~/(chr\S[^\|]*)\|(\d*)\|(\d*)\|/g) + { + push @chr, $1; + push @start, $2; + push @end, $3; + } + @chr=sort(@chr); + if (scalar(@chr)<=1) + { + print $line,"\n"; + next; + } + + @start=sort(@start); + @end=sort(@end); + if($chr[0] ne $chr[scalar(@chr)-1] or $start[scalar(@chr)-1]-$start[0]>100000) + { + print $line, "\tMT\n"; + } + else + { + print $line,"\n"; + + } +} +close(input); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/rmap2eland.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/rmap2eland.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,103 @@ +use strict; + +my $rmapfilename=$ARGV[0]; +my $readsfilename=$ARGV[1]; +my $elandfilename=$ARGV[2]; + +my $detectformat=`head -c 1 $readsfilename`; + +#system("grep \"$detectformat\" $readsfilename |sort >$readsfilename.sort"); +system("awk 'NR%2==1' $readsfilename |sort >$readsfilename.sort"); +system("sort -k4,4 $rmapfilename >$rmapfilename.sort"); + + +open(readsfile, $readsfilename.".sort"); + + + +#$looplinenumbers=2 if ($detectformat eq ">"); +open(rmapfile, $rmapfilename.".sort"); +open(elandfile, ">".$elandfilename); + +while(my $rmapline=<rmapfile>) +{ + chomp($rmapline); + my ($mapped_id, $start, $end, $rmapreadname, $mismatch, $strand)=split("\t",$rmapline); + while(my $readline=<readsfile>) + { + if($readline=~/^$detectformat/) + { + chomp($readline); + my $readname=substr($readline, 1, length($readline)-1); + + + if($readname ne $rmapreadname) + { + print elandfile $readname,"\tNA\tNM\n"; + next; + } + else + { + my @mapped_ids=(); + my @mapped_pos=(); + my @mapped_strand=(); + push(@mapped_ids, $mapped_id); + push(@mapped_pos,$start); + push(@mapped_strand,$strand); + while(1) + { + $rmapline=<rmapfile>; + chomp($rmapline); + ($mapped_id, $start, $end, $rmapreadname, $mismatch, $strand)=split("\t",$rmapline); + if( $rmapreadname eq $readname ) + { + push(@mapped_ids, $mapped_id); + push(@mapped_pos,$start); + push(@mapped_strand,$strand); + } + else + { + seek(rmapfile, -1*length($rmapline)-1,1); + print elandfile $readname,"\t"; + print elandfile "NA\t"; + print elandfile scalar(@mapped_ids),":0:0\t"; + for(my $i=0;$i<@mapped_ids;$i++) + { + print elandfile "/",$mapped_ids[$i]; + print elandfile ":",$mapped_pos[$i]+1; + if($mapped_strand[$i] eq "+") + { + print elandfile "F0,"; + } + else + { + print elandfile "R0,"; + } + + } + print elandfile "\n"; + last; + } + } + last; + + } + } + } +} + +while(my $readline=<readsfile>) +{ + if($readline=~/^$detectformat/) + { + chomp($readline); + my $readname=substr($readline, 1, length($readline)-1); + print elandfile $readname,"\tNA\tNM\n"; + } +} + +close(elandfile); +close(rmapfile); + + +close(readsfile); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/scan_nomt.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/scan_nomt.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,98 @@ +use strict; +use FileHandle; + + +my $File1Name=$ARGV[0]; +my $File2Name=$ARGV[1]; + +my %FileHandle1; +my %FileHandle2; +my %chrlist; + +open(File1,$File1Name); +open(File2,$File2Name); + +while(my $line1=<File1>) +{ + my $line2=<File2>; + chomp($line1); + chomp($line2); + my $read1status=substr($line1,length($line1)-2,2); + my $read2status=substr($line2,length($line2)-2,2); + #next if( ($read1status eq "NM" or $read1status eq "MT") and ($read2status eq "NM" or $read2status eq "MT"); + my @array1=split("\t",$line1); + my @array2=split("\t",$line2); + my $chr1=""; + my $chr2=""; + if(scalar(@array1) eq 4) + { + if($array1[3]=~/(chr\S*?)\|/) + {$chr1=$1;} + } + if(scalar(@array2) eq 4) + { + if($array2[3]=~/(chr\S*?)\|/) + {$chr2=$1;} + } + my $chr=$chr1; + if ($chr eq "") + { + next if($chr2 eq ""); + $chr=$chr2; + } + else + { + next if($chr2 ne "" and $chr2 ne $chr); + + } + next if $chr eq ""; + if(exists $chrlist{$chr}) + { + my $fout1= $FileHandle1{$chr}; + my $fout2= $FileHandle2{$chr}; + if($read1status eq "MT") + { + print $fout1 $array1[0],"\t",$array1[1],"\tMT\n"; + } + else + { + print $fout1 $line1,"\n"; + } + if($read2status eq "MT") + { + print $fout2 $array2[0],"\t",$array2[1],"\tMT\n"; + } + else + { + print $fout2 $line2,"\n"; + } + + + + } + else + { + $chrlist{$chr}=1; + my $fout1= new FileHandle; + open($fout1, ">".$File1Name.".".$chr); + $FileHandle1{$chr}=$fout1; + + my $fout2= new FileHandle; + open($fout2, ">".$File2Name.".".$chr); + $FileHandle2{$chr}=$fout2; + + } + +} + +foreach my $fout1 (keys %FileHandle1) +{ + close($fout1); +} +foreach my $fout2 (keys %FileHandle2) +{ + close($fout2); +} + +close(File1); +close(File2); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/scanbed2txdb.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/scanbed2txdb.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
b'@@ -0,0 +1,490 @@\n+#argv0: input transcript bed file\n+#argv1: output filename, will be in AS format\n+\n+use strict;\n+my $AnnoFileName = $ARGV[0];\n+my $outputFileName = $ARGV[1];\n+\n+if($AnnoFileName eq "" or $outputFileName eq "")\n+{\n+\tprint "TXDBGEN: Please specify your input files\\n";\n+\texit;\n+}\n+\n+my $cachefolder = `basename $outputFileName`;\n+chomp($cachefolder);\n+my $cachefolder = $cachefolder.".cache";\n+my $AnnoFileBase = `basename $AnnoFileName`; \n+chomp($AnnoFileBase);\n+\n+if(! -e $cachefolder)\n+{\n+\tmkdir $cachefolder or die "CHECK: cannot mkdir $cachefolder\\n";\n+\tprint "TXDBGEN: mkdir $cachefolder\\n";\n+}\n+\n+my $CacheAnnoFileName = $cachefolder."/".$AnnoFileBase.".sort";\n+#sort the annotation file\n+\n+print "TXDBGEN: sort $AnnoFileName \\n";\n+\n+system("sort -k6,6 -k1,1 -k2,2n -k3,3n $AnnoFileName >$CacheAnnoFileName");\n+\n+$AnnoFileName = $CacheAnnoFileName;\n+\n+#read the annotations into hashes\n+\n+open(AnnoFile, $CacheAnnoFileName) or die "can not open",$CacheAnnoFileName;\n+#split the genes into contigs\n+\n+my $contigid = 0;\n+my $end_tmp = 0;\n+my $chr_tmp = "chr";\n+my $strand_tmp="NA";\n+\n+my $TXnumtmp=0;\n+\n+my %eventlist =();\n+#my %eventlist_af = ();\n+#my %eventlist_al = ();\n+my %evidences = ();\n+\n+open(my $output,">$outputFileName");\n+\n+open(my $output2, ">$outputFileName.evi");\n+while(my $line =<AnnoFile>)\n+{\n+\tchomp($line);\n+\tmy @a = split("\\t",$line);\n+\tmy $chr = $a[0];\n+\tmy $start = $a[1];\n+\tmy $end = $a[2];\n+\tmy $name = $a[3];\n+\tmy @sizes = split(",",$a[10]);\n+\tmy $strand = $a[5];\n+\tmy @start_shifts = split(",",$a[11]);\n+\t#my $chrstr = $chr.$strand;\n+\tmy @starts;\n+\tmy @ends;\n+\tfor(my $i=0;$i<@start_shifts;$i++)\n+ {\n+ $starts[$i]=$start_shifts[$i]+$start;\n+ $ends[$i] = $starts[$i]+$sizes[$i];\n+ }\n+\tif($start >$end_tmp or $chr ne $chr_tmp or $strand_tmp ne $strand)\n+ {\n+ $contigid++;\n+ #$ctgmultisonum++ if $TXnumtmp>1;\n+ $TXnumtmp=0;\n+\t\tmy $annos = scanevents(\\%eventlist, "inner") ;\n+\t\t#my $annos_af = scanevents(\\%eventlist_af, "af");\n+\t\t#my $annos_al = scanevents(\\%eventlist_al, "al");\n+\t\t\n+\t\tmy $stdout = select ($output);\n+\t\tmy $eventids = printanno($annos,$chr_tmp,$strand_tmp) if(scalar(%$annos)>0);\n+\t\t#printanno($annos_af,$chr_tmp,$strand_tmp) if(scalar(%$annos_af)>0);\n+\t\t#printanno($annos_al,$chr_tmp,$strand_tmp) if(scalar(%$annos_al)>0);\n+\t\tselect($stdout);\n+\t\t$stdout = select ($output2);\n+\t\t#print cross information\n+\t\tforeach my $connect_str (keys %$eventids) \n+\t\t{\n+\t\t\tprint $eventids->{$connect_str},"\\t";\n+\t\t\tforeach my $transcriptid (keys %{$evidences{$connect_str}}) \n+\t\t\t{\n+\t\t\t\tprint $transcriptid,",";\n+\t\t\t}\n+\t\t\tprint "\\n";\n+\t\t}\n+\t\tselect($stdout);\n+\t\tprint "TXDBGEN: Contig ID $contigid at $chr $strand...\\n" if ( $contigid%1000 == 0);\n+\t\t%eventlist = ();\n+\t\t#%eventlist_al = ();\n+\t\t#%eventlist_af = ();\n+ #print CacheContigFile "#ctg$contigid\\n";\n+\n+ }\n+\t$TXnumtmp++;\n+ $end_tmp = $end if ($end > $end_tmp or $chr ne $chr_tmp or $strand_tmp ne $strand);\n+ $chr_tmp = $chr;\n+\t\n+\t$strand_tmp = $strand;\n+\t\n+\t#scan connections, 2 and 3 exons for CA/CS/AF\n+#\tif(scalar(@starts)>2)\n+#\t{\n+#\t\tmy $connectionstr_af =$starts[0]."-".\n+#\t\t\t\t$ends[0].",".\n+#\t\t\t\t$starts[1]."-".\n+#\t\t\t\t$ends[1]."," ;\n+#\t\n+#\t\t$eventlist_af{$connectionstr_af} = $starts[0];\n+#\t\tmy $connectionstr_al = $starts[scalar(@starts)-2]."-".\n+#\t\t\t\t$ends[scalar(@starts)-2].",".\n+#\t\t\t\t$starts[scalar(@starts)-1]."-".\n+#\t\t\t\t$ends[scalar(@starts)-1]."," ;\n+#\t\t$eventlist_al{$connectionstr_al} = $starts[scalar(@starts)-2];\n+#\t}\n+\t\t#didn\'t consider direction yet\n+ #add 1 exon for IR\n+\tfor(my $n=1;$n<4;$n++)\n+ {\n+\t\tfor(my $i=0;$i<scalar(@starts)-$n+1;$i++)\n+ {\n+\t\t\tmy $connectionstr = "";\n+\t\t\tfor(my $j=$i;$j<$i+$n;$j++)\n+ {\n+ $connectionstr = $connectionstr .\n+ $starts[$j]."-".\n+ $en'..b'\t\tprint "0,";\n+\t\t\tprint $a[2]-$a[0],",";\n+\t\t\tprint $a[4]-$a[0],"\\n";\n+\t\t\t\n+\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n+ print $id.".".$num,"[S]\\t";\n+\t\t\tmy $connectstr_ca= $a[0]."-".$a[1].",".$a[4]."-".$a[5].",";\n+\t\t\t$eventids{$connectstr_ca} = $id.".".$num."[S]";\n+ print "0\\t";\n+ print $strand,"\\t";\n+ print $a[0],"\\t",$a[5],"\\t";\n+ print "255,0,0\\t";\n+ print "2\\t"; \n+ print $a[1]-$a[0],",";\n+ #print $a[3]-$a[2],",";\n+ print $a[5]-$a[4],"\\t";\n+ print "0,";\n+ #print $a[2]-$a[0],",";\n+ print $a[4]-$a[0],"\\n";\n+\t\t\t\n+\t\t}\n+\t\tif($annos->{$key} eq "ir")\n+\t\t{\n+\t\t\tmy @a=split(/[,-]/,$key);\n+\n+\t\t\tmy $chrid=substr($chr,3,length($chr)-3);\n+\t\t\tmy $id="IR-IR-$chrid"."-".$a[1]."-".$a[2];\n+\t\t\tmy $num=0;\n+\t\t\t$num = $nums_per_isoform{$id} if exists $nums_per_isoform{$id};\n+\t\t\t$nums_per_isoform{$id}++;\n+\n+\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[3],"\\t";\n+\t\t\tprint $id.".".$num,"[L]\\t";\n+\t\t\tprint "0\\t";\n+\t\t\tprint $strand,"\\t";\n+\t\t\tprint $a[0],"\\t",$a[3],"\\t";\n+\t\t\t$eventids{$a[0]."-".$a[3].","} = $id.".".$num."[L]";\n+\t\t\tprint "255,0,0\\t";\n+\t\t\tprint "3\\t";\n+\t\t\tprint $a[1]-$a[0],",";\n+\t\t\tprint $a[2]-$a[1],",";\n+\t\t\tprint $a[3]-$a[2],"\\t";\n+\t\t\tprint "0,";\n+\t\t\tprint $a[1]-$a[0],",";\n+\t\t\tprint $a[2]-$a[0],"\\n";\n+\t\t\t\n+\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[3],"\\t";\n+ print $id.".".$num,"[S]\\t";\n+\t\t\t$eventids{$key} = $id.".".$num."[S]";\n+ print "0\\t";\n+ print $strand,"\\t";\n+ print $a[0],"\\t",$a[3],"\\t";\n+ print "255,0,0\\t";\n+ print "2\\t"; \n+ print $a[1]-$a[0],",";\n+ #print $a[3]-$a[2],",";\n+ print $a[3]-$a[2],"\\t";\n+ print "0,";\n+ #print $a[2]-$a[0],",";\n+ print $a[2]-$a[0],"\\n";\n+\t\t\n+\t\t}\n+\t\tif($annos->{$key} eq "ss")\n+\t\t{\n+\t\t\tmy @a=split(/[,-]/,$key);\n+\t\t\tmy $chrid=substr($chr,3,length($chr)-3);\n+\t\t\tmy $type="AA";\n+\t\t\tif( ($strand eq "+" && $a[1]==$a[2]) or ($strand eq "-" && $a[3]==$a[4]))\n+\t\t\t{\n+\t\t\t\t$type="AD";\n+\t\t\t}\n+\t\t\tmy $id="$type-$type-$chrid"."-".$a[2]."-".$a[3];\n+\t\t\tmy $connect_str_L = "";\n+\t\t\tmy $connect_str_S = "";\n+\t\t\tif($a[1]==$a[2])\n+\t\t\t{\n+\t\t\t\t$connect_str_L = $a[0]."-".$a[3].",".$a[4]."-".$a[5].",";\n+\t\t\t\t$connect_str_S = $a[0]."-".$a[1].",".$a[4]."-".$a[5].",";\n+\t\t\t}\n+\t\t\telse\n+\t\t\t{\n+\t\t\t\t$connect_str_L = $a[0]."-".$a[1].",".$a[2]."-".$a[5].",";\n+\t\t\t\t$connect_str_S = $a[0]."-".$a[1].",".$a[3]."-".$a[5].",";\n+\t\t\t\t\n+\t\t\t}\n+\t\t\tmy $num=0;\n+\t\t\t$num = $nums_per_isoform{$id} if exists $nums_per_isoform{$id};\n+\t\t\t$nums_per_isoform{$id}++;\n+\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n+\t\t\tprint $id.".".$num,"[L]\\t";\n+\t\t\t$eventids {$connect_str_L} = $id.".".$num."[L]";\n+\t\t\tprint "0\\t";\n+\t\t\tprint $strand,"\\t";\n+\t\t\tprint $a[0],"\\t",$a[5],"\\t";\n+\t\t\tprint "255,0,0\\t";\n+\t\t\tprint "3\\t";\n+\t\t\tprint $a[1]-$a[0],",";\n+\t\t\tprint $a[3]-$a[2],",";\n+\t\t\tprint $a[5]-$a[4],"\\t";\n+\t\t\tprint "0,";\n+\t\t\tprint $a[2]-$a[0],",";\n+\t\t\tprint $a[4]-$a[0],"\\n";\n+\t\t\t\n+\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n+ print $id.".".$num,"[S]\\t";\n+\t\t\t$eventids {$connect_str_S} = $id.".".$num."[S]";\n+ print "0\\t";\n+ print $strand,"\\t";\n+ print $a[0],"\\t",$a[5],"\\t";\n+ print "255,0,0\\t";\n+ print "2\\t"; \n+ print $a[1]-$a[0],",";\n+ #print $a[3]-$a[2],",";\n+ print $a[5]-$a[4],"\\t";\n+ print "0,";\n+ #print $a[2]-$a[0],",";\n+ print $a[4]-$a[0],"\\n";\n+\t\n+\t\n+\t\t}\n+\n+\t}\n+\treturn(\\%eventids);\n+\t\n+}\n+\n' |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/splitdb.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/splitdb.sh Thu Sep 07 15:06:58 2017 -0400 |
b |
@@ -0,0 +1,11 @@ +workingfolder=$1; +cd $workingfolder +cut -f1 ../TXdb.bed |uniq >chr.list; +while read line ; +do + chr=`echo $line |tr -d "\n"`; + grep -w $chr ../TXdb.bed >$chr.bed; + split -3000 $chr.bed $chr.; + echo $chr....; + rm $chr.bed; +done <chr.list; |
b |
diff -r d4ca551ca300 -r adc0f7765d85 bin/vslz.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/vslz.pl Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,46 @@ +use strict; +exit; + +my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; +my $SrcFolder=$config{SrcFolder}; + +my $BedFileName=$SrcFolder."/db/TXdb.1101.bed"; +my $RatioFileName=$ARGV[0]; +my $BedFileOutName=$ARGV[1]; +my %Ratios; + +open(RatioFile, $RatioFileName); + +while(my $RatioLine=<RatioFile>) +{ + chomp($RatioLine); + my @array=split("\t",$RatioLine); + $Ratios{$array[0]}=$array[2] if $array[14] eq "passed"; +} + +close(RatioFile); + +open(BedFile, $BedFileName); +open(BedFileOut,">".$BedFileOutName.".bed"); +print BedFileOut "track name=$BedFileOutName discription=$BedFileOutName useScore=1\n"; +while(my $BedLine=<BedFile>) +{ + my @array=split("\t",$BedLine); + $array[3]=~/^(\S*)\[([LS])\]/; + my $id=$1; + my $LS=$2; + #print $LS,"\n"; + next if not exists $Ratios{$id}; + $array[4]=$Ratios{$id}; + $array[4]=1-$Ratios{$id} if( $LS eq 'S'); + + $array[4]=sprintf("%.0f",$array[4]*1000); + for (my $i=0;$i<@array; $i++) + { + print BedFileOut $array[$i]; + print BedFileOut "\t" if $i<scalar(@array)-1; + } + #print BedFileOut "\n"; +} +close(BedFileOut); +close(BedFile); |
b |
diff -r d4ca551ca300 -r adc0f7765d85 cutoffs/cutoff.pair.06.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cutoffs/cutoff.pair.06.txt Thu Sep 07 15:06:58 2017 -0400 |
b |
@@ -0,0 +1,111 @@ +9 2.7 0.60603248295953 0.171888000000001 +18 2 0.60524889781465 0.183333 +27 2 0.614125387586727 0.176226000000001 +36 1.9 0.607174152514097 0.174107 +45 1.8 0.600015884070526 0.175151 +54 1.5 0.606211216622966 0.181161 +63 1.5 0.630096537351149 0.17829 +72 1.4 0.600041179609881 0.181678 +81 1.4 0.604419014379505 0.180618 +90 1.4 0.605503082593424 0.179083 +99 1.3 0.615709740786329 0.177374 +108 1.3 0.60153811996046 0.178839 +117 1.3 0.624394647716218 0.172525 +126 1.3 0.607071122577313 0.179287 +135 1.2 0.604935986506864 0.180048 +144 1.2 0.606377388150568 0.179824 +153 1.1 0.609872307866415 0.172894 +162 1.1 0.605221663116349 0.176559 +171 1.1 0.60084962792178 0.177017 +180 1.1 0.606042806460085 0.179842 +189 1.1 0.639661884576477 0.165058000000001 +198 1 0.63233306074533 0.172839 +207 1 0.620992090280475 0.17554 +216 1 0.617967295839559 0.177413000000001 +225 1 0.643160025381914 0.165310000000001 +234 1 0.664093921751576 0.161004 +243 1 0.617444847253655 0.179608 +252 0.9 0.600141496641319 0.18301 +261 0.9 0.61458063483465 0.175533 +270 0.9 0.631646650541943 0.175805 +279 0.9 0.633124568165035 0.169468 +288 0.9 0.611511089439885 0.172566 +297 0.9 0.620072975883583 0.176992 +306 0.8 0.632771460485054 0.177489 +315 0.8 0.616480687227451 0.177778 +324 0.8 0.627081395066885 0.173799 +333 0.8 0.637882935696159 0.166883 +342 0.8 0.601440520784007 0.181724 +351 0.8 0.6308436890493 0.169283000000001 +360 0.8 0.639014375873075 0.164053 +369 0.8 0.608995842436592 0.182319 +378 0.7 0.61599095431047 0.179065 +387 0.7 0.632637490417973 0.173393000000001 +396 0.7 0.611914615604433 0.174309000000001 +405 0.7 0.636212590337212 0.168285 +414 0.7 0.601768900444485 0.17904 +423 0.7 0.652306181407458 0.165425 +432 0.7 0.61284769661545 0.17678 +441 0.7 0.620417630501693 0.179685000000001 +450 0.7 0.630731905615349 0.172766000000001 +459 0.7 0.610688367962893 0.174217 +468 0.7 0.614116817389574 0.169074 +477 0.7 0.627626017337352 0.170694000000001 +486 0.7 0.625196910435048 0.173867 +495 0.7 0.624173913851294 0.170272 +504 0.7 0.600827986205662 0.176072 +513 0.7 0.60691053900984 0.173314 +522 0.7 0.637574555072294 0.164769 +531 0.7 0.623224687213309 0.173131 +540 0.7 0.606854298464756 0.17057 +549 0.7 0.602771105882753 0.176564 +558 0.7 0.635304573393458 0.165681 +567 0.7 0.614272507442932 0.178017 +576 0.7 0.647638749954493 0.161989 +585 0.7 0.635927720935647 0.166479000000001 +594 0.7 0.654872976893549 0.165962 +603 0.6 0.623281215201779 0.169602 +612 0.6 0.620036715664558 0.174000000000001 +621 0.6 0.6207915344063 0.173405000000001 +630 0.6 0.605666404221127 0.172404000000001 +639 0.6 0.605282217899342 0.171117 +648 0.6 0.605873079749684 0.173455 +657 0.6 0.622135854785987 0.171086 +666 0.6 0.609650459959578 0.180212 +675 0.6 0.615727466486383 0.175395 +684 0.6 0.650390040594351 0.157856 +693 0.6 0.600004956285639 0.17354 +702 0.6 0.619480037234101 0.169091000000001 +711 0.6 0.620537197134915 0.170687 +720 0.6 0.63097256862101 0.168571 +729 0.6 0.616862968480703 0.176372 +738 0.6 0.619853798956006 0.169892 +747 0.6 0.668527865891001 0.157351 +756 0.6 0.609299339958315 0.173282 +765 0.6 0.606997082083795 0.176418 +774 0.6 0.60385659737608 0.178646 +783 0.6 0.618166146904953 0.170539000000001 +792 0.6 0.655214353317497 0.162477 +801 0.6 0.667723279232349 0.161535 +810 0.6 0.641501812796978 0.165034000000001 +819 0.6 0.627258070106693 0.167323 +828 0.6 0.611937213679756 0.173702 +837 0.6 0.637164577326402 0.165999 +846 0.6 0.609589936966381 0.16902 +855 0.6 0.605378844036658 0.177104 +864 0.6 0.603894805290932 0.174897 +873 0.6 0.61196823740213 0.177185000000001 +882 0.6 0.645164364920278 0.169894 +891 0.6 0.616946196689952 0.164449 +900 0.6 0.620119902403488 0.173101 +909 0.6 0.612352323569787 0.175357000000001 +918 0.6 0.604523310198474 0.181906 +927 0.6 0.639238406673874 0.161621 +936 0.6 0.626807582940753 0.177092000000001 +945 0.6 0.649699083584984 0.166011 +954 0.6 0.630080161768358 0.175451 +963 0.6 0.634570582339133 0.174267 +972 0.6 0.613974438568147 0.171149000000001 +981 0.6 0.646832147519767 0.16271 +990 0.6 0.611137442619996 0.170677 +999 0.6 0.609074843250805 0.177417 |
b |
diff -r d4ca551ca300 -r adc0f7765d85 cutoffs/cutoff.pair.07.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cutoffs/cutoff.pair.07.txt Thu Sep 07 15:06:58 2017 -0400 |
b |
@@ -0,0 +1,111 @@ +9 3.8 0.723195172634673 0.136373 +18 3.1 0.704962064416613 0.145611 +27 2.7 0.707557510009516 0.148332 +36 2.7 0.727407566746925 0.139424000000001 +45 2.6 0.700849445263879 0.141565 +54 2.5 0.722912449885175 0.135534 +63 2.2 0.700978069072888 0.148155 +72 2.1 0.701953684982546 0.145638 +81 2.1 0.714519324839634 0.144058 +90 2 0.706725312179025 0.143797 +99 1.8 0.711963141532287 0.146397 +108 1.8 0.735577668185521 0.136275 +117 1.7 0.702810428805111 0.149303 +126 1.7 0.720543078508623 0.142071 +135 1.7 0.70149933958699 0.145076 +144 1.7 0.71795624649094 0.139588 +153 1.7 0.704835091689678 0.148916 +162 1.5 0.701545731183728 0.149502 +171 1.5 0.724190117580076 0.139706 +180 1.5 0.713150501847426 0.146353 +189 1.4 0.701931396757465 0.150412 +198 1.4 0.701219389501589 0.150832 +207 1.4 0.709880139643964 0.146954 +216 1.4 0.745895944323654 0.133521 +225 1.4 0.708094624059256 0.144051000000001 +234 1.4 0.714653417236785 0.141035 +243 1.4 0.702278770668715 0.146489 +252 1.4 0.714249389046789 0.140401 +261 1.4 0.720916794007688 0.140816 +270 1.4 0.704112631476039 0.140986 +279 1.4 0.725725706846333 0.140364 +288 1.4 0.713439724527154 0.142329 +297 1.3 0.703874456990055 0.145868 +306 1.3 0.714083967774953 0.138665 +315 1.3 0.717635749171277 0.139297 +324 1.3 0.70800987989496 0.14435 +333 1.3 0.703608941539679 0.143911 +342 1.3 0.712538846663198 0.142044 +351 1.3 0.736517626110649 0.137995 +360 1.3 0.752400150484099 0.133463 +369 1.3 0.700408117809905 0.149221 +378 1.2 0.710876753570226 0.145758 +387 1.2 0.719606098956021 0.141686 +396 1.1 0.701573537304199 0.149306 +405 1.1 0.711449711776699 0.146484 +414 1.1 0.7219174559929 0.140383 +423 1.1 0.711374621895577 0.147498 +432 1.1 0.706470648418195 0.146516 +441 1.1 0.702119897887331 0.144641 +450 1.1 0.710888241224795 0.14668 +459 1.1 0.700236191312255 0.145443 +468 1.1 0.709289110346942 0.145098000000001 +477 1.1 0.723682944143134 0.137715000000001 +486 1.1 0.711909258694556 0.148433000000001 +495 1.1 0.706050645629693 0.146412 +504 1.1 0.71026622444805 0.142836 +513 1.1 0.721840069030837 0.140123 +522 1.1 0.721616423306577 0.139145 +531 1.1 0.706599106603714 0.144744 +540 1.1 0.732906503371939 0.138953 +549 1.1 0.710747551039788 0.146028 +558 1.1 0.72403698505923 0.141907 +567 1.1 0.703858944181491 0.142942 +576 1.1 0.732227703846385 0.137018 +585 1.1 0.706081331511759 0.145895 +594 1.1 0.72851699215126 0.140481 +603 1.1 0.735776902337646 0.13646 +612 1 0.703269143581204 0.150933 +621 1 0.705868277579243 0.149988 +630 1 0.735304036557552 0.133891 +639 1 0.701919162079755 0.140775 +648 1 0.714774599695102 0.142638 +657 1 0.702484973766457 0.145888 +666 1 0.714275579611685 0.144289 +675 1 0.704751859660089 0.148070000000001 +684 1 0.725303040977116 0.139712 +693 1 0.7387858141582 0.134658 +702 1 0.737133543332787 0.135015 +711 1 0.71646444290315 0.140297 +720 1 0.735722464691124 0.136141 +729 1 0.709409780786399 0.144924000000001 +738 1 0.732557952972944 0.137912 +747 1 0.70280408850191 0.148749 +756 1 0.736014898114798 0.13601 +765 1 0.719075344267219 0.141568000000001 +774 1 0.705605602403474 0.142286 +783 1 0.714188898670159 0.143287 +792 1 0.708351298492815 0.14399 +801 1 0.701618595782959 0.140845 +810 1 0.70444245319677 0.142743000000001 +819 1 0.701265538602256 0.146322 +828 1 0.731290352078885 0.136012 +837 1 0.717812366249433 0.137717 +846 1 0.724767043040464 0.142316 +855 1 0.742408530369704 0.132985 +864 1 0.715927851943484 0.141312 +873 1 0.70674171896429 0.145039 +882 0.9 0.705311244948852 0.148678 +891 0.9 0.725745115884173 0.136097 +900 0.9 0.717545411117844 0.145251 +909 0.9 0.71139239340832 0.147716 +918 0.9 0.724110435852362 0.14723 +927 0.9 0.730304270252922 0.142741000000001 +936 0.9 0.702790994935148 0.14686 +945 0.9 0.714312901014806 0.142984 +954 0.9 0.718970705842182 0.14072 +963 0.9 0.702918434736422 0.146811 +972 0.9 0.763309367972586 0.135241 +981 0.9 0.70933162192829 0.14406 +990 0.9 0.714425781696018 0.142589 +999 0.9 0.707307109948989 0.146265 |
b |
diff -r d4ca551ca300 -r adc0f7765d85 cutoffs/cutoff.pair.08.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cutoffs/cutoff.pair.08.txt Thu Sep 07 15:06:58 2017 -0400 |
b |
@@ -0,0 +1,111 @@ +9 5.7 0.807699596880158 0.11075 +18 4.8 0.812102488456895 0.109521 +27 4.8 0.807059148683326 0.109122 +36 4.5 0.810359129334262 0.106039 +45 4.1 0.800899562591127 0.109428 +54 4 0.819734419515034 0.104148 +63 3.6 0.801137818633087 0.112097 +72 3.5 0.805611266779768 0.107837 +81 3.2 0.804721543883597 0.110678 +90 3.2 0.81977543427128 0.105428 +99 3.2 0.80949445276013 0.109698 +108 3.1 0.800113619307504 0.115339 +117 2.9 0.80163357132874 0.111922 +126 2.9 0.807430653105459 0.110639 +135 2.8 0.809101610526483 0.109881 +144 2.8 0.808015366939668 0.109303 +153 2.8 0.815481435245632 0.106845 +162 2.7 0.813972734460064 0.108256 +171 2.7 0.803878583908181 0.111767 +180 2.7 0.814908823498375 0.110845 +189 2.5 0.803934003900257 0.112334 +198 2.5 0.819507987133127 0.106736 +207 2.5 0.816975135145848 0.109487 +216 2.5 0.801969702289987 0.11426 +225 2.5 0.814854242652841 0.106264 +234 2.5 0.809009743289239 0.110027000000001 +243 2.5 0.816859180637723 0.10951 +252 2.5 0.825923196681225 0.104742 +261 2.3 0.805863672217281 0.108068 +270 2.3 0.812114384187481 0.110547 +279 2.3 0.811109163497932 0.109461 +288 2.3 0.80680260883464 0.104169 +297 2.3 0.800054845624205 0.110627 +306 2 0.80196269021579 0.117711 +315 2 0.809945512786952 0.109737 +324 2 0.819364610024312 0.110094 +333 2 0.812712294607599 0.111396 +342 2 0.801568817408624 0.113051 +351 2 0.804296495631151 0.112833 +360 2 0.809648638295373 0.106319 +369 2 0.810216908330331 0.110967 +378 2 0.80081744573067 0.112898 +387 2 0.809614612996421 0.114576 +396 2 0.800940767696918 0.109701 +405 1.8 0.800397828290365 0.115299 +414 1.8 0.80375463158553 0.116013 +423 1.8 0.80204726507529 0.110069 +432 1.8 0.829673313493101 0.109584 +441 1.8 0.801512586472861 0.110591 +450 1.8 0.801317568038407 0.11341 +459 1.8 0.806266605408301 0.11489 +468 1.8 0.804368073911341 0.112083 +477 1.8 0.80034140400705 0.113126 +486 1.8 0.803037870942011 0.112527 +495 1.8 0.816147055378533 0.113451 +504 1.8 0.804009255355535 0.113167 +513 1.8 0.809536527236661 0.111121 +522 1.8 0.810250462485448 0.113045 +531 1.8 0.813388275601197 0.117925 +540 1.8 0.810529197259932 0.111664 +549 1.8 0.80286846045568 0.114872 +558 1.8 0.805231767462103 0.112439 +567 1.8 0.803715104541846 0.112076 +576 1.8 0.800856311237744 0.113416 +585 1.8 0.815485369320364 0.113187 +594 1.8 0.810404807750137 0.111576 +603 1.8 0.80944688874663 0.114048 +612 1.8 0.813671662890222 0.107955 +621 1.8 0.805581198188585 0.111274 +630 1.5 0.801229670148542 0.121577 +639 1.5 0.812201198793304 0.109719 +648 1.5 0.80667869232878 0.111841 +657 1.5 0.817613487850624 0.110405 +666 1.5 0.816866335035283 0.107114 +675 1.5 0.8101804833249 0.117018 +684 1.5 0.818509280416726 0.115303 +693 1.5 0.80107589244594 0.116714 +702 1.5 0.829601652947073 0.107173 +711 1.5 0.820953668506561 0.108043 +720 1.5 0.811426464962464 0.114567 +729 1.5 0.800211543269215 0.1163 +738 1.5 0.814971683053337 0.111887 +747 1.5 0.810647742818646 0.111346 +756 1.5 0.823589356371534 0.1075 +765 1.5 0.808183381146845 0.115074 +774 1.5 0.804024248189691 0.11739 +783 1.5 0.809214446523965 0.114048 +792 1.5 0.816922307151774 0.116292 +801 1.5 0.817083370511117 0.107909 +810 1.5 0.803218112416446 0.122658 +819 1.5 0.801792365687339 0.115227 +828 1.5 0.824077216658734 0.106899 +837 1.5 0.808010406353855 0.115016 +846 1.5 0.805503181301868 0.111932 +855 1.5 0.810848938809874 0.114185 +864 1.5 0.80077383728855 0.11448 +873 1.5 0.804510616030174 0.115034 +882 1.5 0.800413194808912 0.119586 +891 1.5 0.820672192506787 0.110138 +900 1.5 0.810326379209945 0.112103 +909 1.5 0.811096513155687 0.117781 +918 1.5 0.804732736125644 0.117636 +927 1.5 0.807950118492599 0.113333 +936 1.5 0.807915776288532 0.113945 +945 1.5 0.832853748697962 0.107864 +954 1.5 0.819637006281129 0.112646 +963 1.5 0.807519443743277 0.110865 +972 1.5 0.816853380970702 0.113831 +981 1.5 0.802684401572777 0.116302 +990 1.5 0.809446019377896 0.112676 +999 1.5 0.804655052306582 0.112474 |
b |
diff -r d4ca551ca300 -r adc0f7765d85 refGenes.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/refGenes.bed Thu Sep 07 15:06:58 2017 -0400 |
b |
b'@@ -0,0 +1,66259 @@\n+chr1\t67092175\t67134971\tNM_001276351\t0\t-\t67093004\t67127240\t0\t8\t1429,187,70,113,158,92,86,42,\t0,3059,4076,23176,33576,34990,38966,42754,\n+chr1\t67092175\t67134971\tNM_001276352\t0\t-\t67093579\t67127240\t0\t9\t1429,70,145,68,113,158,92,86,42,\t0,4076,11062,19401,23176,33576,34990,38966,42754,\n+chr1\t67092175\t67134971\tNR_075077\t0\t-\t67134971\t67134971\t0\t10\t1429,70,145,68,143,113,158,92,86,42,\t0,4076,11062,19401,21438,23176,33576,34990,38966,42754,\n+chr1\t201283451\t201332993\tNM_000299\t0\t+\t201283702\t201328836\t0\t15\t453,104,395,145,208,178,63,115,156,177,154,187,85,107,2920,\t0,10490,29714,33101,34120,35166,36364,36815,38526,39561,40976,41489,42302,45310,46622,\n+chr1\t201283451\t201332993\tNM_001005337\t0\t+\t201283702\t201328836\t0\t14\t453,104,395,145,208,178,115,156,177,154,187,85,107,2920,\t0,10490,29714,33101,34120,35166,36815,38526,39561,40976,41489,42302,45310,46622,\n+chr1\t8352403\t8423687\tNM_001042682\t0\t-\t8355086\t8364133\t0\t13\t2717,181,147,721,223,1379,114,162,200,93,163,81,127,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,71157,\n+chr1\t8352403\t8817640\tNM_001042681\t0\t-\t8355086\t8656297\t0\t23\t2717,181,147,721,223,1379,114,162,200,93,163,81,99,100,125,49,105,97,106,126,71,469,481,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,113521,142659,145001,156223,188810,204071,205014,262157,271906,303569,464756,\n+chr1\t8352403\t8817640\tNM_012102\t0\t-\t8355086\t8656297\t0\t24\t2717,181,147,721,223,1379,114,162,200,93,163,81,99,100,125,49,105,97,106,126,71,469,185,481,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,113521,142659,145001,156223,188810,204071,205014,262157,271906,303569,440000,464756,\n+chr1\t33513998\t34165274\tNM_001281956\t0\t-\t33519517\t34165097\t0\t71\t2572,213,139,88,113,162,63,180,112,74,174,174,180,177,183,174,189,174,249,174,195,186,189,147,189,114,81,146,178,189,210,117,70,119,105,97,125,204,96,114,117,195,188,139,192,203,127,192,157,170,189,216,117,189,188,139,195,327,183,113,104,122,125,88,78,113,208,195,113,217,364,\t0,5466,5813,7464,9308,10883,13197,19051,19797,23023,23437,26526,27131,28721,32038,36178,43735,45301,53594,55375,57533,58507,63297,66754,69643,72505,73089,86866,88370,91283,97042,100505,102907,103500,108168,109371,110520,111052,112487,119423,121215,122361,132649,138324,143947,148891,178931,184754,186518,195090,200588,202287,210198,210517,211350,212548,225141,229281,258570,274601,278424,296744,305714,306470,311698,332885,404095,421761,518595,574978,650912,\n+chr1\t33513998\t34165842\tNM_052896\t0\t-\t33519517\t34165813\t0\t70\t2572,213,139,88,113,162,63,180,112,74,174,174,180,177,183,174,174,174,195,186,189,147,189,114,81,146,178,189,126,210,117,70,119,105,97,125,204,96,114,117,195,188,139,192,203,127,192,157,170,189,216,117,189,188,139,195,327,183,113,104,122,125,88,78,113,208,195,113,217,96,\t0,5466,5813,7464,9308,10883,13197,19051,19797,23023,23437,26526,27131,28721,32038,36178,53594,55375,57533,58507,63297,66754,69643,72505,73089,86866,88370,91283,91864,97042,100505,102907,103500,108168,109371,110520,111052,112487,119423,121215,122361,132649,138324,143947,148891,178931,184754,186518,195090,200588,202287,210198,210517,211350,212548,225141,229281,258570,274601,278424,296744,305714,306470,311698,332885,404095,421761,518595,574978,651748,\n+chr1\t41847188\t42035925\tNR_038261\t0\t-\t42035925\t42035925\t0\t4\t1257,219,112,119,\t0,1682,71224,188618,\n+chr1\t75202130\t75611114\tNM_001130058\t0\t-\t75203726\t75541447\t0\t24\t1703,85,89,71,74,104,95,263,88,93,100,132,113,84,124,61,126,85,85,74,49,39,82,75,\t0,9337,11574,11788,12474,13623,15735,16359,17126,17669,20230,25595,31855,34856,36382,39870,40755,49079,72827,98481,137451,194452,339304,408909,\n+chr1\t75202132\t75611116\tNM_001320285\t0\t-\t75203726\t75300660\t0\t26\t1701,85,89,71,74,104,95,263,88,93,100,132,113,84,124,61,126,85,85,74,91,49,39,67,82,77,\t0,9335,11572,11786,12472,13621,15733,16357,17124,17667,20228,25593,31853,34854,36380,39868,40753,49077,72825,98479,109482,137449,194450,196222,339302,408907,\n+chr1\t75202132\t75611116\tNM_001320287\t0\t-\t75203726\t75300660\t0\t25\t1701,85,'..b'0123\t230123\t0\t8\t106,146,111,89,40,166,208,100,\t0,3710,5076,6459,7823,9904,12166,12494,\n+chr15_KI270727v1_random\t241092\t241174\tNR_049886\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n+chr15_KI270727v1_random\t241092\t241174\tNR_049895\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n+chr15_KI270727v1_random\t241092\t241174\tNR_128721\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n+chr15_KI270727v1_random\t372321\t373405\tNM_001004719\t0\t+\t372419\t373361\t0\t1\t1084,\t0,\n+chr16_KI270728v1_random\t17231\t19833\tNM_001099687\t0\t+\t17263\t18809\t0\t2\t394,1037,\t0,1565,\n+chr16_KI270728v1_random\t17231\t19833\tNM_016212\t0\t+\t17263\t18809\t0\t2\t394,1037,\t0,1565,\n+chr16_KI270728v1_random\t17231\t19833\tNR_110886\t0\t+\t19833\t19833\t0\t4\t394,167,289,1037,\t0,499,1064,1565,\n+chr16_KI270728v1_random\t17231\t19833\tNR_110897\t0\t+\t19833\t19833\t0\t4\t394,167,289,1037,\t0,499,1064,1565,\n+chr16_KI270728v1_random\t17231\t19833\tNR_110910\t0\t+\t19833\t19833\t0\t3\t394,289,1037,\t0,1064,1565,\n+chr16_KI270728v1_random\t17231\t19833\tNR_110911\t0\t+\t19833\t19833\t0\t3\t394,289,1037,\t0,1064,1565,\n+chr16_KI270728v1_random\t17233\t19273\tNM_001205259\t0\t+\t17263\t18809\t0\t2\t392,477,\t0,1563,\n+chr16_KI270728v1_random\t17233\t19273\tNR_110914\t0\t+\t19273\t19273\t0\t3\t392,289,477,\t0,1062,1563,\n+chr16_KI270728v1_random\t17250\t19837\tNM_001330061\t0\t+\t17263\t18809\t0\t2\t375,1041,\t0,1546,\n+chr16_KI270728v1_random\t17251\t19833\tNM_001330066\t0\t+\t17263\t18809\t0\t2\t374,1037,\t0,1545,\n+chr16_KI270728v1_random\t933855\t936466\tNR_110898\t0\t-\t936466\t936466\t0\t3\t1039,290,268,\t0,1251,2343,\n+chr16_KI270728v1_random\t1001629\t1037618\tNR_135178\t0\t+\t1037618\t1037618\t0\t25\t105,103,322,130,110,172,175,173,104,151,211,133,128,196,134,396,152,192,188,193,183,182,148,197,233,\t0,1009,5706,6177,6753,7361,8561,11932,12489,13586,16444,16928,17168,17388,20254,20921,21434,22284,28770,30290,31190,34103,35109,35455,35756,\n+chr16_KI270728v1_random\t1331814\t1346848\tNR_130771\t0\t-\t1346848\t1346848\t0\t3\t5057,93,295,\t0,6640,14739,\n+chr16_KI270728v1_random\t1331814\t1346848\tNR_130772\t0\t-\t1346848\t1346848\t0\t2\t5057,295,\t0,14739,\n+chr17_GL000205v2_random\t54856\t57966\tNR_003682\t0\t-\t57966\t57966\t0\t1\t3110,\t0,\n+chr22_KI270731v1_random\t69141\t86923\tNR_003267\t0\t+\t86923\t86923\t0\t13\t189,171,135,81,193,158,150,137,188,128,113,114,280,\t0,732,4094,4362,9648,10233,11930,12603,16240,16660,16890,17084,17502,\n+chr22_KI270733v1_random\t122272\t135645\tNR_046235\t0\t+\t135645\t135645\t0\t1\t13373,\t0,\n+chr22_KI270733v1_random\t130203\t135280\tNR_003287\t0\t+\t135280\t135280\t0\t1\t5077,\t0,\n+chr22_KI270733v1_random\t121580\t121672\tNR_106782\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n+chr22_KI270733v1_random\t121580\t121672\tNR_128715\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n+chr22_KI270733v1_random\t121580\t121672\tNR_128716\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n+chr22_KI270733v1_random\t121580\t121672\tNR_128717\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n+chr22_KI270733v1_random\t125128\t125189\tNR_037458\t0\t+\t125189\t125189\t0\t1\t61,\t0,\n+chr22_KI270733v1_random\t125128\t125189\tNR_128714\t0\t+\t125189\t125189\t0\t1\t61,\t0,\n+chr22_KI270733v1_random\t125930\t127799\tNR_003286\t0\t+\t127799\t127799\t0\t1\t1869,\t0,\n+chr22_KI270733v1_random\t128876\t129032\tNR_003285\t0\t+\t129032\t129032\t0\t1\t156,\t0,\n+chr22_KI270733v1_random\t170214\t170275\tNR_037458\t0\t+\t170275\t170275\t0\t1\t61,\t0,\n+chr22_KI270733v1_random\t170214\t170275\tNR_128714\t0\t+\t170275\t170275\t0\t1\t61,\t0,\n+chr22_KI270733v1_random\t171011\t172880\tNR_003286\t0\t+\t172880\t172880\t0\t1\t1869,\t0,\n+chr22_KI270733v1_random\t173955\t174111\tNR_003285\t0\t+\t174111\t174111\t0\t1\t156,\t0,\n+chr22_KI270734v1_random\t72453\t74335\tNR_136575\t0\t+\t74335\t74335\t0\t1\t1882,\t0,\n+chr22_KI270734v1_random\t90958\t98408\tNR_136574\t0\t-\t98408\t98408\t0\t4\t429,155,73,884,\t0,5076,5960,6566,\n+chr22_KI270734v1_random\t131493\t137393\tNM_005675\t0\t+\t131645\t136994\t0\t5\t262,161,101,141,549,\t0,342,3949,4665,5351,\n+chr22_KI270734v1_random\t138078\t161750\tNM_001195226\t0\t-\t138479\t156446\t0\t14\t589,89,99,176,147,93,82,80,117,65,150,35,209,62,\t0,664,4115,5535,6670,6925,8561,9545,10037,10335,12271,12908,18210,23610,\n+chr22_KI270734v1_random\t138078\t161852\tNM_016335\t0\t-\t138479\t161586\t0\t15\t589,89,99,176,147,93,82,80,117,65,150,35,209,313,164,\t0,664,4115,5535,6670,6925,8561,9545,10037,10335,12271,12908,18210,23235,23610,\n' |
b |
diff -r d4ca551ca300 -r adc0f7765d85 splice_trap.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splice_trap.xml Thu Sep 07 15:06:58 2017 -0400 |
[ |
@@ -0,0 +1,35 @@ +<tool id="splice_trap" name="SpliceTrap" version="1.0.0"> + <description>A statistic tool for quantifying exon inclusion ratios in paired-end RNA-seq data, with broad applications for the study of alternative splicing. + </description> + <requirements> + <requirement type="package" version="1.2.1.1">bowtie</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + perl $__tool_directory__/SpliceTrap.pl -p 8 -l $__tool_directory__ -d hg38v3 -1 "$input1" -2 "$input2" -s "$read_size" "$output1" "$output2" + ]]></command> + <inputs> + <param type="data" name="input1" format="fastq" /> + <param type="data" name="input2" format="fastq" /> + <param name='read_size' type='integer' value='50' label="Read size" /> + </inputs> + <outputs> + <data name="output1" format="txt" /> + <data name="output2" format="txt" /> + </outputs> + <tests> + <test> + <param name="input1" value="input1.fastq"/> + <param name="input2" value="input2.fastq"/> + <output name="output1" file="output1.txt"/> + <output name="output2" file="output2.txt"/> + </test> + </tests> + <help> + **SpliceTrap** + </help> + <citations> + <citation type="bibtex"> + http://rulai.cshl.edu/splicetrap + </citation> + </citations> +</tool> |
b |
diff -r d4ca551ca300 -r adc0f7765d85 src/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Makefile Thu Sep 07 15:06:58 2017 -0400 |
b |
@@ -0,0 +1,3 @@ +all: + g++ -O2 splicetrap.estimate.cpp -o Pair_estimate_c + mv Pair_estimate_c ../bin |
b |
diff -r d4ca551ca300 -r adc0f7765d85 src/splicetrap.estimate.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/splicetrap.estimate.cpp Thu Sep 07 15:06:58 2017 -0400 |
[ |
b'@@ -0,0 +1,854 @@\n+//Author: Jie Wu@CSHL\n+//TO replace the original Perl code\n+\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <iostream>\n+#include <fstream>\n+#include <string.h>\n+#include <unistd.h>\n+#include <sstream>\n+#include <map>\n+#include <vector>\n+#include <math.h>\n+#include <time.h>\n+#include <limits>\n+#include <iomanip>\n+\n+using namespace std;\n+\n+int MAX_LINE_LEN = 1024*1024;\n+\n+void printusage()\n+{\n+ cout<< "\\tUsage:"<<endl;\n+ cout<<"\\t-s\\tread_size"<<endl;\n+ cout<<"\\t-b\\tIRM file"<<endl;\n+ cout<<"\\t-f\\tFZM file"<<endl;\n+ cout<<"\\t-1\\tmapping result 1"<<endl;\n+ cout<<"\\t-2\\tmapping result 2"<<endl;\n+ cout<<"\\t-d\\tTXdb database file"<<endl;\n+ cout<<"\\t-o\\toutput prefix"<<endl;\n+ \n+}\n+\n+struct ReadPairInfo\n+{\n+\tbool isPaired;\n+\tvector<int> IsoFlag; //flag as 1 if mapped to the isoform\n+\tvector<int> Size; //fragment size in the isoform\n+\tvector<int> Pos;\n+};\n+\n+class TXdb_entry{\n+\tpublic:\n+\t string chrid;\n+\t //int start;\n+\t //int end;\n+\t string id;\n+\t bool iscomplete;\n+\t int start;\n+\t int end;\n+\t int exonstarts[3];\n+\t char strand;\n+\t int exonsize[3];\n+\t int len1;\n+\t int len2;\n+\t vector< ReadPairInfo > pairs;\n+\t int snum1; //single-end reads\n+\t int snum2;\n+\t int snum12;\n+\t float ir, bir;\n+\t int enum1; //exon body reads\n+\t int enum2;\n+\t int enum3;\n+\t \n+\t int jnum12;\n+\t int jnum23;\n+\t int jnum13;\n+\n+\t int totalreadnum;\n+\t void estimate(vector<double> &FZM, map<string, vector<double> > &IRM, int &read_size)\n+\t {\n+\t\t float Maxe=0;\n+\t\t float Max = -numeric_limits<float>::max();\n+\t\t float BMaxe=0;\n+\t\t float BMax=-numeric_limits<float>::max();\n+\t\t for(float e1=0.001; e1<1;e1=e1+0.001)\n+\t\t {\n+\t\t\t float efix1 = e1*len1/\n+\t\t\t (e1*exonsize[1]+len2);\n+\t\t\t float efix2 = 1-efix1;\n+\t\t\t float LL = snum12*log( efix1/(len1-read_size+1)\n+\t\t\t + efix2/(len2-read_size+1) )\n+\t\t\t + snum1*log(efix1/(len1-read_size+1)) \n+\t\t\t\t + snum2*log(efix2/(len2-read_size+1));\n+\t\t\t\t //cout<<LL<<endl;\n+\t\t\t\t //cout<<efix1<<"\\t" <<pairs.size()<<"\\t" <<len2<<"\\t"\t<<endl;\n+\t\t //int num1=0, num2=0,num12=0;\n+\t\t for(int i=0;i<pairs.size();i++)\n+\t\t\t {\n+ if(pairs[i].IsoFlag[0] == 1)\n+\t\t\t {\n+\t\t\t\t if(pairs[i].IsoFlag[1] == 0)\n+\t\t\t\t {\n+\t\t\t\t\t // num1++;\n+\t\t\t\t\t float tmp=FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1);\n+\t\t\t\t\t if (tmp == 0)\n+\t\t\t\t\t {\n+\t\t\t\t\t \t LL = LL - 308;\n+\t\t\t\t\t }\n+\t\t\t\t\t else\n+\t\t\t\t\t {\n+\t\t\t\t\t\t LL = LL + log(tmp);\n+\t\t\t\t\t }\n+\t\t\t\t }\n+\t\t\t\t else\n+\t\t\t\t {\n+\t\t\t\t\t // num12++;\n+\t\t\t\t\t float tmp=(FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1)) \n+\t\t\t\t\t + (FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1));\n+\t\t\t\t \t if(tmp ==0)\n+\t\t\t\t\t {\n+\t\t\t\t\t LL = LL -308;\n+\t\t\t\t\t }\n+\t\t\t\t\t else\n+\t\t\t\t\t {\n+\t\t\t\t\t\t LL = LL + log(tmp);\n+\t\t\t\t\t }\n+\t\t\t\t }\n+\t\t\t }\n+\t\t\t else\n+\t\t\t {\n+\t\t\t\t //num2++;\n+\t\t\t\t float tmp = FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1);\n+\t\t\t\t if(tmp ==0)\n+\t\t\t\t {\n+\t\t\t\t LL= LL - 308; \n+\t\t\t\t }\n+\t\t\t\t else\n+\t\t\t\t {\n+\t\t\t\t\t LL = LL +log(tmp);\n+\t\t\t\t }\n+\t\t\t }\n+\t\t\t }\n+\t\t\t //cout<<num1<<"\\t"<<num2<<"\\t"<<num12<<"\\n";\n+\t\t\t //cout<<LL<<endl;\n+\t\t\t \n+\t\t\t if(IRM["CA"].size() > 0)\n+\t\t\t {\n+\t\t\t\t float BLL;\n+\t\t\t\t string eventtype =id.substr(0,2);\n+\t\t\t\t if(!eventtype.compare("ME"))\n+\t\t\t\t {\n+\t\t\t\t\t eventtype = "CA";\n+\t\t\t\t }\n+\t\t\t\t BLL = LL + log(IRM[eventtype][int(e1/0.001)]);\n+\t\t\t\t if(BLL > BMax)\n+\t\t\t\t { \n+\t\t\t\t\t BMax = BLL;\n+\t\t\t\t\t BMaxe = e1;\n+\t\t\t\t }\n+\t\t\t }\n+\t\t\t\t //cout<<LL<<endl;\n+\t\t\t if(LL > Max)\n+\t\t\t {\n+\t\t\t\t Max= LL;\n+\t\t\t\t Maxe = e1;\n+\n+\t\t\t }\n+\t\t\t\t \n+\t\t }\n+\t\t ir= Maxe;\n+\t\t bir= BMaxe;\n+\t }\n+\t\t\n+\t TXdb_entry(char *line)\n+\t {\n+\t\t size_t found;\n+\t\t string linestr(line);\n+\t\t found = linestr.find("[L]");\n+\t\t if(found != string::npos)\n+\t\t {\n+\t\t\t iscomplete=1;\n+\t\t\n+\t\t \tchar *token;\n+\n+\t\t \ttoken = strtok(line,"\\t");\n+\t\t \tif(token==NULL)\n+\t\t \t{\n+\t\t\t\t cout<<"error1"<<endl;\n+\t\t\t\t exit(0);\n+\t\t\t }\n+\t\t\t else\n+\t\t\t {\n+\t\t\t\t c'..b'gle_read_num<<endl;\n+ cout<<"Paired reads used:\\t"<<total_pair_read_num<<endl;\n+\n+ log_file<<"#total_read_num:\\t"<<total_read_num*2<<endl;\n+ log_file<<"#total_single_read_num:\\t"<<total_single_read_num<<endl;\n+ log_file<<"#total_pair_read_num:\\t"<<total_pair_read_num<<endl;\n+ \n+ //Calculate ratios and output\n+ cout<<"Now doing estimations and output to "<<ratio_file_name<<"\\t"<<num_file_name<<endl;\n+ ofstream ratio_file(ratio_file_name.c_str());\n+ ofstream num_file(num_file_name.c_str());\n+ map<string, TXdb_entry>::iterator txdb_it = TXdb_entries.begin();\n+ for(;txdb_it!=TXdb_entries.end();txdb_it++)\n+ {\n+\t string eventid = txdb_it->first;\n+\t //cout<< eventid <<endl; \n+\t txdb_it->second.estimate(FZM,IRM,read_size);\n+/*\t ratio_file<< eventid<<"\\t"\n+\t \t\t<<setprecision(4)<<fixed\n+\t <<txdb_it->second.ir<<"\\t"\n+\t\t\t<<txdb_it->second.bir<<"\\t"\n+\t\t\t<<"NA\\tNA\\tNA\\t"\n+\t\t\t<<txdb_it->second.jnum12<<"\\t"\n+\t\t\t<<txdb_it->second.jnum23<<"\\t"\n+\t\t\t<<txdb_it->second.jnum13<<"\\t"\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\\t"\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\\t"\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\\t"\n+\t\t\t<<txdb_it->second.exonsize[0]<<"\\t"\n+\t\t\t<<txdb_it->second.exonsize[1]<<"\\t"\n+\t\t\t<<txdb_it->second.exonsize[2]<<"\\t"\n+\t\t<<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<endl;\n+\t\t*/\n+\t ratio_file<<eventid<<"\\t"\n+\t \t\t<<setprecision(4)<<fixed\n+\t\t\t<<txdb_it->second.ir<<"\\t" //1\n+\t\t\t<<txdb_it->second.bir<<"\\t" //2\n+\t\t\t<<txdb_it->second.chrid<<"\\t" //3\n+\t\t\t<<txdb_it->second.start<<"," //\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\\t" //4\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\\t"//5\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\\t"//6\n+\t\t\t<<txdb_it->second.strand<<"\\t"//7\n+\t\t\t<<txdb_it->second.jnum12<<"\\t"//8\n+\t\t\t<<txdb_it->second.jnum23<<"\\t"//9\n+\t\t\t<<txdb_it->second.jnum13<<"\\t"//10\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\\t"//11\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\\t"//12\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\\t"//13`\n+\t\t\t<<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<"\\t"//14\n+\t\t\t<<txdb_it->second.exonsize[0]<<"\\t"//15\n+\t\t\t<<txdb_it->second.exonsize[1]<<"\\t"//16\n+\t\t\t<<txdb_it->second.exonsize[2]<<endl;//17\n+\n+\t num_file<< eventid<<"\\t"\n+\t\t\t<<txdb_it->second.chrid<<"\\t"\n+\t\t\t<<txdb_it->second.start<<","\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\\t"\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\\t"\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\\t"\n+\t\t\t<<txdb_it->second.strand<<"\\t"\n+ <<txdb_it->second.enum1<<"\\t"\n+\t\t\t<<txdb_it->second.enum2<<"\\t"\n+\t\t\t<<txdb_it->second.enum3<<"\\t"\n+\t\t\t<<txdb_it->second.jnum12<<"\\t"\n+\t\t\t<<txdb_it->second.jnum23<<"\\t"\n+\t\t\t<<txdb_it->second.jnum13<<endl;\n+ }\n+ log_file.close();\n+ ratio_file.close();\n+ num_file.close();\n+ clock_finish = clock();\n+ cout<<"Done! time used:"<<(double)(clock_finish-clock_start)/CLOCKS_PER_SEC<<" seconds"<<endl;\n+\n+ \n+}\n' |
b |
diff -r d4ca551ca300 -r adc0f7765d85 test-data/input1.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input1.fastq Thu Sep 07 15:06:58 2017 -0400 |
b |
b"@@ -0,0 +1,200484 @@\n+@ERR030881.107 HWI-BRUNOP16X_0001:2:1:13663:1096#0/1\n+ATCTTTTGTGGCTACAGTAAGTTCAATCTGAAGTCAAAACCAACCAATTT\n++\n+5.544,444344555CC?CAEF@EEFFFFFFFFFFFFFFFFFEFFFEFFF\n+@ERR030881.311 HWI-BRUNOP16X_0001:2:1:18330:1130#0/1\n+TCCATACATAGGCCTCGGGGTGGGGGAGTCAGAAGCCCCCAGACCCTGTG\n++\n+GFFFGFFBFCHHHHHHHHHHIHEEE@@@=GHGHHHHHHHHHHHHHHHHHH\n+@ERR030881.1487 HWI-BRUNOP16X_0001:2:1:4144:1420#0/1\n+GTATAACGCTAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCATAGTA\n++\n+55*'+&&5'55('''888:8FFFFFFFFFF4/1;/4./++FFFFF=5:E#\n+@ERR030881.9549 HWI-BRUNOP16X_0001:2:1:1453:3458#0/1\n+AACGGATCCATTGTTTCGAGAACGTGATCGCCCTCATCTACCTAGCCTCA\n++\n+D<@DDA@A:AHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHBHHHHHHH\n+@ERR030881.13497 HWI-BRUNOP16X_0001:2:1:16344:4145#0/1\n+GCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCACCATCGTGC\n++\n+F@FFFGGFGFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHH\n+@ERR030881.14070 HWI-BRUNOP16X_0001:2:1:4377:4232#0/1\n+TGGACAGTTGCTCCTGGCTCCAGAACCTGTCTTGCAAGGGACAGTGGGGT\n++\n+A:AA@HHHHHHHHHHHHHHHHHHHIHHHHHHHHHHGF=GFHHHH@@?AA*\n+@ERR030881.16375 HWI-BRUNOP16X_0001:2:1:2265:4573#0/1\n+ATTAGGAAACATGGAATTTTTTTAAAGGTTTTTCTTGTATCTTTTTTTTT\n++\n+@<><CHHHHHHHHHHHHHHHHHGGHHHHHHHHHHGGGHHHHHHHHGGGGG\n+@ERR030881.18437 HWI-BRUNOP16X_0001:2:1:13904:4828#0/1\n+CAATAGCCAGATGGTTGGTGGGGCAGCCAGGCAGGGAGGACCCAGGGCTG\n++\n+555544555544555;AAAAFFBBEEEE;=FCB9F===<<FFFFEFFEEE\n+@ERR030881.18768 HWI-BRUNOP16X_0001:2:1:15563:4868#0/1\n+GTGCCAAATTGTCACATTCGAGCTTGAGGCTGTGGTACTGAGCTTGCAGT\n++\n+D>BFD@@?>>54454?FFGFGGGGGGGGGGGGGEGGGGGGGGGEGGGGGG\n+@ERR030881.20718 HWI-BRUNOP16X_0001:2:1:12184:5115#0/1\n+CCCGGCCTAACTTTCATTTAATTTCAATGAATTTTCTTTTTTTTTTTTTT\n++\n+56455==@=>HHHHHHHHHGHHHHHHHHGH=HHHHHHEEEECEEEEEEEE\n+@ERR030881.22833 HWI-BRUNOP16X_0001:2:1:13089:5358#0/1\n+GGAGAAGGGGCGAGGGAAGAAGACCTTTGCTATCCCAGATACCAGGACTG\n++\n+55544145444/444GFDFG9A@@@DD>.F@><<=FDD@AGG>GGEGGEG\n+@ERR030881.23643 HWI-BRUNOP16X_0001:2:1:7921:5452#0/1\n+CGGCCCCCTGCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCA\n++\n+FBDFFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHH\n+@ERR030881.28299 HWI-BRUNOP16X_0001:2:1:6428:5960#0/1\n+ATGAGAAGGAGCCATCAGGACCTTATGAAAGCGACGAAGACAAGAGTGAT\n++\n+55554DDFFFBBFFFHHGHHHHHHHHHHHHHHHHHHDHH8HHHHHHHHFH\n+@ERR030881.28475 HWI-BRUNOP16X_0001:2:1:14780:5977#0/1\n+CGAAAACCAACTCTTTACCTAACTTTGCATGGTGCTTAGTCAAGGACTCC\n++\n+555,4&4551FFFFFBF3BDFFFFFFEFFFFBEFFFFFFDFFFFFFFFF=\n+@ERR030881.29253 HWI-BRUNOP16X_0001:2:1:1570:6070#0/1\n+GGAATGTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCA\n++\n+HGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.30545 HWI-BRUNOP16X_0001:2:1:4103:6216#0/1\n+CAACTCTTTACCTAACTTTGCATGGTGCTTAGTCAAGGACTCCTGCGACC\n++\n+54-55A@A@@HHHHHFFGGE555558<=;=55555AAAA?HHHHH>8@@>\n+@ERR030881.32582 HWI-BRUNOP16X_0001:2:1:12474:6471#0/1\n+CTTGCCTCACATGTCAGGGCAGGTATCCACCTAACCAGGCTGCAGGGGAG\n++\n+555555544444544HHHHGHHHHHHHHHHHHHHHHHHHHHH5@HFFF*F\n+@ERR030881.33730 HWI-BRUNOP16X_0001:2:1:14154:6628#0/1\n+CCAGCCTTGATACAGCATTTTCCACTTCTCTCTGTAGAGATCAGACGATT\n++\n+55555555(5@>@=:@=8.@04554CCCCC.441445444-555445555\n+@ERR030881.35226 HWI-BRUNOP16X_0001:2:1:3903:6867#0/1\n+CAGCATCCTGCTTAGGGCCCTGGAAACTGGGGAAATAGGTAGCCAGGTGG\n++\n+55555A@AAAGGEGGGGGGGGGGGGGGGGGGGCGGGFEGFGGGGFGGCGG\n+@ERR030881.38182 HWI-BRUNOP16X_0001:2:1:17495:7451#0/1\n+CACCATCGTGCCCGTTCTTGTCTTCCTTGGAGAGGTGGGCCTGGGAACCC\n++\n+5544455,0545445FFFEEFFFFFFFFFEEBC;D6<5-?FFFFFFFFFF\n+@ERR030881.41234 HWI-BRUNOP16X_0001:2:1:14816:8065#0/1\n+CTCTCCTCTAACCCTCCAGGCCTTAGCTTGCCTCACATGTCAGGGCAGGT\n++\n+55,34)4-53HHEHHGGGGG7DC?@GG;BGGEGGGGGGGGGGGGGGGGGA\n+@ERR030881.55301 HWI-BRUNOP16X_0001:2:1:7892:11256#0/1\n+CAAAAATGTAGCTGCCCTGACCTGGTCTCCCCTGACCCTTCCACGGGGCT\n++\n+56624545442525554455FFECECGEDGFF8DF###############\n+@ERR030881.57346 HWI-BRUNOP16X_0001:2:1:20039:11573#0/1\n+GACAGATGATGTCCAAGCCCCTACATGCCCCAGACCCCAGGGCACGGCTG\n++\n+##################################################\n+@ERR030881.57608 HWI-BRUNOP16X_0001:2:1:16788:11614#0/1\n+ATCTCGTAGTACATCACATAGTGACGCTGCATCTCTGACTTCTCACTGGC\n++\n+5653445555HHHHHHHHHH9;@=@HHHHHHDHHHHHHHHHHHHHHHHDH\n+@ERR030881.58998 HWI-BRUNOP16X_0001:2:1:14252:11816#0/1\n+CACCATTTGACCCTGAGCCAG"..b':6601:197274#0/1\n+CGGCCCCCTGCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCA\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHH\n+@ERR030881.74446016 HWI-BRUNOP16X_0001:2:68:6384:197508#0/1\n+TGTGTCTTGTGCTAAACATTCCTTTCTCTCCGTGCCTCTGTCTCCCCTCT\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74446277 HWI-BRUNOP16X_0001:2:68:20062:197534#0/1\n+CAGCCCTCTCACCCTGGTACTGCATGCACGCAATGCTAGCTGCCCCTTTC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHEHHHGHIIHHHHHAHHHHHHHHHGH\n+@ERR030881.74446743 HWI-BRUNOP16X_0001:2:68:3752:197585#0/1\n+CTGGGACCCAGGCAGCTGCCACCTTGTCACCATGAGAGAATTTGGGGAGT\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHG\n+@ERR030881.74446915 HWI-BRUNOP16X_0001:2:68:8353:197599#0/1\n+GGACTGTCCACCAGGTCCCGACGGGCAGGAATGCAGATGGGTACCTTTCC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHFHHHHHHHE\n+@ERR030881.74447547 HWI-BRUNOP16X_0001:2:68:9591:197654#0/1\n+GCCAGTGGTGGGCATGCGGCTGCGGAGCACGTCCTGAGCTGTGGGGACGT\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHBDDBB@9@AAHHGHHHHHGHHDHHH\n+@ERR030881.74449534 HWI-BRUNOP16X_0001:2:68:1488:197840#0/1\n+CTACTCCTTCCGCAGCAGGGAGGTGTGCAGAGCCGTGCTCAGCTTCCTCT\n++\n+HHHHHHHHHHHHHHHHHHHHHHH8HAGFGGFHHHFGGHHHHHGHHHIHGH\n+@ERR030881.74453424 HWI-BRUNOP16X_0001:2:68:5325:198191#0/1\n+GTCCTGCCCTACCTCTCCCAAGAGCACCAGCAGCAGGTCTTGGGAGCCAT\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74454854 HWI-BRUNOP16X_0001:2:68:18716:198301#0/1\n+GCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTTGGGACCTGGTCTC\n++\n+55555?>?>>5555444555444442=5<=55444C=6C>2555551544\n+@ERR030881.74455894 HWI-BRUNOP16X_0001:2:68:18831:198398#0/1\n+CTGGGACCTGCGGGAGGGCCGCCAGCTGCAGCAGCATGACTTCAGCTCCC\n++\n+HHHHHHHHHHHHHHHHHGEHHHHHHHCHHHFHHHHHEFGDFHHHEHBFHH\n+@ERR030881.74457151 HWI-BRUNOP16X_0001:2:68:9093:198528#0/1\n+AAACAAAACATTTTCCTTTGGGTTTTTTTTTTTCTTTCTTTTTTCTCCGC\n++\n+HHHHHHGGGHHHHHHHHHHHHHHHHHHHGGGGGBGGGBHHGGGGGGGHHH\n+@ERR030881.74458067 HWI-BRUNOP16X_0001:2:68:15716:198600#0/1\n+GTTCCAACCACCGCCGGGGAGGGAGAGGGCCCCTGTCCCTGCAGGGCCGC\n++\n+ADAD?DEFBEHHHHHCCDGDHCEEHCGBGAHHHHHCDCGD5555424554\n+@ERR030881.74460390 HWI-BRUNOP16X_0001:2:68:15056:198815#0/1\n+CCTGGAACTGCCTGACCATAGTCTGATTCTGCAGGTCCCAGACCACAATG\n++\n+?ACDC?DDGG=DDD>55554GGFFADDDA==<==>D=DAD5445544445\n+@ERR030881.74460430 HWI-BRUNOP16X_0001:2:68:19789:198814#0/1\n+CACAAATCCCGTTCAGCCTTTTGACGATCTCAGCCTGTTTGTGCATCTCG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74460883 HWI-BRUNOP16X_0001:2:68:19795:198864#0/1\n+CTGCCTGGCACGCACCCGGTGGCTGCACCATCCACACGCAAGACTGCAAC\n++\n+HHHHHHHHHHHHHHHHHDHHHHHGHHFHHHHHHHHHHHFHHHFHGHFHHH\n+@ERR030881.74463349 HWI-BRUNOP16X_0001:2:68:7211:199081#0/1\n+CGGGGAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGAGAAA\n++\n+HHHHHHHGEHHHHHHHGGGGEHGHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74463429 HWI-BRUNOP16X_0001:2:68:16435:199090#0/1\n+CGGGCTCCTCGCACCTACCCCAGCAACTCAAATTCACCACCTCGGACTCC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74466171 HWI-BRUNOP16X_0001:2:68:1844:199339#0/1\n+ATTTTTTTAAAGGTTTTTCTTGTATCTTTTTTTTTTTTTTTTTTTTTTTT\n++\n+HHHHHHHGGHHHHGHHHGHC83=;><=@=<CCCCCCCCCCCCCCCCCCCC\n+@ERR030881.74466232 HWI-BRUNOP16X_0001:2:68:10444:199339#0/1\n+CCTGGGTCGCCCACCCTCACCCTGCTCCTCCCAGCTCAGCTAAGCTCGTC\n++\n+HHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74466444 HWI-BRUNOP16X_0001:2:68:18815:199349#0/1\n+GTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCATAGTA\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIH\n+@ERR030881.74468879 HWI-BRUNOP16X_0001:2:68:9428:199583#0/1\n+CACCAACCAGCCGCGGGCCGCGCAGCTGGTGGACAAGGACAGCACCTTCC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHGHH\n+@ERR030881.74470889 HWI-BRUNOP16X_0001:2:68:4971:199775#0/1\n+CAGAGCTTAGCGGGGGGCTGAGCTGGTGTCTTTGAACCTCTAGTCCCAGG\n++\n+HHHHHHHHHHHHHHHCGGGHEHHFHHEHHHHHHHHHHHEHHHHHFHHHHH\n+@ERR030881.74471439 HWI-BRUNOP16X_0001:2:68:16981:199816#0/1\n+TGTGTGCCCCATTTCTCCATATAGTCTTCCTCAGGCAGGTCCTAGGTCCC\n++\n+??DDDEDECC<=@><CCC@?<<<=@EGGGGG?GGGGCGCE>@@6=55554\n+@ERR030881.74471978 HWI-BRUNOP16X_0001:2:68:9605:199866#0/1\n+CCCAGGTCCTGCCCTACCTCTCCCAAGAGCACCAGCAGCAGGTCTTGGGA\n++\n+HHHHHHHHHGHHHHHHHHGHHHHHHHHHHHHFHHHHHHHHHHGHHHHAHE\n' |
b |
diff -r d4ca551ca300 -r adc0f7765d85 test-data/input2.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input2.fastq Thu Sep 07 15:06:58 2017 -0400 |
b |
b"@@ -0,0 +1,200484 @@\n+@ERR030881.107 HWI-BRUNOP16X_0001:2:1:13663:1096#0/2\n+CGGATTTCAGCTACTGCAAGCTCAGTACCACAGCCTCAAGCTCGAATGTG\n++\n+HH;HHHHHGHHHHHHHHHHGHDHEHHHHHEHHHHBHHFHHHHHHHHHD0F\n+@ERR030881.311 HWI-BRUNOP16X_0001:2:1:18330:1130#0/2\n+GAGTGCGAGGGAAGTCAGGGGAGGATCGCGAGGGAAGCCAGGGGAGGATC\n++\n+HHHHHBF8G>&4555GGGGGHHGGEHHHHHHHHH=HHHHHHHHHHHGB9H\n+@ERR030881.1487 HWI-BRUNOP16X_0001:2:1:4144:1420#0/2\n+AACCGGGGGACGGGCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTT\n++\n+@FEEH>==9=05544FGFGFHHHBHHHFHF>AAAAHHHHHHHEHHHHHHH\n+@ERR030881.9549 HWI-BRUNOP16X_0001:2:1:1453:3458#0/2\n+TCAGCATGCTTCTTAGGGCCCTGGAAACTGGGGAAATAGGTAGCCAGGTG\n++\n+5515555/5515444FFHHHHHHHHHHHHHHHHHHHHHHHEHHHHGHH@H\n+@ERR030881.13497 HWI-BRUNOP16X_0001:2:1:16344:4145#0/2\n+GGCCAAGCAGGTCACCGCTCCCGAGCTGAACTCTATCATCCGACAGCAGC\n++\n+HHHHFGHHHGFAFFFHHFHHHHH/HHHHGHHEHHEHGFHHDGF=AA=@@8\n+@ERR030881.14070 HWI-BRUNOP16X_0001:2:1:4377:4232#0/2\n+TGGAGTCCTTCATGCCCAGGTCTGGAACCCAGGTTCTGACCCCAGGGCCC\n++\n+FDFFFEGGGGHHHHGHHHHH>AAA8GGGGGHHHGHHHHHHHHHHHGFHHH\n+@ERR030881.16375 HWI-BRUNOP16X_0001:2:1:2265:4573#0/2\n+GGCCAGCCGGGCTCCAGAGGGGTCAGGGCGCGACGAGAACCAACTCTTTA\n++\n+FDFFBDFDDBAAADDGHGHHHHBHHHHHGHGHHHHHHHHHHHHHHHHHFH\n+@ERR030881.18437 HWI-BRUNOP16X_0001:2:1:13904:4828#0/2\n+GGGCTCTCCCTCTGTATCGCCTGGGGAGGCTGCTGAGGTGACTTTTTGGA\n++\n+A?DDABFBFFHGHEHHHHHHHHHIHHDHCC55555BFFCD;:9=;=@=><\n+@ERR030881.18768 HWI-BRUNOP16X_0001:2:1:15563:4868#0/2\n+CACAGTAGGCGTTCTATAAATGTGTCACAAGAATGGCTTCCCTCAGGAAG\n++\n+55444;@=@>HHHDHHHHHFFGHHHHHHHHHIHHHFH=HHBB?<D#####\n+@ERR030881.20718 HWI-BRUNOP16X_0001:2:1:12184:5115#0/2\n+GCCTGGGCAACATAGCGAAACCACATCTCTACAAAAAAATCCTCCAAAAT\n++\n+HGIEHHHHGHF=@FF8A>>@HFHH=HHHHHHHIHHHGGGGH@@HHGGGEG\n+@ERR030881.22833 HWI-BRUNOP16X_0001:2:1:13089:5358#0/2\n+AGCCACTGCCTTTCTGCTCAGATGCTGGCACCTCCGCCCCCGGGGCTGCC\n++\n+EHHHFF?GFDGFFB???DDAD<FC<55555FFGGG<?>>61/5444-555\n+@ERR030881.23643 HWI-BRUNOP16X_0001:2:1:7921:5452#0/2\n+CGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAGCTGTCCC\n++\n+HHHHHHHHHHHHHHHGHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHDHHH\n+@ERR030881.28299 HWI-BRUNOP16X_0001:2:1:6428:5960#0/2\n+GGAGTCACAGGATTTGGAGGCAGGAGTGCTGGCGGGAAGGGCATTCAGGA\n++\n+HHHHHHHFEH?=DDDHIFHHEHEDE?DAADH@FHHC'@CE##########\n+@ERR030881.28475 HWI-BRUNOP16X_0001:2:1:14780:5977#0/2\n+CTCGGAAGGCAAGGCACATCTTGTGGTAGAAAATTTCGTGCAAATTAGGA\n++\n+HHHHHGGH=IADDADHHGHH444-4A?A?AGHGHHFGFG@/5544HDHEE\n+@ERR030881.29253 HWI-BRUNOP16X_0001:2:1:1570:6070#0/2\n+CTTCGCGAGGTCTTGCTCCCTTGGGACCTGGTCTCCCATCTGACCCTCCA\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.30545 HWI-BRUNOP16X_0001:2:1:4103:6216#0/2\n+GTTTAAAGGTGATACTTATTCTCGGAAGGCAAGGCACATCTTGTGGGAGA\n++\n+EF;GG4445544544FF@FFEHFHFFHGHH####################\n+@ERR030881.32582 HWI-BRUNOP16X_0001:2:1:12474:6471#0/2\n+GGGACAGGGAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGA\n++\n+FF8FFBFFFFFDF@FCD>CFF@@F:HEHEHHHHBHHHHHF==<>5?DDA;\n+@ERR030881.33730 HWI-BRUNOP16X_0001:2:1:14154:6628#0/2\n+GTGAGGGTGGGCGACCCAGGATTCCCCCTCCCCTTCCCAAATAAAGATGA\n++\n+BEFDB44(4411445DA?ADHHHHIFDDC>:::5@DDDC?HHHDEBFFB>\n+@ERR030881.35226 HWI-BRUNOP16X_0001:2:1:3903:6867#0/2\n+CAGAGCGTAAGAAATGGATCCATTGTTCCGAGAACGTGATCGCCCTCATC\n++\n+HH@HHHFDHHHHHHHFHHHGHGHHHHHHHGGHHHHHFHHAHHHHHGHHGH\n+@ERR030881.38182 HWI-BRUNOP16X_0001:2:1:17495:7451#0/2\n+CCTCTCCCGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAG\n++\n+GG/GGHHHHHHHHHHHHHHHHHHHDHHHHHFDHHHHHH@HHEHHHHHHHH\n+@ERR030881.41234 HWI-BRUNOP16X_0001:2:1:14816:8065#0/2\n+GGCAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGAGAAAGG\n++\n+FFGHH55,5514441>><<BHHEHFF?9F4FFFBFHHHHHHHHGHHFF4H\n+@ERR030881.55301 HWI-BRUNOP16X_0001:2:1:7892:11256#0/2\n+CTTCGCAAATTTGTCCCAGGGATGGATCGCCTGTGCTGCCTTCGCCCGCC\n++\n+D@5AA4453451444GGGFDHH@GEA;DDD=:=+:D@DFDEDHHB#####\n+@ERR030881.57346 HWI-BRUNOP16X_0001:2:1:20039:11573#0/2\n+CCTGTCCAGAGTCTGAGGGGGGAGGCCAGGCCCTGCCTTGGGGTCTGAGG\n++\n+##################################################\n+@ERR030881.57608 HWI-BRUNOP16X_0001:2:1:16788:11614#0/2\n+GGGGGGCGCCGCAGCTGCGCGGCCGCTCCCTCCTAGCCGGCCCTTGAGGG\n++\n+HHHHHHHEGHIHHHHHHDHF@@<A?FFE@FGGGAG4====HHHHHHHEHB\n+@ERR030881.58998 HWI-BRUNOP16X_0001:2:1:14252:11816#0/2\n+CTGAATCCCTTGCCCAGAGGA"..b':6601:197274#0/2\n+CCGCTCCCGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGH\n+@ERR030881.74446016 HWI-BRUNOP16X_0001:2:68:6384:197508#0/2\n+GGCCCTGCCCTTGACCCCACTACCCGTGGGGCTGCAGCCGCCTTCGCTGC\n++\n+HHHHHHHHHHHHHHHHHHIHHGHHH>A??@FHHHFHHFHDHH=HHB>4FF\n+@ERR030881.74446277 HWI-BRUNOP16X_0001:2:68:20062:197534#0/2\n+CTTTATTTGGGAAGGGGAGGGGGAATCCTGGGTCGCCCACCCTCACCCTG\n++\n+HHHHHHHGGHHHHHHHHFHGGGGGHHHHHHHGHHHHFHHEH9BHEDD###\n+@ERR030881.74446743 HWI-BRUNOP16X_0001:2:68:3752:197585#0/2\n+CGGCCGGCTGCATCCCACACCAGCCTGAGCCCCAGACGGTCAGTCAGTGC\n++\n+HHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHH\n+@ERR030881.74446915 HWI-BRUNOP16X_0001:2:68:8353:197599#0/2\n+CGAGGGGTCCAGAGTGGAGAGAGCCCCGAGCAGGAGTGCATCTCCCTCGC\n++\n+HHHHHHHFHHHHHHHFHHHHHHHHHHHHHHHGHHHHHHHHHHIHFHHHGH\n+@ERR030881.74447547 HWI-BRUNOP16X_0001:2:68:9591:197654#0/2\n+GGCTGCAGATTCCATTCAGCAGGCCCGAGAGCAAGCACCACGCTAGCCTG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHG\n+@ERR030881.74449534 HWI-BRUNOP16X_0001:2:68:1488:197840#0/2\n+CAAGACTGCAACTTCAGATGCTCCGCACGCTGGAGATGCTGGACAGGGGC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFFHHHHHHHEHEHE\n+@ERR030881.74453424 HWI-BRUNOP16X_0001:2:68:5325:198191#0/2\n+CTTCCTTGGAGAGGTGGGCCTGGGAACCCAGCGCGGACAGCGAGAGGAGG\n++\n+HHGHHHHHHHHHHHHHHHHHHGHHGHHHHHHHHHHHHHHHHHHHEHHHHG\n+@ERR030881.74454854 HWI-BRUNOP16X_0001:2:68:18716:198301#0/2\n+GGAATGTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCA\n++\n+HF@GHD?>DA=<>;=444444245444445>>@>;BECBF@?A<>@AAA8\n+@ERR030881.74455894 HWI-BRUNOP16X_0001:2:68:18831:198398#0/2\n+GGACTGAGGACGACTCCTTGGACTGGAAAATGCTGGCCCCGTACGGCGTC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHHHHIH\n+@ERR030881.74457151 HWI-BRUNOP16X_0001:2:68:9093:198528#0/2\n+GGAACCTTCTCCGGATTGGGTTCATGAGCATTTTTGTGGGTGTGTATGTG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHFH\n+@ERR030881.74458067 HWI-BRUNOP16X_0001:2:68:15716:198600#0/2\n+GGCCGTCTTTGACCTGCTCCTGGCTGTTGGCATTGCTGCCTACCCTGGCA\n++\n+55555<@@@@===<655244A??DAC:C?#####################\n+@ERR030881.74460390 HWI-BRUNOP16X_0001:2:68:15056:198815#0/2\n+GGTGAGGCCAGCACCTTGTCCATTTGGGACCTGGCGGCGCCCACCCCCCG\n++\n+5-5449=;==BFFBFDBFDDC>?>>D?DDDHHHHHBFFC@44244<<<<<\n+@ERR030881.74460430 HWI-BRUNOP16X_0001:2:68:19789:198814#0/2\n+ATGATGTTTCCACAAAGCAGGCATTCGGGCTCCTCGCACCTACCCCAGCA\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74460883 HWI-BRUNOP16X_0001:2:68:19795:198864#0/2\n+TGCTGCGGGTGTCTCCGGCTGGGCATGCGGGGGCCCGGGGACTGCCTGGC\n++\n+HHHHHHHHHHHHGHHHHFDEBDDBB5552*DDBBFHHHHH@FDF######\n+@ERR030881.74463349 HWI-BRUNOP16X_0001:2:68:7211:199081#0/2\n+CTGGTCTCCCATCTGACCCTCCAGGCCTTAGCTTGCCTCACATGTCAGGG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFH\n+@ERR030881.74463429 HWI-BRUNOP16X_0001:2:68:16435:199090#0/2\n+GGACCTGGGCACAAATCCCGTTCAGCCTTTTGACGATCTCAGCCTGTTTG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74466171 HWI-BRUNOP16X_0001:2:68:1844:199339#0/2\n+GGTGGGGGTCGTGGAGTGGGGGAGGGAGGCCAGCCGGGCTCCAGAGGGGT\n++\n+HHHHHHGGGHGBGEFHHHFHHG9GGC;HHEHHHCHFG@FFAA;=9DD;C7\n+@ERR030881.74466232 HWI-BRUNOP16X_0001:2:68:10444:199339#0/2\n+CCGTTTTGAACATGTGTAACCGACAGTCTGCCTGGGCCACAGCCCTCTCA\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHIHHHHHGHDHGHHHBHCFFFFHHHHH\n+@ERR030881.74466444 HWI-BRUNOP16X_0001:2:68:18815:199349#0/2\n+GGAAGGGCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTTGGGACCT\n++\n+HHHHIHIHHHHHGHHHHHHHFHDHFGHHHHHEHEHHHHHHHHHHHHGHHH\n+@ERR030881.74468879 HWI-BRUNOP16X_0001:2:68:9428:199583#0/2\n+GGAGGCTGAAGTGCTGGACAGCCACGTAGGCCATGCCGAGGTAGGCAGCA\n++\n+HFHHHHHHHHHHIHGHHHHHHHHHHHHHHHHHEHHHHGGHH?FHHHHHGH\n+@ERR030881.74470889 HWI-BRUNOP16X_0001:2:68:4971:199775#0/2\n+GACATATTTGAGAGACACTGGGGAGACAGAATCGACCTGACCTTGCTGAC\n++\n+HHHHHHHHHHHHHHHHHHHHHHH@HHHEHHHFHHHHHGHAHFBEHHGFBG\n+@ERR030881.74471439 HWI-BRUNOP16X_0001:2:68:16981:199816#0/2\n+GTGACACTGCATTGCTGCTGCCAGCACCCCTTGTTAGGGTTTGTAATTGC\n++\n+F8HHHFGGG8DC>A>ADD1?##############################\n+@ERR030881.74471978 HWI-BRUNOP16X_0001:2:68:9605:199866#0/2\n+CTTGTCTTCCTTGGAGAGGTGGGCCTGGGAACCCAGCGCGGACAGCGAGA\n++\n+HHHHHHIHHHHHDHHGHHHGHHHHHHHHHHHHFGHHHHFHHDHHHCHHHH\n' |
b |
diff -r d4ca551ca300 -r adc0f7765d85 test-data/output1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output1.txt Thu Sep 07 15:06:58 2017 -0400 |
b |
b'@@ -0,0 +1,9901 @@\n+na\t0.0010\tchr10\t100260965,100261028\t100261028,100261044\t100261978,100262063\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100273277,100275490\t100275490,100275493\t100280123,100280228\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100286096,100286213\t100286213,100286332\t100286604,100286712\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100329210,100329324\t100329324,100329346\t100329868,100330486\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100523728,100523929\t100523929,100524139\t100526398,100526554\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100827008,100827095\t100827542,100827561\t100827561,100829941\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100981167,100981229\t100983304,100984489\t100984489,100985616\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101062541,101063103\t101064205,101064260\t101064260,101064421\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101134174,101134376\t101136667,101136690\t101136690,101137789\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101600737,101600862\t101601169,101601202\t101601202,101601336\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101605139,101605212\t101608834,101608866\t101608866,101608937\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101601202,101601336\t101608834,101608870\t101608870,101608937\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775129,101775216\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775739,101775776\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101828626,101828696\t101828696,101828717\t101829074,101829199\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101829843,101829897\t101829897,101829942\t101831071,101831167\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102137849,102138038\t102138618,102138719\t102138719,102138765\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102148371,102148521\t102148627,102148633\t102148633,102148694\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102157018,102157074\t102157185,102157188\t102157188,102157328\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102358505,102358729\t102359266,102359269\t102359269,102359435\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102402047,102402159\t102402251,102402254\t102402254,102402529\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102416384,102416631\t102416631,102417121\t102418700,102418759\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102656105,102656137\t102656244,102656269\t102656269,102656385\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t103451082,103451182\t103451182,103451285\t103452155,103452405\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t106573662,106577285\t106577285,106577555\t106579368,106579474\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t106573662,106577285\t106577285,106577555\t106579090,106579251\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t110875965,110876070\t110881232,110881274\t110881274,110881535\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t112182793,112182887\t112182887,112182891\t112183692,112183779\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t112443514,112443594\t112443594,112443606\t112445169,112445650\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t112446987,112447467\t112448377,112448381\t112448381,112448489\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t113146010,113146097\t113150982,113150997\t113150997,113151123\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t11320856,11320898\t11321188,11321206\t11321206,11321386\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t11314138,11314258\t11321188,11321206\t11321206,11321386\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t113588823,113589079\t113589079,113589082\t113589665,113589797\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t113679668,113679978\t113691963,113691996\t113691996,113692070\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t113697493,113697603\t113721000,113721031\t113721031,113721168\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t114129890,114129971\t114129971,114130669\t114131146,114131336\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t114447879,114448'..b'1387,94571429\t94573959,94574187\t94574553,94574736\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94573959,94574187\t94574553,94574736\t94576661,94576709\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94573959,94574187\t94574553,94574736\t94576661,94576805\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94581325,94581522\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94577081,94577096\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94581325,94581522\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94576661,94576709\t94581325,94581522\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94577081,94577096\t94581325,94581522\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94581325,94581522\t94582962,94583059\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94581325,94581522\t94582962,94583059\t94583987,94584125\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94582962,94583059\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94583987,94584125\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94576661,94576805\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94576661,94576709\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94588228,94588390\t94590412,94590552\t94590637,94590776\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94590412,94590552\t94590637,94590776\t94592228,94592312\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94590637,94590776\t94592228,94592312\t94592394,94592514\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94592228,94592312\t94592394,94592514\t94593498,94593615\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94592394,94592514\t94593498,94593615\t94594694,94594854\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94593498,94593615\t94594694,94594854\t94596859,94596956\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94594694,94594854\t94596859,94596956\t94597034,94597111\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94596859,94596956\t94597034,94597111\t94601527,94602099\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94683493,94683987\t94687769,94687932\t94688124,94688274\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94687769,94687932\t94688124,94688274\t94694916,94695077\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94706783,94706960\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94720395,94720537\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94706783,94706960\t94720395,94720537\t94724345,94724533\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94694916,94695077\t94720395,94720537\t94724345,94724533\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94720395,94720537\t94724345,94724533\t94733296,94733438\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94724345,94724533\t94733296,94733438\t94735262,94736191\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94762680,94762873\t94775057,94775220\t94775389,94775539\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94775057,94775220\t94775389,94775539\t94780498,94780659\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94775389,94775539\t94780498,94780659\t94781820,94781997\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94780498,94780659\t94781820,94781997\t94820495,94820637\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94781820,94781997\t94820495,94820637\t94842836,94843024\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94820495,94820637\t94842836,94843024\t94849916,94850058\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+\t0.9990\tchr10\t94842836,94843024\t94849916,94850058\t94852732,94853206\t+\t0.0000\t\t\t\n' |
b |
diff -r d4ca551ca300 -r adc0f7765d85 test-data/output2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output2.txt Thu Sep 07 15:06:58 2017 -0400 |
b |
b'@@ -0,0 +1,9901 @@\n+AA-AA-10-100261028-100261044.0\t0.0010\t0.0010\tchr10\t100260965,100261028\t100261028,100261044\t100261978,100262063\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t63\t16\t85\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100275490-100275493.0\t0.0010\t0.0010\tchr10\t100273277,100275490\t100275490,100275493\t100280123,100280228\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t2213\t3\t105\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100286213-100286332.0\t0.0010\t0.0010\tchr10\t100286096,100286213\t100286213,100286332\t100286604,100286712\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t117\t119\t108\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100329324-100329346.0\t0.0010\t0.0010\tchr10\t100329210,100329324\t100329324,100329346\t100329868,100330486\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t114\t22\t618\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100523929-100524139.0\t0.0010\t0.0010\tchr10\t100523728,100523929\t100523929,100524139\t100526398,100526554\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t201\t210\t156\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100827542-100827561.0\t0.0010\t0.0010\tchr10\t100827008,100827095\t100827542,100827561\t100827561,100829941\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t87\t19\t2380\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100983304-100984489.0\t0.0010\t0.0010\tchr10\t100981167,100981229\t100983304,100984489\t100984489,100985616\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t62\t1185\t1127\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101064205-101064260.0\t0.0010\t0.0010\tchr10\t101062541,101063103\t101064205,101064260\t101064260,101064421\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t562\t55\t161\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101136667-101136690.0\t0.0010\t0.0010\tchr10\t101134174,101134376\t101136667,101136690\t101136690,101137789\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t202\t23\t1099\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101601169-101601202.0\t0.0010\t0.0010\tchr10\t101600737,101600862\t101601169,101601202\t101601202,101601336\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t125\t33\t134\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101608834-101608866.0\t0.0010\t0.0010\tchr10\t101605139,101605212\t101608834,101608866\t101608866,101608937\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t73\t32\t71\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101608834-101608870.0\t0.0010\t0.0010\tchr10\t101601202,101601336\t101608834,101608870\t101608870,101608937\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t134\t36\t67\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101774879-101774912.0\t0.0010\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775129,101775216\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t148\t33\t87\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101774879-101774912.1\t0.0010\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775739,101775776\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t148\t33\t37\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101828696-101828717.0\t0.0010\t0.0010\tchr10\t101828626,101828696\t101828696,101828717\t101829074,101829199\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t70\t21\t125\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101829897-101829942.0\t0.0010\t0.0010\tchr10\t101829843,101829897\t101829897,101829942\t101831071,101831167\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t54\t45\t96\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-102138618-102138719.0\t0.0010\t0.0010\tchr10\t102137849,102138038\t102138618,102138719\t102138719,102138765\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t189\t101\t46\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-102148627-102148633.0\t0.0010\t0.0010\tchr10\t102148371,102148521\t102148627,102148633\t102148633,102148694\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t6\t61\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-102157185-102157188.0\t0.0010\t0.0010\tchr10\t102157018,102157074\t102157185,102157188\t102157188,102157328\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t56\t3\t140\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-102359266-102359269.0\t0.0010\t0.0010\tchr10\t102358505,102358729\t102359266,102359269\t102359269,102359435\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t224\t3\t166\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-102402251-102402254.0\t0.0010\t0.0010\tchr10\t102402047,102402159\t102402251,102402254\t102402254,102402529\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t112\t3\t275\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-102416631-10'..b'40\t139\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94590637-94590776.0\t0.0010\t0.9990\tchr10\t94590412,94590552\t94590637,94590776\t94592228,94592312\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t140\t139\t84\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94592228-94592312.0\t0.0010\t0.9990\tchr10\t94590637,94590776\t94592228,94592312\t94592394,94592514\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t139\t84\t120\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94592394-94592514.0\t0.0010\t0.9990\tchr10\t94592228,94592312\t94592394,94592514\t94593498,94593615\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t84\t120\t117\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94593498-94593615.0\t0.0010\t0.9990\tchr10\t94592394,94592514\t94593498,94593615\t94594694,94594854\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t120\t117\t160\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94594694-94594854.0\t0.0010\t0.9990\tchr10\t94593498,94593615\t94594694,94594854\t94596859,94596956\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t117\t160\t97\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94596859-94596956.0\t0.0010\t0.9990\tchr10\t94594694,94594854\t94596859,94596956\t94597034,94597111\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t160\t97\t77\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94597034-94597111.0\t0.0010\t0.9990\tchr10\t94596859,94596956\t94597034,94597111\t94601527,94602099\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t97\t77\t572\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94687769-94687932.0\t0.0010\t0.9990\tchr10\t94683493,94683987\t94687769,94687932\t94688124,94688274\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t494\t163\t150\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94688124-94688274.0\t0.0010\t0.9990\tchr10\t94687769,94687932\t94688124,94688274\t94694916,94695077\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t163\t150\t161\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94694916-94695077.0\t0.0010\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94706783,94706960\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t177\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94694916-94695077.1\t0.0010\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94720395,94720537\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t142\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94720395-94720537.0\t0.0010\t0.9990\tchr10\t94706783,94706960\t94720395,94720537\t94724345,94724533\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t177\t142\t188\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94720395-94720537.1\t0.0010\t0.9990\tchr10\t94694916,94695077\t94720395,94720537\t94724345,94724533\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t161\t142\t188\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94724345-94724533.0\t0.0010\t0.9990\tchr10\t94720395,94720537\t94724345,94724533\t94733296,94733438\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t142\t188\t142\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94733296-94733438.0\t0.0010\t0.9990\tchr10\t94724345,94724533\t94733296,94733438\t94735262,94736191\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t188\t142\t929\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94775057-94775220.0\t0.0010\t0.9990\tchr10\t94762680,94762873\t94775057,94775220\t94775389,94775539\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t193\t163\t150\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94775389-94775539.0\t0.0010\t0.9990\tchr10\t94775057,94775220\t94775389,94775539\t94780498,94780659\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t163\t150\t161\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94780498-94780659.0\t0.0010\t0.9990\tchr10\t94775389,94775539\t94780498,94780659\t94781820,94781997\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t177\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94781820-94781997.0\t0.0010\t0.9990\tchr10\t94780498,94780659\t94781820,94781997\t94820495,94820637\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t161\t177\t142\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94820495-94820637.0\t0.0010\t0.9990\tchr10\t94781820,94781997\t94820495,94820637\t94842836,94843024\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t177\t142\t188\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94842836-94843024.0\t0.0010\t0.9990\tchr10\t94820495,94820637\t94842836,94843024\t94849916,94850058\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t142\t188\t142\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94849916-94850058.0\t0.0010\t0.9990\tchr10\t94842836,94843024\t94849916,94850058\t94852732,94853206\t+\t0\t0\t0\t0.0000\t\n\\ No newline at end of file\n' |