Mercurial > repos > bioitcore > splicetrap
diff SpliceTrap.pl @ 1:adc0f7765d85 draft
planemo upload
author | bioitcore |
---|---|
date | Thu, 07 Sep 2017 15:06:58 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SpliceTrap.pl Thu Sep 07 15:06:58 2017 -0400 @@ -0,0 +1,285 @@ +#!/usr/bin/perl +# Author: wuj@cshl.edu +# Modified: Baekdoo Kim (baegi7942@gmail.com) +use strict; +use Getopt::Long; +use Data::Dumper; +#################### +use Cwd; +my $PROG = $0; +my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); +my $PROG_ABS_PATH = Cwd::abs_path($PROG); +#my $SrcFolder=`dirname $PROG_ABS_PATH`; +#chomp($SrcFolder); +#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; +#my $SrcFolder=$config{SrcFolder}; + +my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' ); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); + +} + +#################### +my $SrcFolder=""; +my $MapSoftware="bowtie"; +my $DatabasePrefix="hg38"; +my $ReadFileFormat=""; +my $ReadFile1Name=""; +my $ReadFile2Name=""; +my $CutoffLevel="M"; +my $Outputfolder=$CUR_DIR; +my $OutputPrefix="Result"; +#my $CutoffOnly=0; +my $ReadSize=36; +my $JunctionCut=5; +my $onGalaxy_raw=""; +my $onGalaxy_txt=""; +my $BowtieThreads=1; +my $noIRMstr=""; +my $noIRM = 0; + +my $num_args = $#ARGV; +$onGalaxy_raw = $ARGV[$num_args-1]; +$onGalaxy_txt = $ARGV[$num_args]; + +GetOptions ( + "l:s"=>\$SrcFolder, + "m:s"=>\$MapSoftware, + "d:s"=>\$DatabasePrefix, +# "f:s"=>\$ReadFileFormat, + "1:s"=>\$ReadFile1Name, + "2:s"=>\$ReadFile2Name, + "c:s"=>\$CutoffLevel, + "outdir:s"=>\$Outputfolder, + "o:s"=>\$OutputPrefix, + "j:i"=>\$JunctionCut, + "s:i"=>\$ReadSize, + "p:i"=>\$BowtieThreads, + "noIRM|noirm"=>\$noIRM +# "local:s"=>\$local, +# "rerun"=>\$CutoffOnly +); +#-O for galaxy output + + +my $InputParaDes=" Usage of the script: + -l Base Location (required) + -m Mapping software: [bowtie]/rmap + -d Database prefix: [hg18]/mm9/rn4/userdefined + -1 Read File 1 + -2 Read File 2 + -c Cutoff Level:H/[M]/L + Means High, Middle or Low + -j Junction reads requirement per junction for each exon-isoform [5] + -o Output prefix {Result} + -s Read Size [36] + --outdir Output folder [./] + -p Bowtie parameter, threads number, only use this when you don't use qsub [1] + --noIRM Skip the IRM correction step + + This is a quick help, please refer to the README file for details. +"; + + +if($SrcFolder eq "") { + print "[CHECK] - Please provide the location of the script (option '-l')\n\n"; + exit; +} + +if($ReadFile2Name eq "") +{ + $ReadFile2Name = $ReadFile1Name; + #trigger singled end mode +} + +if($ReadFile1Name eq "" or $ReadFile2Name eq "" ) +{ + print $InputParaDes; + exit; +} + +if($BowtieThreads < 1) +{ + print $InputParaDes; + exit; +} + +if (! -e "$SrcFolder/db/$DatabasePrefix/parallel") +{ + print "CHECK: Error, the database you specified is not properly installed.\n"; + #print $InputParaDes; + exit; + +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} + + +$ReadFile1Name = Cwd::abs_path($ReadFile1Name); +$ReadFile2Name = Cwd::abs_path($ReadFile2Name); + +#check the files +open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n"); +my $checkoneline = <check>; +if(substr($checkoneline,0,1) eq ">") +{ + $ReadFileFormat = "fasta"; +} +elsif(substr($checkoneline,0,1) eq "@") +{ + $ReadFileFormat = "fastq"; +} +else +{ + die("CHECK: ERROR:Please check $ReadFile1Name\n"); +} +close(check); + +open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n"); +my $checkoneline = <check>; +if(substr($checkoneline,0,1) eq ">") +{ + die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta"); +} +elsif(substr($checkoneline,0,1) eq "@") +{ + die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq"); +} +else +{ + die("CHECK: ERROR:Please check $ReadFile2Name\n"); +} +close(check); + +$Outputfolder= Cwd::abs_path($Outputfolder); +if($Outputfolder eq "/tmp") +{ + while(-e $Outputfolder) + { + my $random_foldername = random_sessid(); + $Outputfolder = "/tmp/".$random_foldername; + } +} + + +if(! -e $Outputfolder) +{ + mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n"; +} +if(! -d $Outputfolder) +{ + die "CHECK: $Outputfolder is not a folder\n"; +} + +if($MapSoftware eq "bowtie") +{ + print "CHECK: whether bowtie installed and in PATH\n"; + my $bowtiechecker=`bowtie --version`; + if($bowtiechecker ne "") + { + print "CHECK: bowtie found, information below:\n"; + print $bowtiechecker,"\n"; + } + else + { + die "CHECK: No bowtie found in PATH, EXIT!\n"; + } +} +elsif($MapSoftware eq "rmap") +{ + print "CHECK: checking rmap...\n"; + if(system("type rmap &>/dev/null") ==0 ) + { + print "CHECK: rmap found, continue\n"; + } + else + { + die "CHECK: No rmap found in PATH, EXIT!\n"; + } +} +else +{ + die "CHECK: option -m only takes rmap or bowtie as inputs\n"; +} + +if($ReadSize == 0) +{ + die "CHECK: Please check option -s Read size\n"; +} + +if($noIRM) +{ + $noIRMstr= "noirm"; +} + +#write more checks later +print "PARAMETERS:\tMapping software: ",$MapSoftware,"\n"; +print "PARAMETERS:\tDatabase prefix: ",$DatabasePrefix,"\n"; +print "PARAMETERS:\tRead end 1: ",$ReadFile1Name,"\n"; +print "PARAMETERS:\tRead end 2: ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name); +print "PARAMETERS:\tGalaxy_raw: ",$onGalaxy_raw,"\n"; #if($onGalaxy_raw ne ""); +print "PARAMETERS:\tGalaxy_txt: ",$onGalaxy_txt,"\n"; #if($onGalaxy_txt ne ""); +print "PARAMETERS:\tCutoff level: ",$CutoffLevel,"\n"; +print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n"; +print "PARAMETERS:\tOutput folder: ",$Outputfolder,"\n"; +print "PARAMETERS:\tOutput prefix: ",$OutputPrefix,"\n"; +print "PARAMETERS:\tRead size: ",$ReadSize,"\n"; +print "PARAMETERS:\tBowtie threads #: ",$BowtieThreads,"\n"; +print "PARAMETERS:\tNo IRM.\n" if ($noIRM); + +if($MapSoftware eq "bowtie") +{ + print "=================STAGE 1 MAPPING===================\n"; + system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads"); + system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name); + print "=================STAGE 2 ESTIMATION================\n"; + # ratio, log, nums + system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; + print "=================STAGE 3 CUTOFF====================\n"; + # raw + system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); + + +} + +if($MapSoftware eq "rmap") +{ + print "=================STAGE 1 MAPPING===================\n"; + + system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ; + system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name); + print "=================STAGE 2 ESTIMATION================\n"; + + system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; + print "=================STAGE 3 CUTOFF====================\n"; + system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); + + +} + +#print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n"; + +if($onGalaxy_raw ne "" && $onGalaxy_txt ne "") +{ + print "OUTPUTFILE:$OutputPrefix.raw\n"; + system("grep -v na $Outputfolder/$OutputPrefix.raw >$onGalaxy_raw"); + print "OUTPUTFILE:$OutputPrefix.txt\n"; + system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy_txt"); +} + +print "============Clean up\n"; +system("rm -r $Outputfolder/$OutputPrefix.*"); + +sub random_sessid { + #my @chars = (0..9,a..z,A..Z); + my @chars = ('a'..'z','A'..'Z'); + my $len = 10; + my $string = join '', map {$chars[rand(@chars)]} (1..$len); + return $string; +}