Mercurial > repos > bioitcore > splicetrap
view SpliceTrap.pl @ 5:2ebca9da5e42 draft default tip
planemo upload
author | bioitcore |
---|---|
date | Thu, 07 Sep 2017 17:39:24 -0400 |
parents | adc0f7765d85 |
children |
line wrap: on
line source
#!/usr/bin/perl # Author: wuj@cshl.edu # Modified: Baekdoo Kim (baegi7942@gmail.com) use strict; use Getopt::Long; use Data::Dumper; #################### use Cwd; my $PROG = $0; my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); my $PROG_ABS_PATH = Cwd::abs_path($PROG); #my $SrcFolder=`dirname $PROG_ABS_PATH`; #chomp($SrcFolder); #my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; #my $SrcFolder=$config{SrcFolder}; my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' ); foreach my $program (@programs) { die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); } #################### my $SrcFolder=""; my $MapSoftware="bowtie"; my $DatabasePrefix="hg38"; my $ReadFileFormat=""; my $ReadFile1Name=""; my $ReadFile2Name=""; my $CutoffLevel="M"; my $Outputfolder=$CUR_DIR; my $OutputPrefix="Result"; #my $CutoffOnly=0; my $ReadSize=36; my $JunctionCut=5; my $onGalaxy_raw=""; my $onGalaxy_txt=""; my $BowtieThreads=1; my $noIRMstr=""; my $noIRM = 0; my $num_args = $#ARGV; $onGalaxy_raw = $ARGV[$num_args-1]; $onGalaxy_txt = $ARGV[$num_args]; GetOptions ( "l:s"=>\$SrcFolder, "m:s"=>\$MapSoftware, "d:s"=>\$DatabasePrefix, # "f:s"=>\$ReadFileFormat, "1:s"=>\$ReadFile1Name, "2:s"=>\$ReadFile2Name, "c:s"=>\$CutoffLevel, "outdir:s"=>\$Outputfolder, "o:s"=>\$OutputPrefix, "j:i"=>\$JunctionCut, "s:i"=>\$ReadSize, "p:i"=>\$BowtieThreads, "noIRM|noirm"=>\$noIRM # "local:s"=>\$local, # "rerun"=>\$CutoffOnly ); #-O for galaxy output my $InputParaDes=" Usage of the script: -l Base Location (required) -m Mapping software: [bowtie]/rmap -d Database prefix: [hg18]/mm9/rn4/userdefined -1 Read File 1 -2 Read File 2 -c Cutoff Level:H/[M]/L Means High, Middle or Low -j Junction reads requirement per junction for each exon-isoform [5] -o Output prefix {Result} -s Read Size [36] --outdir Output folder [./] -p Bowtie parameter, threads number, only use this when you don't use qsub [1] --noIRM Skip the IRM correction step This is a quick help, please refer to the README file for details. "; if($SrcFolder eq "") { print "[CHECK] - Please provide the location of the script (option '-l')\n\n"; exit; } if($ReadFile2Name eq "") { $ReadFile2Name = $ReadFile1Name; #trigger singled end mode } if($ReadFile1Name eq "" or $ReadFile2Name eq "" ) { print $InputParaDes; exit; } if($BowtieThreads < 1) { print $InputParaDes; exit; } if (! -e "$SrcFolder/db/$DatabasePrefix/parallel") { print "CHECK: Error, the database you specified is not properly installed.\n"; #print $InputParaDes; exit; } if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") { print $InputParaDes; exit; } $ReadFile1Name = Cwd::abs_path($ReadFile1Name); $ReadFile2Name = Cwd::abs_path($ReadFile2Name); #check the files open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n"); my $checkoneline = <check>; if(substr($checkoneline,0,1) eq ">") { $ReadFileFormat = "fasta"; } elsif(substr($checkoneline,0,1) eq "@") { $ReadFileFormat = "fastq"; } else { die("CHECK: ERROR:Please check $ReadFile1Name\n"); } close(check); open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n"); my $checkoneline = <check>; if(substr($checkoneline,0,1) eq ">") { die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta"); } elsif(substr($checkoneline,0,1) eq "@") { die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq"); } else { die("CHECK: ERROR:Please check $ReadFile2Name\n"); } close(check); $Outputfolder= Cwd::abs_path($Outputfolder); if($Outputfolder eq "/tmp") { while(-e $Outputfolder) { my $random_foldername = random_sessid(); $Outputfolder = "/tmp/".$random_foldername; } } if(! -e $Outputfolder) { mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n"; } if(! -d $Outputfolder) { die "CHECK: $Outputfolder is not a folder\n"; } if($MapSoftware eq "bowtie") { print "CHECK: whether bowtie installed and in PATH\n"; my $bowtiechecker=`bowtie --version`; if($bowtiechecker ne "") { print "CHECK: bowtie found, information below:\n"; print $bowtiechecker,"\n"; } else { die "CHECK: No bowtie found in PATH, EXIT!\n"; } } elsif($MapSoftware eq "rmap") { print "CHECK: checking rmap...\n"; if(system("type rmap &>/dev/null") ==0 ) { print "CHECK: rmap found, continue\n"; } else { die "CHECK: No rmap found in PATH, EXIT!\n"; } } else { die "CHECK: option -m only takes rmap or bowtie as inputs\n"; } if($ReadSize == 0) { die "CHECK: Please check option -s Read size\n"; } if($noIRM) { $noIRMstr= "noirm"; } #write more checks later print "PARAMETERS:\tMapping software: ",$MapSoftware,"\n"; print "PARAMETERS:\tDatabase prefix: ",$DatabasePrefix,"\n"; print "PARAMETERS:\tRead end 1: ",$ReadFile1Name,"\n"; print "PARAMETERS:\tRead end 2: ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name); print "PARAMETERS:\tGalaxy_raw: ",$onGalaxy_raw,"\n"; #if($onGalaxy_raw ne ""); print "PARAMETERS:\tGalaxy_txt: ",$onGalaxy_txt,"\n"; #if($onGalaxy_txt ne ""); print "PARAMETERS:\tCutoff level: ",$CutoffLevel,"\n"; print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n"; print "PARAMETERS:\tOutput folder: ",$Outputfolder,"\n"; print "PARAMETERS:\tOutput prefix: ",$OutputPrefix,"\n"; print "PARAMETERS:\tRead size: ",$ReadSize,"\n"; print "PARAMETERS:\tBowtie threads #: ",$BowtieThreads,"\n"; print "PARAMETERS:\tNo IRM.\n" if ($noIRM); if($MapSoftware eq "bowtie") { print "=================STAGE 1 MAPPING===================\n"; system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads"); system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name); print "=================STAGE 2 ESTIMATION================\n"; # ratio, log, nums system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; print "=================STAGE 3 CUTOFF====================\n"; # raw system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); } if($MapSoftware eq "rmap") { print "=================STAGE 1 MAPPING===================\n"; system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ; system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name); print "=================STAGE 2 ESTIMATION================\n"; system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; print "=================STAGE 3 CUTOFF====================\n"; system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); } #print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n"; if($onGalaxy_raw ne "" && $onGalaxy_txt ne "") { print "OUTPUTFILE:$OutputPrefix.raw\n"; system("grep -v na $Outputfolder/$OutputPrefix.raw >$onGalaxy_raw"); print "OUTPUTFILE:$OutputPrefix.txt\n"; system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy_txt"); } print "============Clean up\n"; system("rm -r $Outputfolder/$OutputPrefix.*"); sub random_sessid { #my @chars = (0..9,a..z,A..Z); my @chars = ('a'..'z','A'..'Z'); my $len = 10; my $string = join '', map {$chars[rand(@chars)]} (1..$len); return $string; }