view bin/SpliceTrap.pl @ 1:adc0f7765d85 draft

planemo upload
author bioitcore
date Thu, 07 Sep 2017 15:06:58 -0400
parents
children
line wrap: on
line source

# Author: wuj@cshl.edu
use strict;
use Getopt::Long;
####################
use Cwd;
my $PROG = $0;
my $CUR_DIR = Cwd::abs_path(Cwd::cwd());
my $PROG_ABS_PATH = Cwd::abs_path($PROG);
#my $SrcFolder=`dirname $PROG_ABS_PATH`;
#chomp($SrcFolder);
#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini";
#my $SrcFolder=$config{SrcFolder};

my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' );
foreach my $program (@programs)
{
	die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
	
}

####################
my $MapSoftware="bowtie";
my $DatabasePrefix="hg18";
my $ReadFileFormat="";
my $ReadFile1Name="";
my $ReadFile2Name="";
my $CutoffLevel="M";
my $Outputfolder=$CUR_DIR;
my $OutputPrefix="Result";
#my $CutoffOnly=0;
my $ReadSize=36;
my $JunctionCut=5;
my $onGalaxy="";
my $BowtieThreads=1;
my $noIRMstr="";
my $noIRM = 0;

GetOptions (
	"m:s"=>\$MapSoftware,
	"d:s"=>\$DatabasePrefix,
#	"f:s"=>\$ReadFileFormat,
	"1:s"=>\$ReadFile1Name,
	"2:s"=>\$ReadFile2Name,
	"c:s"=>\$CutoffLevel,
	"outdir:s"=>\$Outputfolder,
	"o:s"=>\$OutputPrefix,
	"j:i"=>\$JunctionCut,
	"s:i"=>\$ReadSize,
	"p:i"=>\$BowtieThreads,
	"noIRM|noirm"=>\$noIRM,
	"g:s"=>\$onGalaxy
#	"local:s"=>\$local,
#	"rerun"=>\$CutoffOnly
);
#-O for galaxy output


my $InputParaDes="	Usage of the script:
	-m	Mapping software: [bowtie]/rmap
	-d	Database prefix: [hg18]/mm9/rn4/userdefined
	-1	Read File 1
	-2	Read File 2
	-c	Cutoff Level:H/[M]/L
		Means High, Middle or Low
	-j	Junction reads requirement per junction for each exon-isoform [5]
	-o	Output prefix {Result}
	-s	Read Size [36]
	--outdir Output folder [./]
	-p	Bowtie parameter, threads number, only use this when you don't use qsub [1]
	--noIRM	Skip the IRM correction step
	
	This is a quick help, please refer to the README file for details.
";

if($ReadFile2Name eq "")
{
	$ReadFile2Name = $ReadFile1Name;
	#trigger singled end mode
}

if($ReadFile1Name eq "" or $ReadFile2Name eq "" )
{
	print $InputParaDes;
	exit;
}

if($BowtieThreads < 1)
{
	print $InputParaDes;
	exit;
}

if (! -e "$SrcFolder/../db/$DatabasePrefix/parallel")
{
	print "CHECK: Error, the database you specified is not properly installed.\n";
	#print $InputParaDes;
	exit;
	
}

if($CutoffLevel ne "H" and  $CutoffLevel ne "M" and  $CutoffLevel ne "L")
{
	print $InputParaDes;
        exit;
}

$ReadFile1Name = Cwd::abs_path($ReadFile1Name);
$ReadFile2Name = Cwd::abs_path($ReadFile2Name);

#check the files
open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n");
my $checkoneline = <check>;
if(substr($checkoneline,0,1) eq ">")
{
	$ReadFileFormat = "fasta";
}
elsif(substr($checkoneline,0,1) eq "@")
{
	$ReadFileFormat = "fastq";
}
else
{
	die("CHECK: ERROR:Please check $ReadFile1Name\n");
}
close(check);

open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n");
my $checkoneline = <check>;
if(substr($checkoneline,0,1) eq ">")
{
        die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta");
}
elsif(substr($checkoneline,0,1) eq "@")
{
        die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq");
}
else
{
        die("CHECK: ERROR:Please check $ReadFile2Name\n");
}
close(check);

$Outputfolder= Cwd::abs_path($Outputfolder);
if($Outputfolder eq "/tmp")
{
        while(-e $Outputfolder)
        {
                my $random_foldername = random_sessid();
                $Outputfolder = "/tmp/".$random_foldername;
        }
}


if(! -e $Outputfolder)
{
	mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n";
}
if(! -d $Outputfolder)
{
	die "CHECK: $Outputfolder is not a folder\n";
}

if($MapSoftware eq "bowtie")
{
	print "CHECK: whether bowtie installed and in PATH\n";
	my $bowtiechecker=`bowtie --version`;
	if($bowtiechecker ne "")
	{
		print "CHECK: bowtie found, information below:\n";
		print $bowtiechecker,"\n";
	}
	else
	{
		die "CHECK: No bowtie found in PATH, EXIT!\n";
	}
}
elsif($MapSoftware eq "rmap")
{
	print "CHECK: checking rmap...\n";
	if(system("type rmap &>/dev/null") ==0 )
	{
		print "CHECK: rmap found, continue\n";
	}
	else
	{
		die "CHECK: No rmap found in PATH, EXIT!\n";
	}		
}
else
{
	die "CHECK: option -m only takes rmap or bowtie as inputs\n";
}

if($ReadSize == 0)
{
	die "CHECK: Please check option -s Read size\n";
}

if($noIRM)
{
	$noIRMstr= "noirm";
}

#write more checks later
print "PARAMETERS:\tMapping software:  ",$MapSoftware,"\n";
print "PARAMETERS:\tDatabase prefix:   ",$DatabasePrefix,"\n";
print "PARAMETERS:\tRead end 1:        ",$ReadFile1Name,"\n";
print "PARAMETERS:\tRead end 2:        ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name);
print "PARAMETERS:\tCutoff level:      ",$CutoffLevel,"\n";
print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n";
print "PARAMETERS:\tOutput folder:     ",$Outputfolder,"\n";
print "PARAMETERS:\tOutput prefix:     ",$OutputPrefix,"\n";
print "PARAMETERS:\tRead size:         ",$ReadSize,"\n";
print "PARAMETERS:\tBowtie threads #:  ",$BowtieThreads,"\n";
print "PARAMETERS:\tNo IRM.\n" if ($noIRM);

if($MapSoftware eq "bowtie")
{
	print "=================STAGE 1 MAPPING===================\n";
	system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads");
        system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name);
	print "=================STAGE 2 ESTIMATION================\n";

	system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;
	print "=================STAGE 3 CUTOFF====================\n";
        system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");


}

if($MapSoftware eq "rmap")
{
	print "=================STAGE 1 MAPPING===================\n";
	
        system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ;
        system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name);
	print "=================STAGE 2 ESTIMATION================\n";
	
        system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;
	print "=================STAGE 3 CUTOFF====================\n";
        system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");


}

print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n";

if($onGalaxy ne "")
{
        system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy");
}

sub random_sessid
{
        #my @chars = (0..9,a..z,A..Z);
        my @chars = ('a'..'z','A'..'Z');
        my $len = 10;
        my $string = join '', map {$chars[rand(@chars)]} (1..$len);
        return $string;
     }