view TXdbgen @ 5:2ebca9da5e42 draft default tip

planemo upload
author bioitcore
date Thu, 07 Sep 2017 17:39:24 -0400
parents adc0f7765d85
children
line wrap: on
line source

#!/usr/bin/perl
# Directory holding the SpliceTrap helper scripts this script invokes
# (gtf2bed.pl, scanbed2txdb.pl, get_bed_fa_j.pl, splitdb.sh). The finished
# database is moved to ../db/<txdb name> relative to this directory.
# NOTE(review): hard-coded installation path -- confirm it matches the local
# deployment before running.
my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
# This script generates TXdb database files from a bed/gtf annotation file.

use strict;
use Cwd;
use File::Basename;
use Getopt::Long;

# External tools that must be reachable before any work starts.
my @programs = qw(split bowtie-build sort uniq ls bash rm mv cut grep echo);

# Probe each tool with `hash`; the command contains a redirection, so it is
# run through the shell. Any non-zero result from system() aborts the script.
for my $tool (@programs) {
    next if system("hash $tool >/dev/null") == 0;
    die ("CHECK: $tool not found\n");
}


# Command-line settings and their defaults.
my $genomedir = "";             # -g: location of the genome fasta files
my $annofilename = "";          # -a: annotation file (bed, or gtf with --gtf)
my $txdbname = "userdefined";   # -n: name of the database to create
my $knownonly = 0;              # --knownonly: parsed, but not read anywhere in this script
my $gtfinput = 0;               # --gtf: annotation file is in gtf format

# Usage text shown whenever the command line is unusable.
my $InputParaDes="      Usage of the script:
        -g      genome fasta file location
	-a	annotation file (bed/gtf)
	-n	txdb name
	--gtf	specify this if annotation file is in gtf format
";

# GetOptions returns false (after warning on STDERR) when it meets an unknown
# or malformed option; stop instead of silently running with partial settings.
my $options_ok = GetOptions (
    "g:s"       => \$genomedir,
    "a:s"       => \$annofilename,
    "n:s"       => \$txdbname,
    "gtf"       => \$gtfinput,
    "knownonly" => \$knownonly
);

if (!$options_ok)
{
    print STDERR $InputParaDes;
    exit 1;
}

# Both -g and -a are required. Exit non-zero so that a calling wrapper can
# tell that nothing was generated.
if ($genomedir eq "" or $annofilename eq "")
{
    print $InputParaDes;
    exit 1;
}

# Resolve both inputs to absolute paths and validate them before any work is
# done. Cwd::abs_path can return undef for a path it cannot resolve, which
# would otherwise flow into the shell commands further down.
my $genome_abs = Cwd::abs_path($genomedir);
die "TXDBGEN: genome directory $genomedir not found\n"
    unless defined $genome_abs and -d $genome_abs;
$genomedir = $genome_abs;

my $anno_abs = Cwd::abs_path($annofilename);
die "TXDBGEN: annotation file $annofilename not found\n"
    unless defined $anno_abs and -e $anno_abs;
$annofilename = $anno_abs;

# File::Basename avoids spawning a shell just to strip the directory part,
# and is not confused by spaces or shell metacharacters in the path.
my $annofilebase = basename($annofilename);

# All intermediate files go into a dedicated cache folder (created in the
# current working directory) so they do not clutter it.
my $cachefolder = $annofilebase.".cache";

# Test for a directory rather than mere existence: if a plain file of that
# name is in the way, mkdir fails and the problem is reported here.
unless (-d $cachefolder)
{
    mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder: $!\n";
}

if ($gtfinput)
{
    print "TXDBGEN: converting gtf file into bed format\n";
    my $bedfile = "$cachefolder/$annofilebase.bed";
    # The converted file replaces the annotation input for every later step,
    # so a failed conversion must stop the run.
    system("perl $SrcFolder/gtf2bed.pl $annofilename >$bedfile") == 0
        or die "TXDBGEN: gtf2bed.pl failed on $annofilename (exit status " . ($? >> 8) . ")\n";
    $annofilename = $bedfile;
}


# Run one pipeline command through system() and abort the script on failure.
#   $command  - command line handed to system()
#   $ok_codes - optional array ref of exit statuses that count as success
#               (defaults to [0])
# Dies with a TXDBGEN-prefixed message if the command cannot be started, is
# killed by a signal, or exits with a status not listed in $ok_codes.
sub run_step
{
    my ($command, $ok_codes) = @_;
    $ok_codes ||= [0];

    my $status = system($command);
    die "TXDBGEN: could not run '$command': $!\n" if $status == -1;
    die "TXDBGEN: '$command' was killed by signal " . ($status & 127) . "\n"
        if $status & 127;

    my $exit_code = $status >> 8;
    return if grep { $_ == $exit_code } @{$ok_codes};
    die "TXDBGEN: '$command' failed with exit status $exit_code\n";
}

# Final location of the database. Check it before doing any work: if it
# already exists, `mv` would place the cache folder inside it instead of
# creating the database under that name.
my $dbdir = "$SrcFolder/../db/$txdbname";
die "TXDBGEN: $dbdir already exists; remove it or choose another name with -n\n"
    if -e $dbdir;

print "TXDBGEN: scan $annofilename for AS events...\n";
run_step("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp");

print "TXDBGEN: fetch sequences from $genomedir...\n";
run_step("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort");
# Build the list of genome fasta files; fails if no *.fa file is present.
run_step("ls $genomedir/*.fa >$cachefolder/chr.list");
run_step("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta");

print "TXDBGEN: generate files for parallel computing...\n";
unless (-d "$cachefolder/parallel")
{
    mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel: $!\n";
}
# grep exits with 1 when no line matches. The original script carried on in
# that case, so only genuine grep errors (status > 1) abort here.
run_step("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed", [0, 1]);
# Removing intermediate files is housekeeping: warn, but keep going.
system("rm $cachefolder/out.bed") == 0
    or warn "TXDBGEN: could not remove $cachefolder/out.bed\n";
run_step("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi");
system("rm $cachefolder/TXdb.tmp.evi") == 0
    or warn "TXDBGEN: could not remove $cachefolder/TXdb.tmp.evi\n";
run_step("bash $SrcFolder/splitdb.sh $cachefolder/parallel");

print "TXDBGEN: build Bowtie index...\n";
unless (-d "$cachefolder/btw")
{
    mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw: $!\n";
}
run_step("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb");
system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list") == 0
    or warn "TXDBGEN: could not remove temporary files in $cachefolder\n";

print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";
run_step("mv $cachefolder $dbdir");
# Reached only when every required step above succeeded.
print "TXDBGEN: Done!\n";