diff TXdbgen @ 1:adc0f7765d85 draft

planemo upload
author bioitcore
date Thu, 07 Sep 2017 15:06:58 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TXdbgen	Thu Sep 07 15:06:58 2017 -0400
@@ -0,0 +1,135 @@
+#!/usr/bin/perl
+my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
+# NOTE(review): the $SrcFolder assignment above precedes "use strict" and
+# presumably gets rewritten at install time -- confirm before moving it.
+#
+# TXdbgen: generate TXdb database files from a bed/gtf annotation file.
+#
+# Pipeline: optionally convert gtf to bed (gtf2bed.pl), scan the annotation
+# (scanbed2txdb.pl), fetch sequences (get_bed_fa_j.pl), split the database
+# (splitdb.sh), build a Bowtie index, then move the cache folder to
+# $SrcFolder/../db/<txdb name>.
+
+use strict;
+use warnings;
+use Cwd;
+use File::Basename;
+use Getopt::Long;
+
+# Wrap $text in single quotes so /bin/sh treats it as one literal word.
+sub shell_quote
+{
+	my ($text) = @_;
+	$text =~ s/'/'\\''/g;
+	return "'$text'";
+}
+
+# Run an external command and die unless it exits with status 0.
+# A single string is handed to the shell (needed for redirections and
+# wildcards); a list is executed directly, so arguments need no quoting.
+sub run_cmd
+{
+	my @cmd = @_;
+	system(@cmd) == 0 or die "TXDBGEN: command failed (status $?): @cmd\n";
+	return;
+}
+
+# Abort early if any of the listed external programs cannot be found.
+my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
+foreach my $program (@programs)
+{
+        die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
+}
+
+my $genomedir = "";
+my $annofilename = "";
+my $txdbname = "userdefined";
+my $knownonly = 0;	# accepted on the command line but never read in this script
+my $gtfinput = 0;
+
+my $InputParaDes="      Usage of the script:
+        -g      directory containing the genome fasta files (*.fa)
+	-a	annotation file (bed/gtf)
+	-n	txdb name
+	--gtf	specify this if annotation file is in gtf format
+";
+
+my $options_ok = GetOptions (
+	"g:s"=>\$genomedir,
+	"a:s"=>\$annofilename,
+	"n:s"=>\$txdbname,
+	"gtf"=>\$gtfinput,
+	"knownonly"=>\$knownonly
+);
+
+# Unknown options or missing mandatory arguments are a usage error.
+if(!$options_ok or $genomedir eq "" or $annofilename eq "")
+{
+	print $InputParaDes;
+	exit 1;
+}
+
+# Validate the inputs first: Cwd::abs_path may return undef for a missing path.
+die "TXDBGEN: genome directory $genomedir not found\n" if (! -d $genomedir);
+die "TXDBGEN: annotation file $annofilename not found\n" if (! -e $annofilename);
+$genomedir = Cwd::abs_path($genomedir);
+$annofilename = Cwd::abs_path($annofilename);
+
+my $annofilebase = basename($annofilename);
+
+# Refuse to run when the target exists: mv would nest the cache folder inside it.
+my $dbfolder = "$SrcFolder/../db/$txdbname";
+die "TXDBGEN: $dbfolder already exists, pick another name with -n\n" if (-e $dbfolder);
+
+#need a cache folder to avoid mess
+my $cachefolder = $annofilebase.".cache";
+if (! -e $cachefolder)
+{
+	mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder: $!\n";
+}
+
+if($gtfinput)
+{
+	print "TXDBGEN: converting gtf file into bed format\n";
+	my $bedfile = "$cachefolder/$annofilebase.bed";
+	run_cmd("perl ".shell_quote("$SrcFolder/gtf2bed.pl")." ".shell_quote($annofilename)." >".shell_quote($bedfile));
+	$annofilename = $bedfile;
+}
+
+print "TXDBGEN: scan $annofilename for AS events...\n";
+run_cmd("perl", "$SrcFolder/scanbed2txdb.pl", $annofilename, "$cachefolder/TXdb.tmp");
+print "TXDBGEN: fetch sequences from $genomedir...\n";
+run_cmd("sort", "-k1,1", "-o", "$cachefolder/TXdb.tmp.sort", "$cachefolder/TXdb.tmp");
+#get fasta file list
+run_cmd("ls ".shell_quote($genomedir)."/*.fa >".shell_quote("$cachefolder/chr.list"));
+run_cmd("perl", "$SrcFolder/get_bed_fa_j.pl", "$cachefolder/TXdb.tmp.sort", "$cachefolder/chr.list", "$cachefolder/out.bed", "$cachefolder/TXdb.fasta");
+
+print "TXDBGEN: generate files for parallel computing...\n";
+if (! -e "$cachefolder/parallel")
+{
+	mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel: $!\n";
+}
+# grep exits with 1 when nothing matched; only higher statuses are real errors.
+my $grep_status = system("grep L ".shell_quote("$cachefolder/out.bed")." >".shell_quote("$cachefolder/TXdb.bed")) >> 8;
+die "TXDBGEN: grep failed on $cachefolder/out.bed\n" if ($grep_status > 1);
+unlink("$cachefolder/out.bed") or warn "TXDBGEN: could not remove $cachefolder/out.bed: $!\n";
+# NOTE(review): TXdb.tmp.evi is presumably written by scanbed2txdb.pl -- confirm.
+run_cmd("sort", "-o", "$cachefolder/TXdb.evi", "$cachefolder/TXdb.tmp.evi");
+unlink("$cachefolder/TXdb.tmp.evi") or warn "TXDBGEN: could not remove $cachefolder/TXdb.tmp.evi: $!\n";
+# splitdb.sh's exit status convention is not visible here, so only report it.
+system("bash", "$SrcFolder/splitdb.sh", "$cachefolder/parallel") == 0
+	or warn "TXDBGEN: splitdb.sh returned status $?\n";
+
+print "TXDBGEN: build Bowtie index...\n";
+if (! -e "$cachefolder/btw")
+{
+	mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw: $!\n";
+}
+run_cmd("bowtie-build", "$cachefolder/TXdb.fasta", "$cachefolder/btw/TXdb");
+# Cleanup is best effort; leftover temp files do not invalidate the database.
+system("rm ".shell_quote($cachefolder)."/TXdb.tmp* ".shell_quote("$cachefolder/chr.list"));
+
+print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";
+run_cmd("mv", $cachefolder, $dbfolder);
+print "TXDBGEN: Done!\n";
+