Mercurial > repos > bioitcore > splicetrap
diff TXdbgen @ 1:adc0f7765d85 draft
planemo upload
author | bioitcore |
---|---|
date | Thu, 07 Sep 2017 15:06:58 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/perl
my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
# NOTE(review): the $SrcFolder line is kept verbatim as line 2, directly under
# the shebang, exactly as in the original; presumably an installer rewrites it
# by position -- confirm before moving or reformatting it.
#
# TXdbgen: generate TXdb database files from a bed/gtf annotation file.
# (Extracted from the Mercurial diff "+++ b/TXdbgen Thu Sep 07 15:06:58 2017
# -0400", where the file is added as new.)
#
# Steps, in order:
#   1. check that the required external programs are available;
#   2. optionally convert the gtf annotation to bed (gtf2bed.pl);
#   3. scan the annotation for AS events (scanbed2txdb.pl);
#   4. fetch sequences for the events from the genome *.fa files
#      (get_bed_fa_j.pl);
#   5. split the database for parallel computing (splitdb.sh);
#   6. build a Bowtie index (bowtie-build);
#   7. move the finished cache folder to $SrcFolder/../db/<txdb name>.
#
# Every external command is checked: the script dies on the first failing step
# instead of continuing and installing an incomplete database.

use strict;
use warnings;
use Cwd;
use File::Basename;
use Getopt::Long;

# Quote a string so that /bin/sh treats it as one literal word, whatever
# spaces or metacharacters it contains.
sub shell_quote
{
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

# Run a shell command line and die unless it exits with an accepted status.
#   $cmd      - complete command line, passed to the shell by system()
#   @ok_codes - exit codes that count as success (default: only 0)
# Returns the exit code of the command.
sub run_shell
{
    my ($cmd, @ok_codes) = @_;
    @ok_codes = (0) unless @ok_codes;

    my $status = system($cmd);
    die "TXDBGEN: could not run command ($!): $cmd\n" if $status == -1;
    die "TXDBGEN: command killed by signal " . ($status & 127) . ": $cmd\n"
        if $status & 127;

    my $code = $status >> 8;
    return $code if grep { $_ == $code } @ok_codes;
    die "TXDBGEN: command failed with exit code $code: $cmd\n";
}

# External programs used below (directly or by the helper scripts).
my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
foreach my $program (@programs)
{
    die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
}

my $genomedir = "";
my $annofilename = "";
my $txdbname = "userdefined";
my $knownonly = 0;    # accepted on the command line, not used by this script
my $gtfinput = 0;

GetOptions (
    "g:s"=>\$genomedir,
    "a:s"=>\$annofilename,
    "n:s"=>\$txdbname,
    "gtf"=>\$gtfinput,
    "knownonly"=>\$knownonly
);

my $InputParaDes="    Usage of the script:
    -g      genome fasta file location
    -a      annotation file (bed/gtf)
    -n      txdb name
    --gtf   specify this if annotation file is in gtf format
";

if($genomedir eq "" or $annofilename eq "")
{
    print $InputParaDes;
    exit;
}

# Validate the inputs before resolving them: the genome location is used as a
# directory ("<genomedir>/*.fa" below), and Cwd::abs_path may return undef for
# paths that do not exist.
die "TXDBGEN: genome directory $genomedir not found\n" unless -d $genomedir;
die "TXDBGEN: annotation file $annofilename not found\n" unless -e $annofilename;

$genomedir = Cwd::abs_path($genomedir);
$annofilename = Cwd::abs_path($annofilename);

# Refuse to continue if the target database already exists: "mv" would
# otherwise place the cache folder *inside* the existing directory.
my $dbtarget = "$SrcFolder/../db/$txdbname";
die "TXDBGEN: $dbtarget already exists, remove it or choose another name with -n\n"
    if -e $dbtarget;

my $annofilebase = basename($annofilename);

# need a cache folder to avoid mess (created in the current working directory)
my $cachefolder = $annofilebase.".cache";

if (! -e $cachefolder)
{
    mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n";
}

if($gtfinput)
{
    print "TXDBGEN: converting gtf file into bed format\n";
    my $bedfile = "$cachefolder/$annofilebase.bed";
    run_shell(sprintf('perl %s %s >%s',
        shell_quote("$SrcFolder/gtf2bed.pl"),
        shell_quote($annofilename),
        shell_quote($bedfile)));
    $annofilename = $bedfile;
}

print "TXDBGEN: scan $annofilename for AS events...\n";
run_shell(sprintf('perl %s %s %s',
    shell_quote("$SrcFolder/scanbed2txdb.pl"),
    shell_quote($annofilename),
    shell_quote("$cachefolder/TXdb.tmp")));

print "TXDBGEN: fetch sequences from $genomedir...\n";
run_shell(sprintf('sort -k1,1 %s >%s',
    shell_quote("$cachefolder/TXdb.tmp"),
    shell_quote("$cachefolder/TXdb.tmp.sort")));

# get fasta file list; the glob stays outside the quotes so the shell expands
# it, and "ls" fails (and we die) when the directory holds no *.fa file
run_shell(sprintf('ls %s/*.fa >%s',
    shell_quote($genomedir),
    shell_quote("$cachefolder/chr.list")));

run_shell(sprintf('perl %s %s %s %s %s',
    shell_quote("$SrcFolder/get_bed_fa_j.pl"),
    shell_quote("$cachefolder/TXdb.tmp.sort"),
    shell_quote("$cachefolder/chr.list"),
    shell_quote("$cachefolder/out.bed"),
    shell_quote("$cachefolder/TXdb.fasta")));

print "TXDBGEN: generate files for parallel computing...\n";
if (! -e "$cachefolder/parallel")
{
    mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n";
}

# grep exits with 1 when no line matches; that leaves an empty TXdb.bed, as the
# original script did, so it is reported but not treated as fatal here.
my $grep_code = run_shell(sprintf('grep L %s >%s',
    shell_quote("$cachefolder/out.bed"),
    shell_quote("$cachefolder/TXdb.bed")), 0, 1);
warn "TXDBGEN: warning: no line of $cachefolder/out.bed matched 'L', TXdb.bed is empty\n"
    if $grep_code == 1;
run_shell(sprintf('rm %s', shell_quote("$cachefolder/out.bed")));

# TXdb.tmp.evi is not written by this script; presumably scanbed2txdb.pl
# creates it next to TXdb.tmp -- verify against that helper.
run_shell(sprintf('sort %s >%s',
    shell_quote("$cachefolder/TXdb.tmp.evi"),
    shell_quote("$cachefolder/TXdb.evi")));
run_shell(sprintf('rm %s', shell_quote("$cachefolder/TXdb.tmp.evi")));

run_shell(sprintf('bash %s %s',
    shell_quote("$SrcFolder/splitdb.sh"),
    shell_quote("$cachefolder/parallel")));

print "TXDBGEN: build Bowtie index...\n";
if (! -e "$cachefolder/btw")
{
    mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n";
}
run_shell(sprintf('bowtie-build %s %s',
    shell_quote("$cachefolder/TXdb.fasta"),
    shell_quote("$cachefolder/btw/TXdb")));

# remove the remaining intermediate files (the glob is left unquoted on purpose)
run_shell(sprintf('rm %s/TXdb.tmp* %s',
    shell_quote($cachefolder),
    shell_quote("$cachefolder/chr.list")));

print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";
run_shell(sprintf('mv %s %s',
    shell_quote($cachefolder),
    shell_quote($dbtarget)));
print "TXDBGEN: Done!\n";