Mercurial > repos > bioitcore > splicetrap
view TXdbgen @ 5:2ebca9da5e42 draft default tip
planemo upload
author | bioitcore |
---|---|
date | Thu, 07 Sep 2017 17:39:24 -0400 |
parents | adc0f7765d85 |
children |
line wrap: on
line source
#!/usr/bin/perl
my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
# This script generates TXdb database files from a bed/gtf annotation file.
#
# Steps, each delegated to a helper under $SrcFolder or to a system tool:
#   1. optionally convert the annotation from gtf to bed   (gtf2bed.pl)
#   2. scan the bed file for AS events                     (scanbed2txdb.pl)
#   3. fetch sequences from the genome folder's *.fa files (get_bed_fa_j.pl)
#   4. generate the files for parallel computing           (splitdb.sh)
#   5. build the Bowtie index                              (bowtie-build)
#   6. move the finished cache folder to $SrcFolder/../db/<txdb name>
#
# Any failing pipeline step aborts the script with a TXDBGEN message instead
# of letting later steps run on missing or partial files.
use strict;
use warnings;
use Cwd;
use File::Basename;
use Getopt::Long;

# Flush progress messages immediately so they stay in order with the output
# of the child processes when stdout is a pipe or a file.
$| = 1;

# Quote a string so that /bin/sh treats it as one literal word.
sub shq {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

# Run a shell command line and die unless its exit code is acceptable.
#   $cmd      - complete command line (build it with shq() for paths)
#   @ok_codes - exit codes treated as success; defaults to (0)
# Returns the exit code of the command.
sub run_cmd {
    my ($cmd, @ok_codes) = @_;
    @ok_codes = (0) unless @ok_codes;

    my $status = system($cmd);
    die "TXDBGEN: could not run '$cmd': $!\n" if $status == -1;
    if ($status & 127) {
        die sprintf("TXDBGEN: '%s' died with signal %d\n", $cmd, $status & 127);
    }

    my $code = $status >> 8;
    return $code if grep { $_ == $code } @ok_codes;
    die "TXDBGEN: '$cmd' failed with exit code $code\n";
}

# Create $dir unless something with that name already exists.
#   $what - description used in the error message
sub ensure_dir {
    my ($dir, $what) = @_;
    return if -e $dir;
    mkdir $dir or die "TXDBGEN: could not create $what: $!\n";
    return;
}

# Best-effort removal of intermediate files; a leftover file is only warned
# about because it does not invalidate the generated database.
sub remove_files {
    my (@files) = @_;
    foreach my $file (@files) {
        unlink $file or warn "TXDBGEN: could not remove $file: $!\n";
    }
    return;
}

my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
foreach my $program (@programs)
{
    die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
}

my $genomedir = "";
my $annofilename = "";
my $txdbname = "userdefined";
my $knownonly = 0;   # accepted for command-line compatibility; not used in this script
my $gtfinput = 0;

GetOptions (
    "g:s"=>\$genomedir,
    "a:s"=>\$annofilename,
    "n:s"=>\$txdbname,
    "gtf"=>\$gtfinput,
    "knownonly"=>\$knownonly
);

my $InputParaDes="
Usage of the script:
    -g      genome fasta file location
    -a      annotation file (bed/gtf)
    -n      txdb name
    --gtf   specify this if annotation file is in gtf format
";

if($genomedir eq "" or $annofilename eq "")
{
    print $InputParaDes;
    exit;
}

# Validate the inputs before resolving them: Cwd::abs_path can return undef
# for paths that do not exist, which would silently corrupt later commands.
die "TXDBGEN: genome folder $genomedir not found\n" unless -d $genomedir;
die "TXDBGEN: annotation file $annofilename not found\n" unless -e $annofilename;

$genomedir = Cwd::abs_path($genomedir);
$annofilename = Cwd::abs_path($annofilename);

# Refuse to continue if the target database already exists: "mv" would
# otherwise nest the cache folder inside the existing directory.
my $dbfolder = "$SrcFolder/../db/$txdbname";
if (-e $dbfolder)
{
    die "TXDBGEN: $dbfolder already exists; remove it or choose another name with -n\n";
}

my $annofilebase = basename($annofilename);

#need a cache folder to avoid mess
my $cachefolder = $annofilebase.".cache";
ensure_dir($cachefolder, "cache folder $cachefolder");

if($gtfinput)
{
    print "TXDBGEN: converting gtf file into bed format\n";
    my $bedfile = "$cachefolder/$annofilebase.bed";
    run_cmd(join(' ', 'perl', shq("$SrcFolder/gtf2bed.pl"), shq($annofilename),
                 '>'.shq($bedfile)));
    $annofilename = $bedfile;
}

print "TXDBGEN: scan $annofilename for AS events...\n";
run_cmd(join(' ', 'perl', shq("$SrcFolder/scanbed2txdb.pl"), shq($annofilename),
             shq("$cachefolder/TXdb.tmp")));

print "TXDBGEN: fetch sequences from $genomedir...\n";
run_cmd(join(' ', 'sort', '-k1,1', shq("$cachefolder/TXdb.tmp"),
             '>'.shq("$cachefolder/TXdb.tmp.sort")));

#get fasta file list (the glob is left outside the quotes so the shell expands it)
run_cmd(join(' ', 'ls', shq($genomedir).'/*.fa', '>'.shq("$cachefolder/chr.list")));

run_cmd(join(' ', 'perl', shq("$SrcFolder/get_bed_fa_j.pl"),
             shq("$cachefolder/TXdb.tmp.sort"), shq("$cachefolder/chr.list"),
             shq("$cachefolder/out.bed"), shq("$cachefolder/TXdb.fasta")));

print "TXDBGEN: generate files for parallel computing...\n";
ensure_dir("$cachefolder/parallel", "$cachefolder/parallel");

# grep exits with 1 when no line matches; that only yields an empty TXdb.bed,
# exactly as before, so it is not treated as a failure.
run_cmd(join(' ', 'grep', 'L', shq("$cachefolder/out.bed"),
             '>'.shq("$cachefolder/TXdb.bed")), 0, 1);
remove_files("$cachefolder/out.bed");

# TXdb.tmp.evi is not created by this script; presumably scanbed2txdb.pl
# writes it next to TXdb.tmp -- TODO confirm.
run_cmd(join(' ', 'sort', shq("$cachefolder/TXdb.tmp.evi"),
             '>'.shq("$cachefolder/TXdb.evi")));
remove_files("$cachefolder/TXdb.tmp.evi");

run_cmd(join(' ', 'bash', shq("$SrcFolder/splitdb.sh"), shq("$cachefolder/parallel")));

print "TXDBGEN: build Bowtie index...\n";
ensure_dir("$cachefolder/btw", "$cachefolder/btw");
run_cmd(join(' ', 'bowtie-build', shq("$cachefolder/TXdb.fasta"),
             shq("$cachefolder/btw/TXdb")));

# Remove the remaining intermediates (TXdb.tmp* and chr.list) from the cache.
opendir(my $cachehandle, $cachefolder)
    or die "TXDBGEN: could not read $cachefolder: $!\n";
my @leftovers = map { "$cachefolder/$_" } grep { /\ATXdb\.tmp/ } readdir($cachehandle);
closedir($cachehandle);
remove_files(@leftovers, "$cachefolder/chr.list");

print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";
run_cmd(join(' ', 'mv', shq($cachefolder), shq($dbfolder)));

print "TXDBGEN: Done!\n";