Mercurial > repos > bioitcore > splicetrap
comparison bin/TXdbgen @ 1:adc0f7765d85 draft
planemo upload
| author | bioitcore | 
|---|---|
| date | Thu, 07 Sep 2017 15:06:58 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 0:d4ca551ca300 | 1:adc0f7765d85 | 
|---|---|
#!/usr/bin/perl
my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
# NOTE(review): the $SrcFolder line above is kept unchanged as the second line
# of the file in case an installer rewrites it in place -- TODO confirm.
#
# this script is to generate TXdb database files from bed/gtf file
#
# Steps (every step shells out to a helper under $SrcFolder or a system tool):
#   1. optionally convert the GTF annotation to BED     (gtf2bed.pl)
#   2. scan the annotation for AS events                (scanbed2txdb.pl)
#   3. fetch sequences from the genome fasta files      (get_bed_fa_j.pl)
#   4. generate the files for parallel computing        (splitdb.sh)
#   5. build the Bowtie index                           (bowtie-build)
#   6. move the finished cache folder to $SrcFolder/../db/<txdb name>
#
# Any failing step aborts the run with a non-zero exit status, so a
# half-built database is never moved into the db folder.

use strict;
use warnings;
use Cwd;
use File::Basename;
use Getopt::Long;

# Quote one string so that /bin/sh sees it as a single literal word.
sub shell_quote
{
	my ($word) = @_;
	$word =~ s/'/'\\''/g;
	return "'$word'";
}

# Quote every word of a command line and join them with spaces.
sub quoted
{
	my (@words) = @_;
	return join(" ", map { shell_quote($_) } @words);
}

# Run a shell command and die unless it succeeded.
#   $command        shell command line (already quoted where needed)
#   $max_ok_status  highest exit status still treated as success (default 0)
# Returns the exit status of the command.
sub run_step
{
	my ($command, $max_ok_status) = @_;
	$max_ok_status = 0 unless defined $max_ok_status;

	my $rc = system($command);
	die "TXDBGEN: could not execute: $command ($!)\n" if $rc == -1;
	die "TXDBGEN: killed by signal " . ($rc & 127) . ": $command\n" if $rc & 127;

	my $exit_status = $rc >> 8;
	die "TXDBGEN: exit status $exit_status from: $command\n" if $exit_status > $max_ok_status;
	return $exit_status;
}

# Make sure every external tool used by this script or its helpers is available.
my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
foreach my $program (@programs)
{
	die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
}


my $genomedir = "";

my $annofilename = "";
my $txdbname = "userdefined";
my $knownonly = 0;	# accepted on the command line but not used by this script
my $gtfinput = 0;

GetOptions (
	"g:s"=>\$genomedir,
	"a:s"=>\$annofilename,
	"n:s"=>\$txdbname,
	"gtf"=>\$gtfinput,
	"knownonly"=>\$knownonly
);

my $InputParaDes=" Usage of the script:
 -g genome fasta file location
 -a annotation file (bed/gtf)
 -n txdb name
 --gtf specify this if annotation file is in gtf format
 ";

if($genomedir eq "" or $annofilename eq "")
{
	print $InputParaDes;
	# missing required arguments is an error, so do not report success
	exit 1;
}

die "TXDBGEN: txdb name must not be empty\n" if $txdbname eq "";

# abs_path returns undef when the path cannot be resolved, so validate
# the results before they are used to build any command.
my $genomedir_abs = Cwd::abs_path($genomedir);
die "TXDBGEN: genome folder $genomedir not found\n"
	unless defined $genomedir_abs and -d $genomedir_abs;
$genomedir = $genomedir_abs;

my $annofilename_abs = Cwd::abs_path($annofilename);
die "TXDBGEN: annotation file $annofilename not found\n"
	unless defined $annofilename_abs and -e $annofilename_abs;
$annofilename = $annofilename_abs;

# Fail before doing any work: if the destination already exists, the final
# "mv" would nest the cache folder inside it instead of creating the database.
my $dbfolder = "$SrcFolder/../db/$txdbname";
die "TXDBGEN: $dbfolder already exists, remove it or choose another txdb name\n"
	if -e $dbfolder;

my $annofilebase = File::Basename::basename($annofilename);
#need a cache folder to avoid mess

my $cachefolder = $annofilebase.".cache";

if (! -e $cachefolder)
{
	mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n";
}
if($gtfinput)
{
	print "TXDBGEN: converting gtf file into bed format\n";
	my $bedfile = "$cachefolder/$annofilebase.bed";
	run_step(quoted("perl", "$SrcFolder/gtf2bed.pl", $annofilename) . " >" . shell_quote($bedfile));
	$annofilename = $bedfile;
}


print "TXDBGEN: scan $annofilename for AS events...\n";
run_step(quoted("perl", "$SrcFolder/scanbed2txdb.pl", $annofilename, "$cachefolder/TXdb.tmp"));
print "TXDBGEN: fetch sequences from $genomedir...\n";
run_step(quoted("sort", "-k1,1", "$cachefolder/TXdb.tmp") . " >" . shell_quote("$cachefolder/TXdb.tmp.sort"));
#get fasta file list
# the glob has to stay outside the quotes so that the shell expands it
run_step("ls " . shell_quote($genomedir) . "/*.fa >" . shell_quote("$cachefolder/chr.list"));

run_step(quoted("perl", "$SrcFolder/get_bed_fa_j.pl", "$cachefolder/TXdb.tmp.sort", "$cachefolder/chr.list", "$cachefolder/out.bed", "$cachefolder/TXdb.fasta"));

print "TXDBGEN: generate files for parallel computing...\n";
if (! -e "$cachefolder/parallel")
{
	mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n";
}
# grep exits with status 1 when no line matches; that is tolerated here
# (an empty TXdb.bed is written), only real grep errors (status > 1) are fatal.
run_step(quoted("grep", "L", "$cachefolder/out.bed") . " >" . shell_quote("$cachefolder/TXdb.bed"), 1);
run_step(quoted("rm", "$cachefolder/out.bed"));
run_step(quoted("sort", "$cachefolder/TXdb.tmp.evi") . " >" . shell_quote("$cachefolder/TXdb.evi"));
run_step(quoted("rm", "$cachefolder/TXdb.tmp.evi"));
run_step(quoted("bash", "$SrcFolder/splitdb.sh", "$cachefolder/parallel"));
print "TXDBGEN: build Bowtie index...\n";

if (! -e "$cachefolder/btw")
{
	mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n";
}
run_step(quoted("bowtie-build", "$cachefolder/TXdb.fasta", "$cachefolder/btw/TXdb"));
# remove the intermediate TXdb.tmp* files; the "*" stays unquoted for the shell
run_step("rm " . shell_quote("$cachefolder/TXdb.tmp") . "* " . shell_quote("$cachefolder/chr.list"));
print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";

run_step(quoted("mv", $cachefolder, $dbfolder));
print "TXDBGEN: Done!\n";
