Mercurial > repos > bioitcore > splicetrap
comparison bin/TXdbgen @ 1:adc0f7765d85 draft
planemo upload
author | bioitcore |
---|---|
date | Thu, 07 Sep 2017 15:06:58 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d4ca551ca300 | 1:adc0f7765d85 |
---|---|
#!/usr/bin/perl
my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
# this script is to generate TXdb database files from bed/gtf file
#
# Steps, in order:
#   1. check that the required command line tools are available
#   2. optionally convert a GTF annotation to BED (gtf2bed.pl)
#   3. scan the annotation (scanbed2txdb.pl)
#   4. fetch sequences from the genome fasta files (get_bed_fa_j.pl)
#   5. split the database for parallel computing (splitdb.sh)
#   6. build the Bowtie index (bowtie-build)
#   7. move the finished cache folder to $SrcFolder/../db/<txdb name>
#
# Every external step is checked; the script dies on the first failure
# instead of carrying on with incomplete intermediate files.
#
# NOTE(review): the hard-coded $SrcFolder assignment on line 2 looks like it
# is written by an installer -- confirm before moving or editing that line.

use strict;
use warnings;
use Cwd;
use File::Basename;
use Getopt::Long;

# Flush progress messages immediately so they stay in order with the output
# of the child processes when STDOUT is redirected to a file or pipe.
$| = 1;

# Quote one argument for /bin/sh so that spaces and metacharacters in file
# names are passed through literally.
sub shell_quote
{
	my ($arg) = @_;
	$arg =~ s/'/'\\''/g;
	return "'$arg'";
}

# Run an external command and die with a diagnostic unless it exits with 0.
# Several arguments: executed directly, without a shell.
# One string: handed to the shell (needed for redirection and globbing);
# callers must shell_quote() every interpolated path.
sub run_cmd
{
	my @cmd = @_;
	return if system(@cmd) == 0;
	my $why = ($? == -1) ? "could not be started: $!"
	        : ($? & 127) ? "was killed by signal " . ($? & 127)
	        :              "exited with status " . ($? >> 8);
	die "TXDBGEN: '@cmd' $why\n";
}

my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
foreach my $program (@programs)
{
	# `hash` is a shell builtin; a non-zero status means the tool is not in PATH
	die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
}

my $genomedir = "";

my $annofilename = "";
my $txdbname = "userdefined";
# --knownonly is still accepted so existing command lines keep working,
# but nothing in this script reads it.
my $knownonly = 0;
my $gtfinput = 0;

my $InputParaDes=" Usage of the script:
-g genome fasta file location
-a annotation file (bed/gtf)
-n txdb name
--gtf specify this if annotation file is in gtf format
";

GetOptions (
	"g:s"=>\$genomedir,
	"a:s"=>\$annofilename,
	"n:s"=>\$txdbname,
	"gtf"=>\$gtfinput,
	"knownonly"=>\$knownonly
) or do {
	print STDERR $InputParaDes;
	exit 1;
};

# An empty txdb name would make the final mv target the db folder itself.
if($genomedir eq "" or $annofilename eq "" or $txdbname eq "")
{
	print $InputParaDes;
	# non-zero so that wrappers can detect the missing arguments
	exit 1;
}

# abs_path() can return undef for a path that cannot be resolved.
my $genomedir_abs = Cwd::abs_path($genomedir);
die "TXDBGEN: genome folder $genomedir not found\n"
	unless (defined $genomedir_abs and -d $genomedir_abs);
$genomedir = $genomedir_abs;

my $annofilename_abs = Cwd::abs_path($annofilename);
die "TXDBGEN: annotation file $annofilename not found\n"
	unless (defined $annofilename_abs and -e $annofilename_abs);
$annofilename = $annofilename_abs;

# Check the destination before the expensive steps: if it already exists,
# `mv` would silently place the cache folder inside it.
my $dbfolder = "$SrcFolder/../db";
my $txdbfolder = "$dbfolder/$txdbname";
die "TXDBGEN: database folder $dbfolder not found\n" unless (-d $dbfolder);
die "TXDBGEN: $txdbfolder already exists, remove it or choose another name with -n\n"
	if (-e $txdbfolder);

my $annofilebase = basename($annofilename);
#need a cache folder to avoid mess
# (created in the current working directory)

my $cachefolder = $annofilebase.".cache";

if (! -e $cachefolder)
{
	mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder: $!\n";
}
if($gtfinput)
{
	print "TXDBGEN: converting gtf file into bed format\n";
	my $bedfile = "$cachefolder/$annofilebase.bed";
	run_cmd("perl " . shell_quote("$SrcFolder/gtf2bed.pl") . " " . shell_quote($annofilename)
		. " >" . shell_quote($bedfile));
	$annofilename = $bedfile;
}


print "TXDBGEN: scan $annofilename for AS events...\n";
run_cmd('perl', "$SrcFolder/scanbed2txdb.pl", $annofilename, "$cachefolder/TXdb.tmp");
print "TXDBGEN: fetch sequences from $genomedir...\n";
run_cmd('sort', '-k1,1', '-o', "$cachefolder/TXdb.tmp.sort", "$cachefolder/TXdb.tmp");
#get fasta file list
# (the glob is left to the shell; ls fails, and so do we, if no *.fa exists)
run_cmd("ls " . shell_quote($genomedir) . "/*.fa >" . shell_quote("$cachefolder/chr.list"));

run_cmd('perl', "$SrcFolder/get_bed_fa_j.pl", "$cachefolder/TXdb.tmp.sort",
	"$cachefolder/chr.list", "$cachefolder/out.bed", "$cachefolder/TXdb.fasta");

print "TXDBGEN: generate files for parallel computing...\n";
if (! -e "$cachefolder/parallel")
{
	mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel: $!\n";
}

# Keep only the lines of out.bed that contain an "L" (same as `grep L`).
# Done in Perl so that "no matching line" is not mistaken for a failure.
{
	my $outbed = "$cachefolder/out.bed";
	my $txdbbed = "$cachefolder/TXdb.bed";
	open(my $in, '<', $outbed) or die "TXDBGEN: could not read $outbed: $!\n";
	open(my $out, '>', $txdbbed) or die "TXDBGEN: could not write $txdbbed: $!\n";
	while (my $line = <$in>)
	{
		print {$out} $line if ($line =~ /L/);
	}
	close($in);
	close($out) or die "TXDBGEN: could not write $txdbbed: $!\n";
	unlink($outbed) or warn "TXDBGEN: could not remove $outbed: $!\n";
}

# TXdb.tmp.evi is not created by this script; presumably one of the helper
# scripts above writes it next to TXdb.tmp -- TODO confirm.
run_cmd('sort', '-o', "$cachefolder/TXdb.evi", "$cachefolder/TXdb.tmp.evi");
unlink("$cachefolder/TXdb.tmp.evi")
	or warn "TXDBGEN: could not remove $cachefolder/TXdb.tmp.evi: $!\n";
run_cmd('bash', "$SrcFolder/splitdb.sh", "$cachefolder/parallel");
print "TXDBGEN: build Bowtie index...\n";

if (! -e "$cachefolder/btw")
{
	mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw: $!\n";
}
run_cmd('bowtie-build', "$cachefolder/TXdb.fasta", "$cachefolder/btw/TXdb");

# Remove the remaining intermediate files (TXdb.tmp* and chr.list).
# A leftover temporary file is only worth a warning, not a failed build.
{
	opendir(my $dh, $cachefolder) or die "TXDBGEN: could not read $cachefolder: $!\n";
	my @leftovers = map { "$cachefolder/$_" } grep { /^TXdb\.tmp/ } readdir($dh);
	closedir($dh);
	push @leftovers, "$cachefolder/chr.list";
	foreach my $file (@leftovers)
	{
		unlink($file) or warn "TXDBGEN: could not remove $file: $!\n";
	}
}
print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";

run_cmd('mv', $cachefolder, $txdbfolder);
print "TXDBGEN: Done!\n";