#!/usr/bin/perl
# Generate TXdb database files from a bed/gtf annotation file.
#
# Steps performed (all intermediate files live in "<annotation>.cache" under
# the current working directory):
#   1. optionally convert the GTF annotation to BED (gtf2bed.pl)
#   2. scan the annotation for AS events (scanbed2txdb.pl)
#   3. fetch sequences from the genome folder's *.fa files (get_bed_fa_j.pl)
#   4. split the database for parallel computing (splitdb.sh)
#   5. build the Bowtie index (bowtie-build)
#   6. move the cache folder to $SrcFolder/../db/<txdb name>
#
# Essential steps abort the run when they fail, so a half-built database is
# never moved into the db directory.

use strict;
use warnings;
use Cwd;
use File::Basename;
use Getopt::Long;

# Installation directory holding the helper scripts used below.
my $SrcFolder = "/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";

# Quote a string so it can be interpolated safely into a /bin/sh command line
# (paths may contain spaces or shell metacharacters).
sub shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Run an external command the pipeline cannot continue without.
# Accepts either a single shell command string or a program plus argument
# list (the list form bypasses the shell). Dies if the command fails.
sub run_required {
    my (@command) = @_;
    system(@command) == 0
        or die "TXDBGEN: command failed (wait status $?): @command\n";
    return;
}

# Run an external command on a best-effort basis: a failure is reported but
# does not stop the pipeline.
sub run_optional {
    my (@command) = @_;
    system(@command) == 0
        or warn "TXDBGEN: warning: command returned non-zero (wait status $?): @command\n";
    return;
}

# Make sure every external program used below is reachable before starting.
my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
foreach my $program (@programs)
{
    die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
}

my $genomedir    = "";
my $annofilename = "";
my $txdbname     = "userdefined";
my $knownonly    = 0;    # accepted on the command line but not used in this script
my $gtfinput     = 0;

GetOptions (
    "g:s"       => \$genomedir,
    "a:s"       => \$annofilename,
    "n:s"       => \$txdbname,
    "gtf"       => \$gtfinput,
    "knownonly" => \$knownonly
);

my $InputParaDes=" Usage of the script:
-g genome fasta file location
-a annotation file (bed/gtf)
-n txdb name
--gtf specify this if annotation file is in gtf format
";

if($genomedir eq "" or $annofilename eq "")
{
    print $InputParaDes;
    exit;
}

# Validate the inputs before resolving them: Cwd::abs_path can return undef
# for a path that does not exist.
-d $genomedir
    or die "TXDBGEN: genome folder $genomedir not found\n";
-e $annofilename
    or die "TXDBGEN: annotation file $annofilename not found\n";

$genomedir    = Cwd::abs_path($genomedir);
$annofilename = Cwd::abs_path($annofilename);

my $annofilebase = basename($annofilename);

# need a cache folder to avoid mess
my $cachefolder = $annofilebase.".cache";

if (! -e $cachefolder)
{
    mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n";
}

# Shell-quoted forms for the commands that need redirection or globbing.
my $q_cache = shell_quote($cachefolder);

if($gtfinput)
{
    print "TXDBGEN: converting gtf file into bed format\n";
    my $bedfile = "$cachefolder/$annofilebase.bed";
    run_required(
        "perl " . shell_quote("$SrcFolder/gtf2bed.pl") . " "
        . shell_quote($annofilename) . " >" . shell_quote($bedfile)
    );
    $annofilename = $bedfile;
}

print "TXDBGEN: scan $annofilename for AS events...\n";
run_required("perl", "$SrcFolder/scanbed2txdb.pl", $annofilename, "$cachefolder/TXdb.tmp");

print "TXDBGEN: fetch sequences from $genomedir...\n";
run_required("sort -k1,1 $q_cache/TXdb.tmp >$q_cache/TXdb.tmp.sort");

# get fasta file list (the glob is left unquoted on purpose so the shell expands it)
run_required("ls " . shell_quote($genomedir) . "/*.fa >$q_cache/chr.list");

run_required(
    "perl", "$SrcFolder/get_bed_fa_j.pl",
    "$cachefolder/TXdb.tmp.sort", "$cachefolder/chr.list",
    "$cachefolder/out.bed",       "$cachefolder/TXdb.fasta"
);

print "TXDBGEN: generate files for parallel computing...\n";
if (! -e "$cachefolder/parallel")
{
    mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n";
}

# grep exits with 1 when nothing matches; that only leaves TXdb.bed empty,
# so it is reported as a warning. Any other failure is fatal.
my $grep_status = system("grep L $q_cache/out.bed >$q_cache/TXdb.bed");
if ($grep_status == -1 or ($grep_status & 127) or ($grep_status >> 8) > 1)
{
    die "TXDBGEN: grep on $cachefolder/out.bed failed (wait status $grep_status)\n";
}
elsif (($grep_status >> 8) == 1)
{
    warn "TXDBGEN: warning: no matching lines in $cachefolder/out.bed; TXdb.bed is empty\n";
}
unlink "$cachefolder/out.bed"
    or warn "TXDBGEN: warning: could not remove $cachefolder/out.bed: $!\n";

# TXdb.tmp.evi is not written by this script; presumably one of the helper
# scripts above produces it -- TODO confirm.
run_required("sort $q_cache/TXdb.tmp.evi >$q_cache/TXdb.evi");
unlink "$cachefolder/TXdb.tmp.evi"
    or warn "TXDBGEN: warning: could not remove $cachefolder/TXdb.tmp.evi: $!\n";

# The exit-status convention of splitdb.sh is not visible from here, so a
# non-zero status is only reported, not treated as fatal.
run_optional("bash", "$SrcFolder/splitdb.sh", "$cachefolder/parallel");

print "TXDBGEN: build Bowtie index...\n";

if (! -e "$cachefolder/btw")
{
    mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n";
}
run_required("bowtie-build", "$cachefolder/TXdb.fasta", "$cachefolder/btw/TXdb");

# Remove the remaining temporary files; leftovers are harmless, so best-effort.
run_optional("rm $q_cache/TXdb.tmp* $q_cache/chr.list");

print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";

run_required("mv", $cachefolder, "$SrcFolder/../db/$txdbname");
print "TXDBGEN: Done!\n";