annotate TXdbgen @ 5:2ebca9da5e42 draft default tip

planemo upload
author bioitcore
date Thu, 07 Sep 2017 17:39:24 -0400
parents adc0f7765d85
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
#!/usr/bin/perl
my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
# this script is to generate TXdb database files from bed/gtf file
#
# NOTE(review): the $SrcFolder assignment is deliberately kept on line 2,
# exactly where the original had it; presumably an installer rewrites that
# line -- confirm before moving it.
#
# Pipeline, in order:
#   1. check that the required external programs are available
#   2. parse and validate the command line (-g, -a, -n, --gtf, --knownonly)
#   3. create "<annotation basename>.cache" in the current directory
#   4. with --gtf, run gtf2bed.pl and use its output as the annotation file
#   5. run scanbed2txdb.pl, sort its output, list the genome *.fa files and
#      run get_bed_fa_j.pl
#   6. derive TXdb.bed / TXdb.evi and run splitdb.sh on the "parallel" folder
#   7. run bowtie-build into "btw/TXdb"
#   8. remove temporary files and move the cache folder to
#      $SrcFolder/../db/<txdb name>
#
# External commands are started in list form (no shell), so paths containing
# spaces or shell metacharacters are passed through untouched, and every
# step's exit status is checked so a failure stops the pipeline instead of
# ending with a half-built database being moved into place.

use strict;
use warnings;
use Cwd;
use File::Basename;
use Getopt::Long;

my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
foreach my $program (@programs)
{
    # The names are fixed literals, so using the shell here is safe; stderr is
    # silenced as well so only the CHECK message below is shown.
    die ("CHECK: $program not found\n") if(system("hash $program >/dev/null 2>&1"));
}

my $genomedir = "";
my $annofilename = "";
my $txdbname = "userdefined";
my $knownonly = 0;   # accepted for compatibility; not used anywhere in this script
my $gtfinput = 0;

my $options_ok = GetOptions (
    "g:s"=>\$genomedir,
    "a:s"=>\$annofilename,
    "n:s"=>\$txdbname,
    "gtf"=>\$gtfinput,
    "knownonly"=>\$knownonly
);

my $InputParaDes="    Usage of the script:
    -g      genome fasta file location
    -a      annotation file (bed/gtf)
    -n      txdb name
    --gtf   specify this if annotation file is in gtf format
";

# Bad options or missing mandatory arguments are an error: report on STDERR
# and exit non-zero so calling tools can detect the failure.
if(!$options_ok or $genomedir eq "" or $annofilename eq "")
{
    print STDERR $InputParaDes;
    exit 1;
}

# "-n" given without a value leaves an empty name, which would make the final
# mv target the db folder itself; fall back to the default name instead.
$txdbname = "userdefined" if $txdbname eq "";

# Validate before abs_path(), which cannot be relied on for missing paths.
die "TXDBGEN: genome folder $genomedir not found\n" unless -d $genomedir;
die "TXDBGEN: annotation file $annofilename not found\n" unless -e $annofilename;

$genomedir = Cwd::abs_path($genomedir);
$annofilename = Cwd::abs_path($annofilename);

my $annofilebase = basename($annofilename);

#need a cache folder to avoid mess
my $cachefolder = $annofilebase.".cache";
ensure_dir($cachefolder, "cache folder $cachefolder");

if($gtfinput)
{
    print "TXDBGEN: converting gtf file into bed format\n";
    run_cmd_to_file("$cachefolder/$annofilebase.bed",
        'perl', "$SrcFolder/gtf2bed.pl", $annofilename);
    $annofilename = "$cachefolder/$annofilebase.bed";
}

print "TXDBGEN: scan $annofilename for AS events...\n";
run_cmd('perl', "$SrcFolder/scanbed2txdb.pl", $annofilename, "$cachefolder/TXdb.tmp");

print "TXDBGEN: fetch sequences from $genomedir...\n";
run_cmd('sort', '-k1,1', '-o', "$cachefolder/TXdb.tmp.sort", "$cachefolder/TXdb.tmp");

#get fasta file list
# The glob still needs a shell, but the folder is handed over as a positional
# parameter ("$1") rather than interpolated, so it is never parsed as shell
# syntax. ls exits non-zero when no *.fa file matches, which aborts the run.
run_cmd_to_file("$cachefolder/chr.list", 'sh', '-c', 'ls "$1"/*.fa', 'sh', $genomedir);

run_cmd('perl', "$SrcFolder/get_bed_fa_j.pl",
    "$cachefolder/TXdb.tmp.sort", "$cachefolder/chr.list",
    "$cachefolder/out.bed", "$cachefolder/TXdb.fasta");

print "TXDBGEN: generate files for parallel computing...\n";
ensure_dir("$cachefolder/parallel", "$cachefolder/parallel");

# Same selection as "grep L": keep every line containing an "L". Done in Perl
# so that "no line matched" is not mistaken for a failed command.
copy_lines_matching(qr/L/, "$cachefolder/out.bed", "$cachefolder/TXdb.bed");
remove_files("$cachefolder/out.bed");

# TXdb.tmp.evi is not written by this script; presumably an earlier helper
# produces it next to TXdb.tmp -- verify against scanbed2txdb.pl.
run_cmd('sort', '-o', "$cachefolder/TXdb.evi", "$cachefolder/TXdb.tmp.evi");
remove_files("$cachefolder/TXdb.tmp.evi");

# splitdb.sh's exit-status convention is not visible from here, so a non-zero
# status is reported but does not abort the run.
my $split_status = system('bash', "$SrcFolder/splitdb.sh", "$cachefolder/parallel");
if ($split_status != 0)
{
    warn "TXDBGEN: warning: splitdb.sh reported "
        . describe_status($split_status, "$!") . "\n";
}

print "TXDBGEN: build Bowtie index...\n";
ensure_dir("$cachefolder/btw", "$cachefolder/btw");
run_cmd('bowtie-build', "$cachefolder/TXdb.fasta", "$cachefolder/btw/TXdb");

# Equivalent of "rm $cachefolder/TXdb.tmp* $cachefolder/chr.list", without
# relying on shell globbing of a user-derived folder name.
opendir(my $cache_dh, $cachefolder)
    or die "TXDBGEN: could not read $cachefolder: $!\n";
my @leftovers = map { "$cachefolder/$_" } grep { /\ATXdb\.tmp/ } readdir $cache_dh;
closedir $cache_dh;
remove_files(@leftovers, "$cachefolder/chr.list");

print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";

# As with plain mv: if the destination already exists as a directory, the
# cache folder is moved inside it rather than replacing it.
run_cmd('mv', '--', $cachefolder, "$SrcFolder/../db/$txdbname");
print "TXDBGEN: Done!\n";

# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------

# Turn a wait status (as left in $? or returned by system) into readable text.
# $errno_text is the stringified $! captured right after the call; it is only
# meaningful when the status is -1 (the command could not be started).
sub describe_status
{
    my ($status, $errno_text) = @_;
    return "could not start: $errno_text" if $status == -1;
    return sprintf("killed by signal %d", $status & 127) if $status & 127;
    return sprintf("exit status %d", $status >> 8);
}

# Run an external command in list form (no shell) and die unless it succeeds.
sub run_cmd
{
    my (@cmd) = @_;
    my $status = system(@cmd);
    return if $status == 0;
    die "TXDBGEN: command failed (" . describe_status($status, "$!") . "): @cmd\n";
}

# Run an external command in list form and store its standard output in
# $outfile (replacement for "cmd >file" without a shell). Dies if the file
# cannot be written or the command does not exit successfully.
sub run_cmd_to_file
{
    my ($outfile, @cmd) = @_;
    open(my $out, '>', $outfile)
        or die "TXDBGEN: could not write $outfile: $!\n";
    open(my $pipe, '-|', @cmd)
        or die "TXDBGEN: could not run @cmd: $!\n";
    while (my $line = <$pipe>)
    {
        print {$out} $line or die "TXDBGEN: could not write $outfile: $!\n";
    }
    # close() on a pipe waits for the child and is false on a non-zero exit.
    close($pipe)
        or die "TXDBGEN: command failed (" . describe_status($?, "$!") . "): @cmd\n";
    close($out)
        or die "TXDBGEN: could not write $outfile: $!\n";
    return;
}

# Copy the lines of $infile that match $pattern into $outfile. An empty
# result is not an error.
sub copy_lines_matching
{
    my ($pattern, $infile, $outfile) = @_;
    open(my $in, '<', $infile)
        or die "TXDBGEN: could not read $infile: $!\n";
    open(my $out, '>', $outfile)
        or die "TXDBGEN: could not write $outfile: $!\n";
    while (my $line = <$in>)
    {
        next unless $line =~ $pattern;
        print {$out} $line or die "TXDBGEN: could not write $outfile: $!\n";
    }
    close($in);
    close($out)
        or die "TXDBGEN: could not write $outfile: $!\n";
    return;
}

# Delete the given files; a failure is reported but is not fatal, matching the
# original best-effort "rm" calls.
sub remove_files
{
    my (@paths) = @_;
    foreach my $path (@paths)
    {
        next if unlink $path;
        warn "TXDBGEN: could not remove $path: $!\n";
    }
    return;
}

# Create $dir unless it is already a directory; $label is the text used in
# the error message.
sub ensure_dir
{
    my ($dir, $label) = @_;
    return if -d $dir;
    mkdir $dir or die "TXDBGEN: could not create $label: $!\n";
    return;
}