#!/usr/bin/perl

# Directory holding the SpliceTrap helper scripts run by this driver; the
# finished database is stored relative to it (../db).
my $SrcFolder = '/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin';

# This script generates TXdb database files from a bed/gtf annotation file.

use strict;
use warnings;
use Cwd;
use File::Basename;
use Getopt::Long;

     9 my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
 | 
| 
 | 
    10 foreach my $program (@programs)
 | 
| 
 | 
    11 {
 | 
| 
 | 
    12         die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 }
 | 
| 
 | 
    15 
 | 
| 
 | 
    16 
 | 
| 
 | 
    17 my $genomedir = "";
 | 
| 
 | 
    18 
 | 
| 
 | 
    19 my $annofilename = "";
 | 
| 
 | 
    20 my $txdbname = "userdefined";
 | 
| 
 | 
    21 my $knownonly = 0;
 | 
| 
 | 
    22 my $gtfinput = 0;
 | 
| 
 | 
    23 
 | 
| 
 | 
    24 GetOptions (
 | 
| 
 | 
    25 	"g:s"=>\$genomedir,
 | 
| 
 | 
    26 	"a:s"=>\$annofilename,
 | 
| 
 | 
    27 	"n:s"=>\$txdbname,
 | 
| 
 | 
    28 	"gtf"=>\$gtfinput,
 | 
| 
 | 
    29 	"knownonly"=>\$knownonly
 | 
| 
 | 
    30 );
 | 
| 
 | 
    31 
 | 
| 
 | 
    32 my $InputParaDes="      Usage of the script:
 | 
| 
 | 
    33         -g      genome fasta file location
 | 
| 
 | 
    34 	-a	annotation file (bed/gtf)
 | 
| 
 | 
    35 	-n	txdb name
 | 
| 
 | 
    36 	--gtf	specify this if annotation file is in gtf format
 | 
| 
 | 
    37 ";
 | 
| 
 | 
    38 
 | 
| 
 | 
    39 if($genomedir eq "" or $annofilename eq "")
 | 
| 
 | 
    40 {
 | 
| 
 | 
    41 	print $InputParaDes;
 | 
| 
 | 
    42 	exit;
 | 
| 
 | 
    43 }
 | 
| 
 | 
    44 
 | 
| 
 | 
    45 $genomedir = Cwd::abs_path($genomedir);
 | 
| 
 | 
    46 $annofilename = Cwd::abs_path($annofilename);
 | 
| 
 | 
    47 
 | 
| 
 | 
    48 my $annofilebase = `basename $annofilename`;
 | 
| 
 | 
    49 chomp($annofilebase);
 | 
| 
 | 
    50 #need a cache folder to avoid mess
 | 
| 
 | 
    51 
 | 
| 
 | 
    52 my $cachefolder = $annofilebase.".cache";
 | 
| 
 | 
    53 
 | 
| 
 | 
    54 if (! -e $cachefolder)
 | 
| 
 | 
    55 {
 | 
| 
 | 
    56 	mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n";
 | 
| 
 | 
    57 }
 | 
| 
 | 
    58 if($gtfinput)
 | 
| 
 | 
    59 {
 | 
| 
 | 
    60 	print "TXDBGEN: converting gtf file into bed format\n";
 | 
| 
 | 
    61 	system ("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed");	
 | 
| 
 | 
    62 	$annofilename = "$cachefolder/$annofilebase.bed";
 | 
| 
 | 
    63 }
 | 
| 
 | 
    64 
 | 
| 
 | 
    65 
 | 
| 
 | 
    66 print "TXDBGEN: scan $annofilename for AS events...\n";
 | 
| 
 | 
    67 system("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp");
 | 
| 
 | 
    68 print "TXDBGEN: fetch sequences from $genomedir...\n";
 | 
| 
 | 
    69 system("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort");
 | 
| 
 | 
    70 #get fasta file list
 | 
| 
 | 
    71 system("ls $genomedir/*.fa >$cachefolder/chr.list");
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 system("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta");
 | 
| 
 | 
    74 
 | 
| 
 | 
    75 print "TXDBGEN: generate files for parallel computing...\n";
 | 
| 
 | 
    76 if (! -e "$cachefolder/parallel")
 | 
| 
 | 
    77 {
 | 
| 
 | 
    78 	mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n";
 | 
| 
 | 
    79 }
 | 
| 
 | 
    80 system("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed");
 | 
| 
 | 
    81 system("rm $cachefolder/out.bed");
 | 
| 
 | 
    82 system("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi");
 | 
| 
 | 
    83 system("rm $cachefolder/TXdb.tmp.evi");
 | 
| 
 | 
    84 system("bash $SrcFolder/splitdb.sh $cachefolder/parallel");
 | 
| 
 | 
    85 print "TXDBGEN: build Bowtie index...\n";
 | 
| 
 | 
    86 
 | 
| 
 | 
    87 if (! -e "$cachefolder/btw")
 | 
| 
 | 
    88 {
 | 
| 
 | 
    89 	mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n";
 | 
| 
 | 
    90 }
 | 
| 
 | 
    91 system("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb");
 | 
| 
 | 
    92 system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list");
 | 
| 
 | 
    93 print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";
 | 
| 
 | 
    94 
 | 
| 
 | 
    95 system("mv $cachefolder $SrcFolder/../db/$txdbname");
 | 
| 
 | 
    96 print "TXDBGEN: Done!\n";
 | 
| 
 | 
    97 
 |