annotate rfam.pl @ 46:ca05d68aca13 draft

Uploaded
author big-tiandm
date Thu, 13 Nov 2014 22:43:35 -0500
parents 0c4e11018934
children c75593f79aa9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
37
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
1 #!/usr/bin/perl -w
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
2 #Filename:
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
3 #Author: Tian Dongmei
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
4 #Email: tiandm@big.ac.cn
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
5 #Date: 2013/7/19
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
6 #Modified:
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
7 #Description:
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
8 my $version=1.00;
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
9
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
10 use strict;
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
11 use Getopt::Long;
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
12 use File::Basename;
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
13
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
# ---------------------------------------------------------------------------
# Main pipeline: parse the command line, prepare the result directory, make
# sure a bowtie index of the Rfam reference is available (building one from
# -ref when it is not), then map the reads against that index.
# ---------------------------------------------------------------------------
use File::Spec;    # core module: keeps input paths valid across the chdir below

my %opts;
# A malformed command line is handled like a missing required argument.
GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","time:s","h") or usage();
if (!(defined $opts{i} and defined $opts{o}) || defined $opts{h}) { #necessary arguments
    usage();
}

my $filein  = $opts{'i'};
my $dir     = $opts{'o'};
$dir .= "/" unless $dir =~ /\/$/;
my $rfam    = $opts{'ref'};          # also read and updated by checkACGT()
my $mis     = defined $opts{'v'}     ? $opts{'v'}     : 0;
my $index   = defined $opts{'index'} ? $opts{'index'} : "";
my $threads = defined $opts{'p'}     ? $opts{'p'}     : 1;

# The script changes into the result directory before running bowtie, so
# inputs given relative to the launch directory have to be made absolute
# first. Only paths that resolve from here are rewritten; everything else is
# passed through untouched.
$filein = File::Spec->rel2abs($filein) if -e $filein;
$rfam   = File::Spec->rel2abs($rfam)   if defined $rfam && -e $rfam;
$index  = File::Spec->rel2abs($index)  if $index ne "" && -s "$index.1.ebwt";

if (not -d $dir) {
    mkdir $dir or die "Cannot create output directory '$dir': $!\n";
}

# -time overrides the generated timestamp used in the result directory name.
my $time   = defined $opts{'time'} ? $opts{'time'} : Time();
my $mapdir = $dir."rfam_match_".$time;    # $dir already ends with '/'
if (not -d $mapdir) {
    mkdir $mapdir or die "Cannot create result directory '$mapdir': $!\n";
}
chdir $mapdir or die "Cannot change into result directory '$mapdir': $!\n";

# Quote one argument for the shell so file names containing spaces or shell
# metacharacters reach bowtie unchanged.
my $shell_quote = sub {
    my ($arg) = @_;
    $arg =~ s/'/'\\''/g;
    return "'$arg'";
};

###check genome index
unless (-s $index.".1.ebwt") {
    defined $rfam
        or die "No usable bowtie index found; -ref is required to build one\n";
    checkACGT($rfam);    # writes a U->T copy here and sets $rfam to its basename
    my $ref_q = $shell_quote->($rfam);
    # Backticks (not system) so bowtie-build's stdout stays off our stdout.
    my $build_output = qx{bowtie-build $ref_q $ref_q};
    $? == 0 or die "bowtie-build failed for '$rfam' (wait status $?)\n";
    $index = "$rfam";
}

### genome mapping
my ($index_q, $reads_q) = map { $shell_quote->($_) } ($index, $filein);
my $map_cmd = "bowtie -v $mis -f -p $threads -k 1 $index_q $reads_q"
            . " --al rfam_mapped.fa --un rfam_not_mapped.fa"
            . " > rfam_mapped.bwt 2> run.log";
system($map_cmd) == 0
    or die "bowtie mapping failed (wait status $?); see run.log in $mapdir\n";
37
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
51
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
# checkACGT - write a DNA-alphabet copy of the reference FASTA file into the
# current working directory.
#
# Arguments:
#   $path - FASTA file to convert; falls back to the file-level $rfam when
#           no argument is passed.
# Behaviour:
#   On every line that does not start with '>', each 'U' or 'u' is replaced
#   by 'T'. The result is written to basename($path) in the current working
#   directory, and the file-level $rfam is set to that basename (the caller
#   uses the updated $rfam afterwards).
# Returns:
#   The basename of the file that was written.
# Dies:
#   When no file name is available, the input cannot be opened, or the
#   output cannot be written.
sub checkACGT{
    my ($path) = @_;
    $path = $rfam unless defined $path;
    defined $path or die "checkACGT: no reference FASTA file given\n";

    # Read everything before writing: input and output are the same file
    # when the reference already lives in the current working directory.
    my $string = '';
    open(my $in, '<', $path) or die "Cannot read reference file '$path': $!\n";
    while (my $aline = <$in>) {
        $aline =~ s/U/T/gi if $aline !~ /^>/;
        $string .= $aline;
    }
    close $in;

    $rfam = basename($path);
    open(my $out, '>', $rfam) or die "Cannot write converted reference '$rfam': $!\n";
    print {$out} $string;
    close $out or die "Cannot finish writing '$rfam': $!\n";

    return $rfam;
}
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
67
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
# Time - current local time as a string suitable for a directory name.
#
# Returns:
#   "YYYY-MM-DD-hh-mm-ss"; month, day, hour, minute and second are
#   zero-padded to two digits.
sub Time{
    # localtime() reports the month as 0..11 and the year as an offset
    # from 1900.
    my ($sec,$min,$hour,$day,$month,$year) = (localtime(time()))[0..5];
    return sprintf("%d-%02d-%02d-%02d-%02d-%02d",
                   $year+1900, $month+1, $day, $hour, $min, $sec);
}
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
# usage - print the version and command-line help to STDOUT, then terminate
# the script with exit status 1. Called when a required option is missing
# or when -h is given.
sub usage{
print <<"USAGE";
Version $version
Usage:
$0 -i -o
options:
-i input file# input reads fasta file (bowtie is run with -f)
-ref input file# rfam file, which does not contain miRNAs;
     used to build the bowtie index when -index is not usable
-index file-prefix #(must be indexed by bowtie-build) The parameter
       string must be the prefix of the bowtie index. For instance, if
       the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
       the prefix is 'h_sapiens_37_asm'.##can be null
-v <int> report end-to-end hits w/ <=v mismatches; ignore qualities,default 0;

-p/--threads <int> number of alignment threads to launch (default: 1)

-o output directory
-time string #make directory time,default is the local time
-h help
USAGE
exit(1);
}
9ae0d25e4169 Uploaded
big-tiandm
parents:
diff changeset
103