#!/usr/bin/perl -w
#Filename: rfam.pl
#Author: Tian Dongmei
#Email: tiandm@big.ac.cn
#Date: 2013/7/19
#Modified:
#Description: Map reads against an Rfam reference with bowtie. When no
#             usable bowtie index prefix is given, the reference is copied
#             into the work directory (U converted to T) and indexed with
#             bowtie-build first.
#
# Provenance (from the HG changeset patch this script was recovered from):
#   User big-tiandm
#   Date 1406280153 14400
#   Node ID 141a337097e1e68fc3ba1ecb91a73fc2d1b161b0
#   Parent  a79212816cbcb3f57e58ab2b65d59de46f826d3c
#   diff -r a79212816cbc -r 141a337097e1 rfam.pl
#   (new file, Fri Jul 25 05:22:33 2014 -0400)
my $version=1.00;

use strict;
use warnings;
use Getopt::Long;
use File::Basename;
use File::Spec;

my %opts;
GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","time:s","h")
    or usage();
if (!(defined $opts{i} and defined $opts{o}) || defined $opts{h}) { #necessary arguments
    usage();
}

# The script chdir()s into the work directory below, so every user-supplied
# path is made absolute up front; otherwise relative paths would silently
# point at the wrong location after the chdir.
my $filein  = File::Spec->rel2abs($opts{'i'});
my $dir     = $opts{'o'};
$dir .= "/" unless $dir =~ m{/$};
my $rfam    = defined $opts{'ref'} ? File::Spec->rel2abs($opts{'ref'}) : undef;
my $mis     = defined $opts{'v'} ? $opts{'v'} : 0;
my $index   = (defined $opts{'index'} && length $opts{'index'})
            ? File::Spec->rel2abs($opts{'index'})
            : "";
my $threads = defined $opts{'p'} ? $opts{'p'} : 1;

if (not -d $dir) {
    mkdir $dir or die "Cannot create output directory '$dir': $!\n";
}

# -time overrides the timestamp used in the work directory name.
my $time = defined $opts{'time'} ? $opts{'time'} : Time();

my $mapdir = $dir."rfam_match_".$time;   # $dir already ends with "/"
if (not -d $mapdir) {
    mkdir $mapdir or die "Cannot create work directory '$mapdir': $!\n";
}
chdir $mapdir or die "Cannot change into work directory '$mapdir': $!\n";

###check genome index
if ($index ne "" and -s $index.".1.ebwt") {
    # A prebuilt index with this prefix exists; use it as is.
}
else {
    defined $rfam
        or die "No usable bowtie index (-index) and no reference file (-ref) given\n";
    $rfam = checkACGT($rfam);
    # bowtie-build needs the reference AND the output index base name; the
    # base name is the local reference copy so that $index matches it.
    run_command(['bowtie-build', $rfam, $rfam]);
    $index = $rfam;
}

### genome mapping
run_command(
    [
        'bowtie', '-v', $mis, '-f', '-p', $threads, '-k', 1,
        $index, $filein,
        '--al', 'rfam_mapped.fa',
        '--un', 'rfam_not_mapped.fa',
    ],
    'rfam_mapped.bwt',
);

# run_command(\@cmd, $stdout_file)
#   Runs @cmd without going through a shell (so paths containing spaces or
#   shell metacharacters are passed through verbatim). The command's STDOUT
#   is written to $stdout_file when given, and discarded otherwise.
#   Dies if the command cannot be started or exits with a non-zero status.
sub run_command {
    my ($cmd, $stdout_file) = @_;

    open(my $pipe, '-|', @{$cmd})
        or die "Cannot run '$cmd->[0]': $!\n";

    my $out;
    if (defined $stdout_file) {
        open($out, '>', $stdout_file)
            or die "Cannot write '$stdout_file': $!\n";
    }

    while (my $line = <$pipe>) {
        print {$out} $line if $out;
    }

    if ($out) {
        close $out or die "Error writing '$stdout_file': $!\n";
    }

    # close() on a pipe waits for the child; on failure $! is set for I/O
    # errors, otherwise $? carries the child's exit status.
    close $pipe
        or die($! ? "Error reading from '$cmd->[0]': $!\n"
                  : "'$cmd->[0]' failed with exit status ".($? >> 8)."\n");
    return;
}

# checkACGT($reference)
#   Copies the FASTA file $reference into the current directory under its
#   base name, replacing every U/u with T on sequence lines (header lines
#   starting with '>' are copied unchanged).
#   Returns the name of the local copy. Dies on any I/O error.
sub checkACGT {
    my ($source) = @_;

    my $local = basename($source);
    # Write to a temporary name first: if $source already is the local file,
    # opening it for writing directly would truncate it while it is read.
    my $tmp = $local.".tmp";

    open(my $in,  '<', $source) or die "Cannot read reference '$source': $!\n";
    open(my $out, '>', $tmp)    or die "Cannot write '$tmp': $!\n";
    while (my $aline = <$in>) {
        if ($aline !~ /^>/) {
            $aline =~ s/U/T/gi;
        }
        print {$out} $aline;
    }
    close $in;
    close $out or die "Error writing '$tmp': $!\n";

    rename($tmp, $local) or die "Cannot rename '$tmp' to '$local': $!\n";
    return $local;
}

# Time()
#   Returns the current local time as "YYYY-MM-DD-hh-mm-ss"; every field
#   except the year is zero-padded to two digits.
sub Time {
    my ($sec, $min, $hour, $day, $month, $year) = (localtime(time()))[0 .. 5];
    return sprintf("%d-%02d-%02d-%02d-%02d-%02d",
        $year + 1900, $month + 1, $day, $hour, $min, $sec);
}

# usage()
#   Prints the help text to STDOUT and exits with status 1.
sub usage {
print <<"USAGE";
Version $version
Usage:
$0 -i -o
options:
-i input file# input reads fasta/fastq file
-ref input file# rfam file, which do not contain miRNAs
-index file-prefix #(must be indexed by bowtie-build) The parameter
 string must be the prefix of the bowtie index. For instance, if
 the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
 the prefix is 'h_sapiens_37_asm'.##can be null
-v report end-to-end hits w/ <=v mismatches; ignore qualities,default 0;

-p/--threads number of alignment threads to launch (default: 1)

-o output directory
-time sting #make directory time,default is the local time
-h help
USAGE
exit(1);
}