Previous changeset 23:45de5e1ff487 (2014-07-25) Next changeset 25:10df3c84d54a (2014-07-31) |
Commit message:
Deleted selected files |
removed:
collapseReads2Tags.pl convert_bowtie_to_blast.pl count_rfam_express.pl filterReadsByLength.pl html.pl miRNA_Express_and_sequence.pl miRPlant.xml precursors.pl quantify.pl rfam.pl tool_dependencies.xml |
b |
diff -r 45de5e1ff487 -r 5691802f074b collapseReads2Tags.pl --- a/collapseReads2Tags.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,170 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2014-3-20 -#Modified: -#Description: fastq file form reads cluster(the same sequence in the same cluster) -my $version=1.00; - -use strict; -use Getopt::Long; - -my %opts; -GetOptions(\%opts,"i:s@","format=s","mark:s","qual:s","qv:i","o=s","h"); -if (!(defined $opts{o} and defined $opts{'format'}) || defined $opts{h}) { #necessary arguments -&usage; -} -my @filein=@{$opts{i}} if(defined $opts{i}); -my $name=defined $opts{'mark'} ? $opts{'mark'} : "seq"; -my $fileout=$opts{'o'}; -my $pq=defined $opts{'qv'} ? $opts{'qv'} : 33; -my %hash;##�ֿ���ԭʼ���� - -my $format=$opts{'format'}; -if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { - die "Parameter -format is error!\n"; -} - -my ($qualT,$qualV); -if (defined $opts{'qual'} && ($format eq "fastq" || $format eq "fq")) { #quality filter - my @temp=split /:/,$opts{'qual'}; - $qualT=$temp[0]; - $qualV=$temp[1]; - - for (my $i=0;$i<@filein;$i++) { - open IN,"<$filein[$i]"; - while (my $aline=<IN>) { - my $seq=<IN>; - my $n=<IN>; - my $qv=<IN>; - my $tag=&qvcheck($qv,$qualT,$qualV); - next if(!$tag); - my $str=substr($seq,0,6); - $hash{$str}[$i].=$seq; - } - close IN; - } -} -elsif($format eq "fastq" || $format eq "fq"){ ### do not filter low quality reads - for (my $i=0;$i<@filein;$i++) { - open IN,"<$filein[$i]"; - while (my $aline=<IN>) { - my $seq=<IN>; - my $n=<IN>; - my $qv=<IN>; - my $str=substr($seq,0,6); - $hash{$str}[$i].=$seq; - } - close IN; - } - -} -elsif($format eq "fasta" || $format eq "fa"){ - for (my $i=0;$i<@filein;$i++) { - open IN,"<$filein[$i]"; - while (my $aline=<IN>) { - my $seq=<IN>; - my $str=substr($seq,0,6); - $hash{$str}[$i].=$seq; - } - close IN; - } -} - -open OUT,">$fileout"; #output file -my $count=0; -foreach my $key (keys %hash) { - my %cluster; - for (my $i=0;$i<@filein;$i++) { - next if(!(defined $hash{$key}[$i])); - my @tmp=split/\n/,$hash{$key}[$i]; - foreach (@tmp) { - $cluster{$_}[$i]++; - } - } - - foreach my $seq (keys %cluster) { - my $exp=""; my $ee=0; - for (my $i=0;$i<@filein;$i++) { - if (defined $cluster{$seq}[$i]) { - $exp.="_$cluster{$seq}[$i]"; - $ee+=$cluster{$seq}[$i]; - }else{ - $exp.="_0"; - } - } - $count+=$ee; - $exp=~s/^_//; - print OUT ">$name","_$count:$exp","_x$ee\n$seq\n"; - } -} -close OUT; - - -sub qvcheck{ - my ($str,$t,$v)=@_; - my $qv=0; - if($t eq "mean"){ - $qv=&getMeanQuality($str); - } - elsif($t eq "min"){ - $qv=&getMinQuality($str); - } - if ($qv<$v) { - return 0; - } - return 1; -} - -sub getMeanQuality(){ - chomp $_[0]; - my @bases = split(//,$_[0]); - my $sum = 0; - for(my $i = 0; $i <= $#bases; $i++){ - my $num = ord($bases[$i]) - $pq; - $sum += $num; - } - - return $sum/($#bases+1); - -} - -### -### This function gives back the Q-value of the worst base -sub getMinQuality(){ - chomp $_[0]; - my @bases = split(//,$_[0]); - my $worst = 1000; - for(my $i = 0; $i <= $#bases; $i++){ -# printf ("base: $bases[$i] --> %d\n",ord($bases[$i])); - my $num = ord($bases[$i]) - $pq; - if($num < $worst){ - $worst = $num; - } - } - return $worst; -} - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -format -mark -qual -qv -o -options: --i input file#fastq file ##can be multiple -i file1 -i file2 ... --mark string#quary name,default is "seq" --o output file --format string # fastq|fasta|fq|fa - --qual #reads filter - eg:(min:value/mean:value) - This parameter just for solexa reads. - If the input files are solid and needs filter,please do filter first . - --qv integer #Phred quality64/33,default 33 --h help -USAGE -exit(1); -} - |
b |
diff -r 45de5e1ff487 -r 5691802f074b convert_bowtie_to_blast.pl --- a/convert_bowtie_to_blast.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,126 +0,0 @@ -#!/usr/bin/perl - - -use warnings; -use strict; -use Getopt::Std; - -######################################### USAGE ################################ - -my $usage= -"$0 file_bowtie_result file_solexa_seq file_chromosome - -This is a converter which changes Bowtie output into Blast format. -The input includes three files: a Bowtie result file (default Bowtie -output file), a fasta file consisting of small Reads and a chromosome -fasta file. It outputs the alignments in blast_parsed format. - -file_bowtie_result likes: - -AtFlower100010_x2 + MIR319c 508 AAGGAGATTCTTTCAGTCCAG IIIIIIIIIIIIIIIIIIIII 0 -AtFlower1000188_x1 + MIR2933a 421 TCGGAGAGGAAATTCGTCGGCG IIIIIIIIIIIIIIIIIIIIII 0 - -file_solexa_seq likes: - ->AtFlower100010_x2 -AAGGAGATTCTTTCAGTCCAG - -file_chromosome contains chromosome seq in fasta format - -"; - - -####################################### INPUT FILES ############################ - -my $file_bowtie_result=shift or die $usage; -my $file_short_seq=shift or die $usage; -my $file_chromosome_seq=shift or die $usage; - - -##################################### GLOBAL VARIBALES ######################### - -my %short_seq_length=(); -my %chromosome_length=(); - - -######################################### MAIN ################################# - -#get the short sequence id and its length -sequence_length($file_short_seq,\%short_seq_length); - -#get the chromosome sequence id and its length -sequence_length($file_chromosome_seq,\%chromosome_length); - -#convert bowtie result format to blast format; -change_format($file_bowtie_result); - -exit; - - -##################################### SUBROUTINES ############################## - -sub sequence_length{ - my ($file,$hash) = @_; - my ($id, $desc, $sequence, $seq_length) = (); - - open (FASTA, "<$file") or die "can not open $$file\n"; - while (<FASTA>) - { - chomp; - if (/^>(\S+)(.*)/) - { - $id = $1; - $desc = $2; - $sequence = ""; - while (<FASTA>){ - chomp; - if (/^>(\S+)(.*)/){ - $$hash{$id} = length $sequence; - $id = $1; - $desc = $2; - $sequence = ""; - next; - } - $sequence .= $_; - } - } - } - $seq_length=length($sequence); - $$hash{$id} = $seq_length; - close FASTA; -} - - - - - -sub change_format{ - #Change Bowtie format into blast format - my $file=shift @_; - open(FILE,"<$file")||die"can not open the bowtie result file:$!\n"; - #open(BLASTOUT,">blastout")||die"can not create the blastout file:$!\n"; - - while(<FILE>){ - chomp; - my @tmp=split("\t",$_); - #Clean the reads ID - my @tmp1=split(" ",$tmp[0]); - print "$tmp1[0]"."\t"."$short_seq_length{$tmp1[0]}"."\t"."1".'..'."$short_seq_length{$tmp1[0]}"."\t"."$tmp[2]"."\t"."$chromosome_length{$tmp[2]}"."\t"; - if($tmp[1] eq "+"){ - my $seq_end=$tmp[3] + $short_seq_length{$tmp1[0]}; - my $seq_bg=$tmp[3] + 1; - print "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Plus"."\n"; - } - if($tmp[1] eq "-"){ - my $seq_end=$chromosome_length{$tmp[2]} - $tmp[3]; - my $seq_bg=$seq_end - $short_seq_length{$tmp1[0]} + 1; - print "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Minus"."\n"; - } - } - -# close BLASTOUT; - -} - - - |
b |
diff -r 45de5e1ff487 -r 5691802f074b count_rfam_express.pl --- a/count_rfam_express.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,1800 +0,0 @@\n-#!/usr/bin/perl -w\n-#Filename:\n-#Author: Tian Dongmei\n-#Email: tiandm@big.ac.cn\n-#Date: 2013/7/19\n-#Modified:\n-#Description: \n-my $version=1.00;\n-\n-use strict;\n-use Getopt::Long;\n-use File::Basename;\n-\n-my %opts;\n-GetOptions(\\%opts,"i=s","o=s","tag:s","h");\n-if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments\n-&usage;\n-}\n-\n-my $filein=$opts{\'i\'};\n-my $fileout=$opts{\'o\'};\n-\n-my $marks=defined $opts{\'tag\'} ? $opts{\'tag\'} : "";\n-\n-if(!(defined $opts{\'tag\'})){\n-\tmy $line=`head -1 $filein`;\n-\tmy @tmp=split/\\t/,$line;\n-\t$tmp[0]=~/:([\\d|_]+)_x(\\d+)$/;\n-\tmy @ss=split/_/,$1;\n-\tfor (my $i=1;$i<=@ss;$i++) {\n-\t\t$marks .="Smp$i;";\n-\t}\n-}\n-\n-my @marks=split/\\;/,$marks;\n-\n-my %rfam_key;\n-while(<DATA>){\n- chomp;\n- if(/^(\\S+)\\s+(\\S+)$/){\n-\t\t$rfam_key{$1}=$2;\n- }\n-}\n-\n-\n-my %reads;\n-my %tags;\n-open IN,"<$filein";\n-while (my $aline=<IN>) {\n-\tchomp $aline;\n-\tmy @tmp=split/\\t/,$aline;\n-\t$tmp[0]=~/:([\\d|_]+)_x(\\d+)$/;\n-\n-\tmy @exp=split/_/,$1;\n-\tmy @tag=split/\\;/,$tmp[2];\n-\n-\tif (defined $rfam_key{$tag[0]}) {\n-\t\tfor (my $i=0;$i<@exp;$i++) {\n-\t\t\t$reads{$rfam_key{$tag[0]}}[$i]+=$exp[$i];\n-\t\t\t$tags{$rfam_key{$tag[0]}}[$i]++ if($exp[$i]!=0);\n-\t\t}\n-\t}else{\n-\t\tfor (my $i=0;$i<@exp;$i++) {\n-\t\t\t$reads{other}[$i]+=$exp[$i];\n-\t\t\t$tags{other}[$i]++ if($exp[$i]!=0);\n-\t\t}\n-\t}\n-\n-}\n-close IN;\n-\n-$"="\\t"; ##### @array print in \\t\n-open OUT,">$fileout";\n-print OUT "####################################\\n# small RNA expressed reads number #\\n####################################\\n";\n-print OUT "#RNAname\\t@marks\\n";\n-foreach my $key (keys %reads) {\n-\tprint OUT $key;\n-\tfor (my $i=0;$i<@{$reads{$key}} ;$i++) {\n-\t\tprint OUT "\\t",$reads{$key}[$i];\n-\t}\n-\tprint OUT "\\n";\n-}\n-\n-print OUT "\\n\\n####################################\\n# small RNA expressed tags number #\\n####################################\\n";\n-print OUT "#RNAname\\t@marks\\n";\n-\n-foreach my $key (keys %tags) {\n-\tprint OUT $key;\n-\tfor (my $i=0;$i<@{$reads{$key}} ;$i++) {\n-\t\tif(defined $tags{$key}[$i]){print OUT "\\t",$tags{$key}[$i];}\n-\t\telse{print OUT "\\t0";}\n-\t}\n-\tprint OUT "\\n";\n-}\n-\n-close OUT;\n-$"=" "; ##### @array print in \\t\n-\n-sub usage{\n-print <<"USAGE";\n-Version $version\n-Usage:\n-$0 -i -tag -o\n-options:\n--i input file# rfam bowtie bwt. format mapping result\n--tag [string] sample marks# eg. sampleA;sampleB;sampleC\n--o output file\n-\n--h help\n-USAGE\n-exit(1);\n-}\n-\n-__DATA__\n-RF00635\tlncRNA\n-RF01868\tlncRNA\n-RF01869\tlncRNA\n-RF01870\tlncRNA\n-RF01871\tlncRNA\n-RF01872\tlncRNA\n-RF01873\tlncRNA\n-RF01874\tlncRNA\n-RF01875\tlncRNA\n-RF01876\tlncRNA\n-RF01877\tlncRNA\n-RF01878\tlncRNA\n-RF01879\tlncRNA\n-RF01880\tlncRNA\n-RF01881\tlncRNA\n-RF01882\tlncRNA\n-RF01883\tlncRNA\n-RF01884\tlncRNA\n-RF01885\tlncRNA\n-RF01886\tlncRNA\n-RF01887\tlncRNA\n-RF01888\tlncRNA\n-RF01889\tlncRNA\n-RF01890\tlncRNA\n-RF01891\tlncRNA\n-RF01892\tlncRNA\n-RF01893\tlncRNA\n-RF01894\tlncRNA\n-RF01904\tlncRNA\n-RF01905\tlncRNA\n-RF01906\tlncRNA\n-RF01907\tlncRNA\n-RF01908\tlncRNA\n-RF01909\tlncRNA\n-RF01928\tlncRNA\n-RF01929\tlncRNA\n-RF01930\tlncRNA\n-RF01931\tlncRNA\n-RF01932\tlncRNA\n-RF01933\tlncRNA\n-RF01934\tlncRNA\n-RF01935\tlncRNA\n-RF01946\tlncRNA\n-RF01947\tlncRNA\n-RF01948\tlncRNA\n-RF01950\tlncRNA\n-RF01951\tlncRNA\n-RF01952\tlncRNA\n-RF01953\tlncRNA\n-RF01954\tlncRNA\n-RF01955\tlncRNA\n-RF01956\tlncRNA\n-RF01957\tlncRNA\n-RF01958\tlncRNA\n-RF01961\tlncRNA\n-RF01962\tlncRNA\n-RF01963\tlncRNA\n-RF01964\tlncRNA\n-RF01965\tlncRNA\n-RF01966\tlncRNA\n-RF01967\tlncRNA\n-RF01968\tlncRNA\n-RF01969\tlncRNA\n-RF01970\tlncRNA\n-RF01971\tlncRNA\n-RF01972\tlncRNA\n-RF01973\tlncRNA\n-RF01974\tlncRNA\n-RF01975\tlncRNA\n-RF01976\tlncRNA\n-RF01977\tlncRNA\n-RF01978\tlncRNA\n-RF01979\tlncRNA\n-RF01980\tlncRNA\n-RF01981\tlncRNA\n-RF01983\tlncRNA\n-RF01984\tlncRNA\n-RF01985\tlncRNA\n-RF01986\tlncRNA\n-RF01987\tlncRNA\n-RF01992\tlncRNA\n-RF02038\tlncRNA\n-RF02039\tlncRNA\n-RF02040\tlncRNA\n-RF02041\tlncRNA\n-RF02042\tlncRNA\n-RF02043\tlncRNA\n-RF02044\tlncRNA\n-RF02045\tlncRNA\n-RF02046\tlncRNA\n-RF02047\tlncRNA\n-RF02085\tlncRNA\n-RF02086\tlncRNA\n-RF02087\tlncRNA\n-RF02089\tlncRNA\n-RF02090\tlncRNA\n-RF02091\tlncRNA\n-RF02098\tlncRNA\n-RF02101\tlncRNA\n-RF02102\tlncR'..b'5\tsnRNA\n-RF01646\tsnRNA\n-RF01647\tsnRNA\n-RF01648\tsnRNA\n-RF01649\tsnRNA\n-RF01650\tsnRNA\n-RF01651\tsnRNA\n-RF01652\tsnRNA\n-RF01653\tsnRNA\n-RF01654\tsnRNA\n-RF01655\tsnRNA\n-RF01658\tsnRNA\n-RF01659\tsnRNA\n-RF01660\tsnRNA\n-RF01661\tsnRNA\n-RF01662\tsnRNA\n-RF01664\tsnRNA\n-RF01802\tsnRNA\n-RF01829\tsnRNA\n-RF01844\tsnRNA\n-RF01846\tsnRNA\n-RF01847\tsnRNA\n-RF01848\tsnRNA\n-RF01860\tsnRNA\n-RF01861\tsnRNA\n-RF01862\tsnRNA\n-RF01863\tsnRNA\n-RF01864\tsnRNA\n-RF01866\tsnRNA\n-RF02163\tsnRNA\n-RF00014\tsRNA\n-RF00018\tsRNA\n-RF00021\tsRNA\n-RF00034\tsRNA\n-RF00035\tsRNA\n-RF00057\tsRNA\n-RF00077\tsRNA\n-RF00078\tsRNA\n-RF00079\tsRNA\n-RF00081\tsRNA\n-RF00082\tsRNA\n-RF00083\tsRNA\n-RF00084\tsRNA\n-RF00101\tsRNA\n-RF00110\tsRNA\n-RF00111\tsRNA\n-RF00112\tsRNA\n-RF00113\tsRNA\n-RF00115\tsRNA\n-RF00116\tsRNA\n-RF00117\tsRNA\n-RF00118\tsRNA\n-RF00119\tsRNA\n-RF00121\tsRNA\n-RF00122\tsRNA\n-RF00124\tsRNA\n-RF00125\tsRNA\n-RF00126\tsRNA\n-RF00128\tsRNA\n-RF00166\tsRNA\n-RF00195\tsRNA\n-RF00368\tsRNA\n-RF00369\tsRNA\n-RF00370\tsRNA\n-RF00371\tsRNA\n-RF00372\tsRNA\n-RF00378\tsRNA\n-RF00444\tsRNA\n-RF00505\tsRNA\n-RF00519\tsRNA\n-RF00615\tsRNA\n-RF00616\tsRNA\n-RF01116\tsRNA\n-RF01385\tsRNA\n-RF01386\tsRNA\n-RF01387\tsRNA\n-RF01388\tsRNA\n-RF01389\tsRNA\n-RF01390\tsRNA\n-RF01391\tsRNA\n-RF01392\tsRNA\n-RF01393\tsRNA\n-RF01394\tsRNA\n-RF01395\tsRNA\n-RF01396\tsRNA\n-RF01397\tsRNA\n-RF01398\tsRNA\n-RF01399\tsRNA\n-RF01400\tsRNA\n-RF01401\tsRNA\n-RF01402\tsRNA\n-RF01403\tsRNA\n-RF01404\tsRNA\n-RF01405\tsRNA\n-RF01406\tsRNA\n-RF01407\tsRNA\n-RF01408\tsRNA\n-RF01409\tsRNA\n-RF01410\tsRNA\n-RF01411\tsRNA\n-RF01412\tsRNA\n-RF01457\tsRNA\n-RF01459\tsRNA\n-RF01460\tsRNA\n-RF01461\tsRNA\n-RF01462\tsRNA\n-RF01463\tsRNA\n-RF01464\tsRNA\n-RF01465\tsRNA\n-RF01466\tsRNA\n-RF01467\tsRNA\n-RF01468\tsRNA\n-RF01469\tsRNA\n-RF01470\tsRNA\n-RF01471\tsRNA\n-RF01472\tsRNA\n-RF01473\tsRNA\n-RF01474\tsRNA\n-RF01476\tsRNA\n-RF01477\tsRNA\n-RF01478\tsRNA\n-RF01479\tsRNA\n-RF01487\tsRNA\n-RF01488\tsRNA\n-RF01489\tsRNA\n-RF01492\tsRNA\n-RF01493\tsRNA\n-RF01494\tsRNA\n-RF01496\tsRNA\n-RF01503\tsRNA\n-RF01504\tsRNA\n-RF01512\tsRNA\n-RF01519\tsRNA\n-RF01520\tsRNA\n-RF01521\tsRNA\n-RF01527\tsRNA\n-RF01528\tsRNA\n-RF01529\tsRNA\n-RF01530\tsRNA\n-RF01571\tsRNA\n-RF01578\tsRNA\n-RF01579\tsRNA\n-RF01580\tsRNA\n-RF01581\tsRNA\n-RF01582\tsRNA\n-RF01619\tsRNA\n-RF01623\tsRNA\n-RF01643\tsRNA\n-RF01656\tsRNA\n-RF01663\tsRNA\n-RF01665\tsRNA\n-RF01668\tsRNA\n-RF01669\tsRNA\n-RF01670\tsRNA\n-RF01671\tsRNA\n-RF01672\tsRNA\n-RF01673\tsRNA\n-RF01674\tsRNA\n-RF01675\tsRNA\n-RF01676\tsRNA\n-RF01677\tsRNA\n-RF01678\tsRNA\n-RF01679\tsRNA\n-RF01680\tsRNA\n-RF01681\tsRNA\n-RF01682\tsRNA\n-RF01683\tsRNA\n-RF01684\tsRNA\n-RF01685\tsRNA\n-RF01686\tsRNA\n-RF01687\tsRNA\n-RF01690\tsRNA\n-RF01691\tsRNA\n-RF01693\tsRNA\n-RF01694\tsRNA\n-RF01696\tsRNA\n-RF01698\tsRNA\n-RF01699\tsRNA\n-RF01700\tsRNA\n-RF01701\tsRNA\n-RF01702\tsRNA\n-RF01703\tsRNA\n-RF01705\tsRNA\n-RF01706\tsRNA\n-RF01710\tsRNA\n-RF01712\tsRNA\n-RF01714\tsRNA\n-RF01718\tsRNA\n-RF01719\tsRNA\n-RF01722\tsRNA\n-RF01723\tsRNA\n-RF01728\tsRNA\n-RF01732\tsRNA\n-RF01742\tsRNA\n-RF01757\tsRNA\n-RF01762\tsRNA\n-RF01775\tsRNA\n-RF01781\tsRNA\n-RF01782\tsRNA\n-RF01783\tsRNA\n-RF01784\tsRNA\n-RF01789\tsRNA\n-RF01791\tsRNA\n-RF01793\tsRNA\n-RF01796\tsRNA\n-RF01808\tsRNA\n-RF01810\tsRNA\n-RF01812\tsRNA\n-RF01814\tsRNA\n-RF01815\tsRNA\n-RF01816\tsRNA\n-RF01817\tsRNA\n-RF01818\tsRNA\n-RF01819\tsRNA\n-RF01820\tsRNA\n-RF01821\tsRNA\n-RF01822\tsRNA\n-RF01823\tsRNA\n-RF01827\tsRNA\n-RF01828\tsRNA\n-RF01858\tsRNA\n-RF01867\tsRNA\n-RF02029\tsRNA\n-RF02030\tsRNA\n-RF02031\tsRNA\n-RF02049\tsRNA\n-RF02050\tsRNA\n-RF02051\tsRNA\n-RF02052\tsRNA\n-RF02053\tsRNA\n-RF02054\tsRNA\n-RF02055\tsRNA\n-RF02056\tsRNA\n-RF02057\tsRNA\n-RF02059\tsRNA\n-RF02060\tsRNA\n-RF02062\tsRNA\n-RF02063\tsRNA\n-RF02064\tsRNA\n-RF02065\tsRNA\n-RF02066\tsRNA\n-RF02067\tsRNA\n-RF02070\tsRNA\n-RF02071\tsRNA\n-RF02072\tsRNA\n-RF02073\tsRNA\n-RF02074\tsRNA\n-RF02075\tsRNA\n-RF02077\tsRNA\n-RF02078\tsRNA\n-RF02079\tsRNA\n-RF02080\tsRNA\n-RF02081\tsRNA\n-RF02082\tsRNA\n-RF02099\tsRNA\n-RF02100\tsRNA\n-RF02151\tsRNA\n-RF02221\tsRNA\n-RF02222\tsRNA\n-RF02223\tsRNA\n-RF02224\tsRNA\n-RF02225\tsRNA\n-RF02226\tsRNA\n-RF02227\tsRNA\n-RF02228\tsRNA\n-RF02230\tsRNA\n-RF02231\tsRNA\n-RF02232\tsRNA\n-RF02233\tsRNA\n-RF02234\tsRNA\n-RF02235\tsRNA\n-RF02236\tsRNA\n-RF02237\tsRNA\n-RF02238\tsRNA\n-RF02239\tsRNA\n-RF02240\tsRNA\n-RF02241\tsRNA\n-RF02242\tsRNA\n-RF02243\tsRNA\n-RF02268\tsRNA\n-RF02269\tsRNA\n-RF00127\tsRNA\n-RF01852\ttRNA\n-RF00005\ttRNA\n' |
b |
diff -r 45de5e1ff487 -r 5691802f074b filterReadsByLength.pl --- a/filterReadsByLength.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,121 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2010-01 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; -use File::Basename; - -my %opts; -GetOptions(\%opts,"i=s","min=i","max=i","o=s","mark:s","h"); -if (!(defined $opts{i} and defined $opts{o} and defined $opts{min} and defined $opts{max}) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $mark=defined $opts{'mark'} ? $opts{'mark'} : "Sample"; -my @mark=split /,/,$mark; - - -open OUT,">$opts{o}"; -open IN,"<$opts{i}"; -my %hash;my %reads; -while (my $aline=<IN>) { - chomp $aline; - my $seq=<IN>; - chomp $seq; - - if($aline=~/:([\d|_]+)_x(\d+)$/){ - my @ss=split/_/,$1; - for (my $i=0;$i<@ss;$i++) { - $hash{length($seq)}[$i]++ if($ss[$i]>0); - $hash{length($seq)}[$i] +=0 if($ss[$i]>0); - $reads{length($seq)}[$i]+=$ss[$i]; - } - } - #else{$reads{length($seq)}+=1;} - if (length ($seq)>=$opts{'min'} && length ($seq) <=$opts{'max'}) { - print OUT "$aline\n$seq\n"; - } -} -close IN; -close OUT; - -my $dir=dirname($opts{'o'}); -chdir $dir; -my $lengthfile=$dir."/reads_length_distribution.txt"; -open OUT, ">$lengthfile"; -open R,">$dir/length_distribution.R"; - -print OUT "Tags length\t@mark\n"; - -my $samNo=@mark; -my $avalue=""; -my @length=sort{$a<=>$b} keys %hash; -foreach (@length) { - print OUT $_,"\t@{$hash{$_}}\n"; - my $vv=join ", ",@{$hash{$_}}; - $avalue .="$vv,"; -} -$avalue =~s/,$//; -my $lengths=join ",",@length; -my $marks=join "\",\"",@mark; - -print R "a<-c($avalue) -b<-matrix(a,ncol=$samNo,byrow=T) -cl<-colors() -names=c($lengths) -legends=c(\"$marks\") -png(\"Tags_length.png\",width=800,height=600) -barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Tags Length Distribution\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\") -abline(h=0) -dev.off() - -"; -$avalue=""; -print OUT "\nReads length\t@mark\n"; -foreach (@length) { - print OUT $_,"\t@{$reads{$_}}\n"; - my $vv=join ", ", @{$reads{$_}}; - $avalue .= "$vv,"; -} -$avalue =~s/,$//; - -print R "a<-c($avalue)\n -b<-matrix(a,ncol=$samNo,byrow=T) - -png(\"Reads_length.png\",width=800,height=600) -barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Reads Length Distribution\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\") -abline(h=0) -dev.off() - -"; -close OUT; -close R; - -system ("R CMD BATCH $dir/length_distribution.R"); - -#system ("rm $dir/length_distribution.R"); -#system ("rm $dir/length_distribution.Rout"); -#system ("rm $dir/.RData"); -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -min -max -mark -options: - --i input file --o output file --min reads min length. --max reads max length. --mark string #sample name eg: samA,samB,samC --h help -USAGE -exit(1); -} - |
b |
diff -r 45de5e1ff487 -r 5691802f074b html.pl --- a/html.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,269 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2014-5-29 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; -use File::Basename; - -my %opts; -GetOptions(\%opts,"i=s","format=s","o=s","h"); -if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments -&usage; -} -my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath); -my ($predir,$rfamdir,$knowndir,$genomedir,$noveldir); -open IN,"<$opts{i}"; -$config=<IN>; chomp $config; -$prepath=<IN>; chomp $prepath; -$rfampath=<IN>;chomp $rfampath; -$knownpath=<IN>; chomp $knownpath; -$genomepath=<IN>; chomp $genomepath; -$novelpath=<IN>; chomp $novelpath; -close IN; -my @tmp=split/\//,$prepath; -$predir=$tmp[-1]; -@tmp=split/\//,$rfampath; -$rfamdir=$tmp[-1]; -@tmp=split/\//,$knownpath; -$knowndir=$tmp[-1]; -@tmp=split/\//,$genomepath; -$genomedir=$tmp[-1]; -@tmp=split/\//,$novelpath; -$noveldir=$tmp[-1]; - -my $dir=dirname($opts{'o'}); - -open OUT ,">$opts{'o'}"; -print OUT "<HTML>\n <HEAD>\n <TITLE> Analysis Report </TITLE>\n </HEAD> - <BODY bgcolor=\"lightgray\">\n <h1 align=\"center\">\n <font face=\"����\">\n <b>Small RNA Analysis Report</b>\n </font>\n </h1> - <h2>1. Sequence No. and quality</h2> - <h3>1.1 Sequece No.</h3> -"; - -### raw data no -open IN,"<$config"; -my @files;my @marks; my @rawNo; -while (my $aline=<IN>) { - chomp $aline; - my @tmp=split/\t/,$aline; - push @files,$tmp[0]; - - my $no=`less $tmp[0] |wc -l `; - chomp $no; - if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") { - $no=$no/4; - } - else{ - $no=$no/2; - } - push @rawNo,$no; - - push @marks,$tmp[1]; -} -close IN; - -### preprocess -unless ($prepath=~/\/$/) { - $prepath .="/"; -} - -my @trimNo;my @collapse; -my $collapsefile=$prepath."collapse_reads.fa"; -open IN,"<$collapsefile"; -while (my $aline=<IN>) { - chomp $aline; - <IN>; - $aline=~/:([\d|_]+)_x(\d+)$/; - my @lng=split/_/,$1; - for (my $i=0;$i<@lng;$i++) { - if ($lng[$i]>0) { - $trimNo[$i] +=$lng[$i]; - $collapse[$i] ++; - } - } -} -close IN; - -my @cleanR;my @cleanT; -my $clean=$prepath."collapse_reads_19_28.fa"; -open IN,"<$clean"; -while (my $aline=<IN>) { - chomp $aline; - <IN>; - $aline=~/:([\d|_]+)_x(\d+)$/; - my @lng=split/_/,$1; - for (my $i=0;$i<@lng;$i++) { - if ($lng[$i]>0) { - $cleanR[$i] +=$lng[$i]; - $cleanT[$i] ++; - } - } -} -close IN; - -print OUT "<table border=\"1\"> -<tr align=\"center\"> -<th> </th> -"; -foreach (@marks) { - print OUT "<th> $_ </th>\n"; -} -print OUT "</tr> -<tr align=\"center\"> -<th align=\"left\">Raw Reads No. </th> -"; -foreach (@rawNo) { - print OUT "<td> $_ </td>\n"; -} -print OUT "</tr> -<tr align=\"center\"> -<th align=\"left\">Reads No. After Trimed 3\' adapter </th> -"; -foreach (@trimNo) { - print OUT "<td> $_ </td>\n"; -} -print OUT "</tr> -<tr align=\"center\"> -<th align=\"left\">Unique Tags No. </th> -"; -foreach (@collapse) { - print OUT "<td> $_ </td>\n"; -} -print OUT "</tr> -<tr align=\"center\"> -<th align=\"left\">Clean Reads No. </th> -"; -foreach (@cleanR) { - print OUT "<td> $_ </td>\n"; -} -print OUT "</tr> -<tr align=\"center\"> -<th align=\"left\">Clean Tags No. </th> -"; -foreach (@cleanT) { - print OUT "<td> $_ </td>\n"; -} -print OUT "</tr>\n</table>"; -print OUT "<p> -Note:<br /> -The raw data file path is: <b>$files[0]</b><br /> -"; -for (my $i=1;$i<@files;$i++) { - print OUT "          <b>$files[$i]</b><br />"; -} -print OUT "The collapsed file path is: <b>$collapsefile</b><br /> -The clean data file path is: <b>$clean</b><br /> -</p> -<h2> 1. Sequence length count</h2> -<h3> 1.1 Reads length</h3> -"; - -print OUT "<img src=\"./$predir/Reads_length.png\" alt=\"Reads_length.png\" width=\"400\" height=\"300\"/> -<h3> 1.2 Tags length count</h3> -<img src=\"./$predir/Tags_length.png\" alt=\"Tags_length.png\" width=\"400\" height=\"300\"/> -<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a> -</p> -"; - -#### rfam -unless ($rfampath=~/\/$/) { - $rfampath .="/"; -} -print OUT "<h2>2. Rfam non-miRNA annotation</h2> -<h3>2.1 Reads count</h3> -<table border=\"1\"> -<tr align=\"center\"> -"; - -my @rfamR; my @rfamT; -my $tag=1; -open IN,"<$dir/rfam_non-miRNA_annotation.txt"; -while (my $aline=<IN>) { - chomp $aline; - $tag=0 if($aline=~/tags\s+number/); - next if($aline=~/^\#/); - next if($aline=~/^\s*$/); - my @tmp=split/\s+/,$aline; - if($tag == 1){push @rfamR,[@tmp];} - else{push @rfamT,[@tmp];} -} -close IN; - - -print OUT "<th>RNA Name</th>\n"; -foreach (@marks) { - print OUT "<th> $_ </th>\n"; -} -for (my $i=0;$i<@rfamR;$i++) { - print OUT "</tr> - <tr align=\"center\"> - <th align=\"left\">$rfamR[$i][0]</th> - "; - for (my $j=1;$j<@{$rfamR[$i]} ;$j++) { - print OUT "<td> $rfamR[$i][$j]</td>\n"; - } -} - -print OUT "</tr>\n</table> - <h3>2.2 Tags count</h3> - <table border=\"1\"> - <tr align=\"center\"> - <th>RNA Name</th>\n"; -foreach (@marks) { - print OUT "<th> $_ </th>\n"; -} -for (my $i=0;$i<@rfamT;$i++) { - print OUT "</tr> - <tr align=\"center\"> - <th align=\"left\">$rfamT[$i][0]</th> - "; - for (my $j=1;$j<@{$rfamT[$i]} ;$j++) { - print OUT "<td> $rfamT[$i][$j]</td>\n"; - } -} -print OUT "</tr>\n</table> -<p>Note:<br />The rfam mapping results is: <b>$rfampath</b>"; -print OUT "<b>rfam_mapped.bwt</b></p> - <h2>3. MicroRNA result</h2> - <h3>3.1 known microRNA</h3> - <p>The known microRNA express list: <a href=\"./known_microRNA_express.txt\"> known_microRNA_express.txt</a><br/> - The known microRNA alngment file: <a href=\"./known_microRNA_express.aln\"> known_microRNA_express.aln</a><br/> - The known moRs file: <a href=\"./known_microRNA_express.moRs\"> known_microRNA_express.moRs</a><br/> - The known microRNA mature sequence file: <a href=\"./known_microRNA_mature.fa\"> known_microRNA_mature.fa</a><br/> - The knowm microRNA precursor sequence file: <a href=\"./known_microRNA_precursor.fa\"> known_microRNA_precursor.fa</a> - </p> - - <h3>3.2 novel microRNA</h3> - <p>The novel microRNA prediction file:<a href=\"./microRNA_prediction.mrd\"> microRNA_prediction.mrd</a><br/> - The novel microRNA express list: <a href=\"./novel_microRNA_express.txt\"> novel_microRNA_express.txt</a><br/> - The novel microRNA mature sequence file: <a href=\"./novel_microRNA_mature.fa\"> novel_microRNA_mature.fa</a><br/> - The novel microRNA precursor sequence file: <a href=\"./novel_microRNA_precursor.fa\"> novel_microRNA_precursor.fa</a> - </p> -"; - - - -print OUT " - </BODY> -</HTML> -"; -close OUT; - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -o -options: --o output file --h help -USAGE -exit(1); -} - |
b |
diff -r 45de5e1ff487 -r 5691802f074b miRNA_Express_and_sequence.pl --- a/miRNA_Express_and_sequence.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,173 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2014-6-4 -#Modified: -#Description: solexa miRNA express and sequence -my $version=1.00; - -use strict; -use Getopt::Long; - -my %opts; -GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h"); -if (!(defined $opts{i} and defined $opts{list} and defined $opts{fa} and defined $opts{pre} and defined $opts{tag}) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'i'}; -my $fileout=$opts{'list'}; -my $out=$opts{'fa'}; -my $preout=$opts{'pre'}; - -=cut -my %hash_pri; -open PRI,"<$opts{p}"; -while (my $aline=<PRI>) { - chomp $aline; - if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;} -} -close PRI; -=cut - -open IN,"<$filein"; #input file -open OUT,">$fileout"; #output file -open FA ,">$out"; -open PRE,">$preout"; - -print OUT "#ID\tcoordinate\tpos1\tpos2"; -my @marks=split/\,/,$opts{'tag'}; -foreach (@marks) { - print OUT "\t",$_,"_matureExp"; -} -foreach (@marks) { - print OUT "\t",$_,"_starExp"; -} -foreach (@marks) { - print OUT "\t",$_,"_totalExp"; -} - -print OUT "\n"; - -my (%uniq_id,$novel); -while (my $aline=<IN>) { - chomp $aline; - until ($aline =~ /^score\s+[-\d\.]+/){ - $aline = <IN>; - if (eof) {last;} - } - if (eof) {last;} -########## miRNA ID ################ - $novel++; -########### annotate#################### - do {$aline=<IN>;} until($aline=~/flank_first_end/) ; - chomp $aline; - my @flank1=split/\t/,$aline; - do {$aline=<IN>;} until($aline=~/flank_second_beg/) ; - chomp $aline; - my @flank2=split/\t/,$aline; -# -########## mature start loop pre #### - do {$aline=<IN>;} until($aline=~/mature_beg/) ; - chomp $aline; - my @start=split/\t/,$aline; -# $start[1] -=$flank1[1]; - do {$aline=<IN>;} until($aline=~/mature_end/) ; - chomp $aline; - my @end=split/\t/,$aline; -# $end[1] -=$flank1[1]; - do {$aline=<IN>;} until($aline=~/mature_seq/) ; - chomp $aline; - my @arr1=split/\t/,$aline; - do {$aline=<IN>;} until($aline=~/pre_seq/) ; - chomp $aline; - my @arr2=split/\t/,$aline; - do {$aline=<IN>;} until($aline=~/pri_id/) ; - chomp $aline; - my @pri_id=split/\t/,$aline; - do {$aline=<IN>;} until($aline=~/pri_seq/) ; - chomp $aline; - my @pri_seq=split/\t/,$aline; - do {$aline=<IN>;} until($aline=~/star_beg/) ; - chomp $aline; - my @star_start=split/\t/,$aline; -# $star_start[1] -=$flank1[1]; - do {$aline=<IN>;} until($aline=~/star_end/) ; - chomp $aline; - my @star_end=split/\t/,$aline; -# $star_end[1] -=$flank1[1]; - do {$aline=<IN>;} until($aline=~/star_seq/) ; - chomp $aline; - my @arr3=split/\t/,$aline; - print OUT "miR-c-$novel\t$pri_id[1]\tmature:$start[1]:$end[1]\tstar:$star_start[1]:$star_end[1]\t"; - #print OUT "$arr1[1]\t$arr3[1]\t$arr2[1]\t\/\t"; - print FA ">miR-c-$novel\n$arr1[1]\n"; - print PRE ">miR-c-$novel\n$pri_seq[1]\n"; -########## reads count ############# - <IN>; - my @count1;my @count2;my @count3;my @count4; - $aline=<IN>; - do { - chomp $aline; - my @reads=split/\t/,$aline; - my @pos=(); - $reads[5]=~/(\d+)\.\.(\d+)/; -# $pos[0] =$1-$flank1[1]; -# $pos[1] =$2-$flank1[1]; - $pos[0]=$1; - $pos[1]=$2; - $reads[0]=~/:([\d|_]+)_x(\d+)$/; - my @ss=split/_/,$1; - for (my $i=0;$i<@ss ;$i++) { - if (!(defined $count3[$i])) { - $count3[$i]=0; - } - if (!(defined $count4[$i])) { - $count4[$i]=0; - } - $count2[$i]+=$ss[$i]; - - } -# $count3 +=$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 ); -# $count4 +=$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 ); -# $count1 =$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 && $count1<$1); -# $count2 =$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 && $count2<$1); - if($end[1]-$pos[1]>=-5 && $end[1]-$pos[1]<=5 && $pos[0]-$start[1]>=-3 && $pos[0]-$start[1]<=3 ) - { - for (my $i=0;$i<@ss;$i++) { - $count3[$i]+=$ss[$i]; - } - } - if($star_end[1]-$pos[1]<=5 && $star_end[1]-$pos[1]>=-5 && $pos[0]-$star_start[1]>=-3 && $pos[0]-$star_start[1]<=3){ - for (my $i=0;$i<@ss;$i++) { - $count4[$i]+=$ss[$i]; - } - } - $aline=<IN>; - chomp $aline; - } until(length $aline < 1) ; - $"="\t"; - print OUT "@count3\t@count4\t@count2\n"; - $"=" "; -} - -close IN; -close OUT; - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -list -fa -pre -tag -options: --i input file,predictions file --list output file miRNA list file --fa output file ,miRNA sequence fasta file. --pre output file, miRNA precursor fasta file. --tag string, sample names# eg: samA,samB,samC --h help -USAGE -exit(1); -} - |
b |
diff -r 45de5e1ff487 -r 5691802f074b miRPlant.xml --- a/miRPlant.xml Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,90 +0,0 @@ -<tool id="plant_microRNA_v1" name="miRPlant" veision="1.0.0"> - <description>tool for plant microRNA analisis</description> - - <requirements> - <requirement type="set_environment">SCRIPT_PATH</requirement> - <requirement type="package" version="0.12.7">bowtie</requirement> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="0.0.13">fastx_toolkit </requirement> - <requirement type="package" version="1.5">ViennaRNA</requirement> - </requirements> - - <!--command interpreter="perl">miPlant.pl -i $input -format $format -gfa $gfa -idx $index -pre $pre -mat $mat -rfam $rfam -idx2 $idx2 -D $D -a $a -M $M -min $min -max $max -mis $mis -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe -t $t -o $output</command--> - - <command interpreter="perl">miRPlant.pl - ## Change this to accommodate the number of threads you have available. - -t \${GALAXY_SLOTS:-4} - ## Do or not delet rfam mapped tags - #if $params.delet_rfam == "yes": - -D - #end if - -path \$SCRIPT_PATH - - #for $j, $s in enumerate( $series ) - ##rank_of_series=$j - -i ${s.input} - -tag ${s.tag} - #end for - - -format $format -gfa $gfa -pre $pre -mat $mat -rfam $rfam -a $a -M $mapnt -min $min -max $max -mis $mismatch -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe - </command> - - <inputs> - - <repeat name="series" title="Series"> - <param name="input" type="data" label="Raw data"/> - <param name="tag" type="text" data_ref="input" label="Sample name of raw data"/> - </repeat> - - <conditional name="params"> - <param name="delet_rfam" type="select" label="delet rfam mapped reads"> - <option value="yes" selected="true">yes</option> - <option value="no">no</option> - </param> - </conditional> <!-- params --> - - <!--param name="input" format="tabular" type="data" label="input config file" /--> - - <param name="format" type="select" lable="raw data format" multiple="false"> - <option value="fastq">Raw data is fastq. format</option> - <option value="fasta">Raw data is fasta. format</option> - </param> - - <param name="gfa" type="data" label="genome sequence fasta file"/> - <!--param type="data" name="index" label="genome sequence bowtie index"/--> - <param name="mat" type="data" label="mature microRNA sequence file" /> - <param name="pre" type="data" label="precursor microRNA sequence fie" /> - <param name="rfam" type="data" label="rfam sequence file" /> - <!--param type="data" name="idx2" label="rfam sequence bowtie index " --> - <param name="a" type="text" value="ATCTCGTATG" label="3' adapter sequence" /> - <param name="mapnt" type="integer" value="8" label="minimum adapter map nts" /> - <param name="min" type="integer" value="19" label="minimum microRNA length" /> - <param name="max" type="integer" value="28" label="maximum microRNA length" /> - <param name="mismatch" type="integer" value="0" label="number of allowed mismatches when mapping reads to precursors" /> - <param name="e" type="integer" value="2" label="number of nucleotides upstream of the mature sequence to consider" /> - <param name="f" type="integer" value="5" label="number of nucleotides downstream of the mature sequence to consider" /> - <param name="v" type="integer" value="0" label="report end-to-end hits less than v mismatches"/> - <param name="r" type="integer" value="25" label="a read is allowed to map up to this number of positions in the genome" /> - <param name="dis" type="integer" value="200" label="Maximal space between miRNA and miRNA*" /> - <param name="flank" type="integer" value="10" label="Flank sequence length of miRNA precursor" /> - <param name="mfe" type="float" value="-30" label="Maximal free energy allowed for a miRNA precursor" /> - <param name="thread" type="integer" value="1" label="number of threads" /> - </inputs> - - <outputs> - <data format="txt" name="known microRNA express list" from_work_dir="miRPlant_out/known_microRNA_express.txt"/> - <data format="txt" name="known microRNA express alignment" from_work_dir="miRPlant_out/known_microRNA_express.aln"/> - <data format="txt" name="known microRNA moRs result" from_work_dir="miRPlant_out/known_microRNA_express.moRs"/> - <data format="txt" name="known microRNA precursor file" from_work_dir="miRPlant_out/known_microRNA_precursor.fa"/> - <data format="txt" name="known microRNA mature file" from_work_dir="miRPlant_out/known_microRNA_mature.fa"/> - <data format="txt" name="novel microRNA prediction file" from_work_dir="miRPlant_out/known_microRNA_mature.fa"/> - <data format="txt" name="novel microRNA express list" from_work_dir="miRPlant_out/novel_microRNA_express.txt"/> - <data format="txt" name="novel microRNA precursor file" from_work_dir="miRPlant_out/novel_microRNA_precursor.fa"/> - <data format="txt" name="novel microRNA mature sequence file" from_work_dir="miRPlant_out/novel_microRNA_mature.fa"/> - <data format="txt" name="analysis result" from_work_dir="miRPlant_out/result.html"/> - </outputs> - - <help> - - </help> - </tool> |
b |
diff -r 45de5e1ff487 -r 5691802f074b precursors.pl --- a/precursors.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,789 +0,0 @@\n-#!/usr/bin/perl -w\n-#Filename:\n-#Author: Tian Dongmei\n-#Email: tiandm@big.ac.cn\n-#Date: 2013/7/19\n-#Modified:\n-#Description: \n-my $version=1.00;\n-\n-use strict;\n-use Getopt::Long;\n-use RNA;\n-\n-my %opts;\n-GetOptions(\\%opts,"map=s","g=s","d:i","f:i","o=s","e:f","s=s","h");\n-if (!(defined $opts{map} and defined $opts{g} and defined $opts{o} and defined $opts{s} ) || defined $opts{h}) { #necessary arguments\n-&usage;\n-}\n-\n-my $filein=$opts{\'map\'};\n-my $faout=$opts{\'o\'};\n-my $strout=$opts{\'s\'};\n-my $genome= $opts{\'g\'};\n-\n-my $maxd=defined $opts{\'d\'} ? $opts{\'d\'} : 200;\n-my $flank=defined $opts{\'f\'}? $opts{\'f\'} : 10;\n-\n-my $MAX_ENERGY=-18;\n-if (defined $opts{\'e\'}) {$MAX_ENERGY=$opts{\'e\'};}\n-my $MAX_UNPAIR=5;\n-my $MIN_PAIR=15;\n-my $MAX_SIZEDIFF=4;\n-my $MAX_BULGE=2;\n-my $ASYMMETRY=5;\n-my $MIN_UNPAIR=0;\n-my $MIN_SPACE=5;\n-my $MAX_SPACE=$maxd;\n-my $FLANK=$flank;\n-\n-######### load in genome sequences start ########\n-my %genome;\n-my %lng;\n-my $name;\n-open IN,"<$genome";\n-while (my $aline=<IN>) {\n-\tchomp $aline;\n-\tnext if($aline=~/^\\#/);\n-\tif ($aline=~/^>(\\S+)/) {\n-\t\t$name=$1;\n-\t\tnext;\n-\t}\n-\t$genome{$name} .=$aline;\n-}\n-close IN;\n-foreach my $key (keys %genome) {\n-\t$lng{$key}=length($genome{$key});\n-}\n-####### load in genome sequences end ##########\n-\n-my %breaks; ### reads number bigger than 3\n-open IN,"<$filein"; #input file \n-while (my $aline=<IN>) {\n-\tchomp $aline;\n-\tmy @tmp=split/\\t/,$aline;\n-\t$tmp[0]=~/_x(\\d+)$/;\n-\tmy $no=$1;\n-\tnext if($no<3);\n-\t#my $trand=&find_strand($tmp[9]);\n-\t#my @pos=split/\\.\\./,$tmp[5];\n-\tmy $end=$tmp[3]+length($tmp[4])-1;\n-\tif($tmp[1] eq "-"){$tmp[4]=revcom($tmp[4]);}\n-\tpush @{$breaks{$tmp[2]}{$tmp[1]}},[$tmp[3],$end,$no,$tmp[4]]; ### 0 base\n-}\n-close IN;\n-\n-my %cites; ### peaks\n-foreach my $chr (keys %breaks) {\n-\tforeach my $strand (keys %{$breaks{$chr}}) {\n-\t\tmy @array=@{$breaks{$chr}{$strand}};\n-\t\t@array=sort{$a->[0]<=>$b->[0]} @array;\n-\t\tfor (my $i=0;$i<@array;$i++) {\n-\t\t\tmy $start=$array[$i][0];my $end=$array[$i][1];\n-\t\t\tmy @subarray=();\n-\t\t\tpush @subarray,$array[$i];\n-\n-\t\t\tfor (my $j=$i+1;$j<@array;$j++) {\n-\t\t\t\tif ($start<$array[$j][1] && $end>$array[$j][0]) {\n-\t\t\t\t\tpush @subarray,$array[$j];\n-\t\t\t\t\t($start,$end)=&newpos($start,$end,$array[$j][0],$array[$j][1]);\n-\t\t\t\t}\n-\t\t\t\telse{\n-\t\t\t\t\t$i=$j;\n-\t\t\t\t\t&find_cites(\\@subarray,$chr,$strand);\n-\t\t\t\t\tlast;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t}\n-\t}\n-}\n-\n-open FA,">$faout"; #output file \n-open STR,">$strout";\n-foreach my $chr (keys %cites) {\n-\tforeach my $strand (keys %{$cites{$chr}}) {\n-\n-\t\tmy @array2=@{$cites{$chr}{$strand}};\n-\t\t@array2=sort{$a->[0]<=>$b->[0]} @array2;\n-\t\t&excise(\\@array2,$chr,$strand);\n-\t}\n-}\n-close FA;\n-close STR;\n-sub oneCiteDn{\n-\tmy ($array,$a,$chr,$strand)=@_;\n-\n-\tmy $ss=$$array[$a][0]-$flank;\n-\t$ss=0 if($ss<0);\n-\tmy $ee=$$array[$a][1]+$maxd+$flank;\n-\t$ee=$lng{$chr} if($ee>$lng{$chr});\n-\t\n-\tmy $seq=substr($genome{$chr},$ss,$ee-$ss+1);\n-\tif($strand eq "-"){$seq=revcom($seq);}\n-\n-\tmy $val=&ffw1($seq,$$array[$a][3],$chr,$strand,$ss,$ee);\n-\treturn $val;\n-}\n-sub oneCiteUp{\n-\tmy ($array,$a,$chr,$strand)=@_;\n-\n-\tmy $ss=$$array[$a][0]-$maxd-$flank;\n-\t$ss=0 if($ss<0);\n-\tmy $ee=$$array[$a][1]+$flank;\n-\t$ee=$lng{$chr} if($ee>$lng{$chr});\n-\t\n-\tmy $seq=substr($genome{$chr},$ss,$ee-$ss+1);\n-\tif($strand eq "-"){$seq=revcom($seq);}\n-\n-\tmy $val=&ffw1($seq,$$array[$a][3],$chr,$strand,$ss,$ee);\n-\treturn $val;\n-\n-}\n-\n-sub twoCites{\n-\tmy ($array,$a,$b,$chr,$strand)=@_;\n-\n-\tmy $ss=$$array[$a][0]-$flank;\n-\t$ss=0 if($ss<0);\n-\tmy $ee=$$array[$b][1]+$flank;\n-\t$ee=$lng{$chr} if($ee>$lng{$chr});\n-\t\n-\tmy $seq=substr($genome{$chr},$ss,$ee-$ss+1);\n-\tif($strand eq "-"){$seq=revcom($seq);}\n-\n-#\tmy( $str,$mfe)=RNA::fold($seq);\n-#\treturn 0 if($mfe>$MAX_ENERGY); ### minimum mfe\n-\tmy $val=&ffw2($seq,$$array[$a][3],$$array[$b][3],$chr,$strand,$ss,$ee);\n-\t\n-\treturn $val;\n-\n-}\n-sub excise{\n-\tmy ($cluster,$chr,$strand)=@_;\n-\n-\tmy $last_pos=0;\n-\tfor (my $i=0;$i<@{$cluster};$i++) {\n-\t\tnext if($$cluster[$i][0]<$last_pos);\n-\t\tmy $ok=0;\n-\t\tfor (my $j=$i+1;$j<@{$cluster} ;$j++) {\n-\t\t\tif($'..b' s2\n-\n-\tmy $pair_num=0;\n-\tmy $overhang1=0;\n-\tmy $overhang2=0;\n-\tmy ($s1,$e1,$s2,$e2);\n-\tforeach my $i ($beg1..$end1) {\n-\t\tif (defined $table->{$i}) {\n-\t\t\tmy $j=$table->{$i};\n-\t\t\tif ($j <= $end2 && $j >= $beg2) {\n-\t\t\t\t$s1=$i;\n-\t\t\t\t$e2=$j;\n-\t\t\t\tlast;\n-\t\t\t}\n-\t\t}\n-\t}\n-\tforeach my $i (reverse ($beg1..$end1)) {\n-\t\tif (defined $table->{$i}) {\n-\t\t\tmy $j=$table->{$i};\n-\t\t\tif ($j <= $end2 && $j >= $beg2) {\n-\t\t\t\t$e1=$i;\n-\t\t\t\t$s2=$j;\n-\t\t\t\tlast;\n-\t\t\t}\n-\t\t}\n-\t}\n-\n-#\tprint "$beg1,$end1 $s1,$e1\\n";\n-#\tprint "$beg2,$end2 $s2,$e2\\n";\n-\n-\tforeach my $i ($beg1..$end1) {\n-\t\tif (defined $table->{$i}) {\n-\t\t\tmy $j=$table->{$i};\n-\t\t\tif ($j <= $end2 && $j >= $beg2) {\n-\t\t\t\t++$pair_num;\n-\t\t\t}\n-\t\t}\n-\t}\n-\tif (defined $s1 && defined $e2) {\n-\t\t$overhang1=($end2-$e2)-($s1-$beg1);\n-\t}\n-\tif (defined $e1 && defined $s2) {\n-\t\t$overhang2=($end1-$e1)-($s2-$beg2);\n-\t}\n-\t\n-\tif ($pair_num < 13) {\n-\t\t$like_mir_duplex=0;\n-\t}\n-\tif ($overhang1 < 0 && $overhang2 < 0) {\n-\t\t$like_mir_duplex=0;\n-\t}\n-\treturn ($like_mir_duplex,$pair_num,$overhang1,$overhang2);\n-}\n-sub parse_struct {\n-\tmy $struct=shift;\n-\tmy $table=shift;\n-\n-\tmy @t=split(\'\',$struct);\n-\tmy @lbs; # left brackets\n-\tforeach my $k (0..$#t) {\n-\t\tif ($t[$k] eq "(") {\n-\t\t\tpush @lbs, $k+1;\n-\t\t}\n-\t\telsif ($t[$k] eq ")") {\n-\t\t\tmy $lb=pop @lbs;\n-\t\t\tmy $rb=$k+1;\n-\t\t\t$table->{$lb}=$rb;\n-\t\t\t$table->{$rb}=$lb;\n-\t\t}\n-\t}\n-\tif (@lbs) {\n-\t\twarn "unbalanced RNA struct.\\n";\n-\t}\n-}\n-sub which_arm {\n-\tmy $substruct=shift;\n-\tmy $arm;\n-\tif ($substruct=~/\\(/ && $substruct=~/\\)/) {\n-\t\t$arm="-";\n-\t}\n-\telsif ($substruct=~/\\(/) {\n-\t\t$arm="5p";\n-\t}\n-\telse {\n-\t\t$arm="3p";\n-\t}\n-\treturn $arm;\n-}\n-sub biggest_bulge {\n-\tmy $struct=shift;\n-\tmy $bulge_size=0;\n-\tmy $max_bulge=0;\n-\twhile ($struct=~/(\\.+)/g) {\n-\t\t$bulge_size=length $1;\n-\t\tif ($bulge_size > $max_bulge) {\n-\t\t\t$max_bulge=$bulge_size;\n-\t\t}\n-\t}\n-\treturn $max_bulge;\n-}\n-sub get_asy {\n-\tmy($table,$a1,$a2)=@_;\n-\tmy ($pre_i,$pre_j);\n-\tmy $asymmetry=0;\n-\tforeach my $i ($a1..$a2) {\n-\t\tif (defined $table->{$i}) {\n-\t\t\tmy $j=$table->{$i};\n-\t\t\tif (defined $pre_i && defined $pre_j) {\n-\t\t\t\tmy $diff=($i-$pre_i)+($j-$pre_j);\n-\t\t\t\t$asymmetry += abs($diff);\n-\t\t\t}\n-\t\t\t$pre_i=$i;\n-\t\t\t$pre_j=$j;\n-\t\t}\n-\t}\n-\treturn $asymmetry;\n-}\n-\n-sub peaks{\n-\tmy @cluster=@{$_[0]};\n-\n-\treturn if(@cluster<1);\n-\n-\tmy $max=0; my $index=-1;\n-\tfor (my $i=0;$i<@cluster;$i++) {\n-\t\tif($cluster[$i][2]>$max){\n-\t\t\t$max=$cluster[$i][2];\n-\t\t\t$index=$i;\n-\t\t}\n-\t}\n-#\t&excise(\\@cluster,$index,$_[1],$_[2]);\n-\treturn($index);\n-}\n-\n-sub find_cites{\n-\tmy @tmp=@{$_[0]};\n-\tmy $i=&peaks(\\@tmp);\n-\t\n-\tmy $start=$tmp[$i][0];\n-\tmy $total=0; my $node5=0;\n-\tfor (my $j=0;$j<@tmp ;$j++) {\n-\t\t$total+=$tmp[$j][2];\n-\t\t$node5 +=$tmp[$j][2] if($tmp[$j][0]-$start<=2 && $tmp[$j][0]-$start>=-2);\n-\t}\n-\tpush @{$cites{$_[1]}{$_[2]}},$tmp[$i] if($node5/$total>0.80 && $tmp[$i][2]/$node5>0.5);\n-}\n-\n-sub newpos{\n-\tmy ($a,$b,$c,$d)=@_;\n-\tmy $s= $a>$c ? $c : $a;\n-\tmy $e=$b>$d ? $b : $d;\n-\treturn($s,$e);\n-}\n-\n-sub rev{\n-\n- my($sequence)=@_;\n-\n- my $rev=reverse $sequence; \n-\n- return $rev;\n-}\n-\n-sub com{\n-\n- my($sequence)=@_;\n-\n- $sequence=~tr/acgtuACGTU/TGCAATGCAA/; \n- \n- return $sequence;\n-}\n-\n-sub revcom{\n-\n- my($sequence)=@_;\n-\n- my $revcom=rev(com($sequence));\n-\n- return $revcom;\n-}\n-\n-sub find_strand{\n-\n- #A subroutine to find the strand, parsing different blast formats\n- my($other)=@_;\n-\n- my $strand="+";\n-\n- if($other=~/-/){\n-\t$strand="-";\n- }\n-\n- if($other=~/minus/i){\n-\t$strand="-";\n- }\n-\n- return($strand);\n-}\n-sub usage{\n-print <<"USAGE";\n-Version $version\n-Usage:\n-$0 -map -g -d -f -o -s -e\n-options:\n- -map input file# align result # bst. format\n- -g input file # genome sequence fasta format\n- -d <int> Maximal space between miRNA and miRNA* (200)\n- -f <int> Flank sequence length of miRNA precursor (10)\n- -o output file# percursor fasta file\n- -s output file# precursor structure file\n- -e <folat> Maximal free energy allowed for a miRNA precursor (-18 kcal/mol)\n-\n- -h help\n-USAGE\n-exit(1);\n-}\n-\n' |
b |
diff -r 45de5e1ff487 -r 5691802f074b quantify.pl --- a/quantify.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,495 +0,0 @@\n-#!/usr/bin/perl -w\r\n-#Filename:\r\n-#Author: Tian Dongmei\r\n-#Email: tiandm@big.ac.cn\r\n-#Date: 2013/7/19\r\n-#Modified:\r\n-#Description: \r\n-my $version=1.00;\r\n-\r\n-use File::Path;\r\n-use strict;\r\n-use File::Basename;\r\n-#use Getopt::Std;\r\n-use Getopt::Long;\r\n-use RNA;\r\n-\r\n-my %opts;\r\n-GetOptions(\\%opts,"r=s","p=s","m=s","mis:i","t:i","e:i","f:i","tag:s","o=s","time:s","h");\r\n-if (!(defined $opts{r} and defined $opts{p} and defined $opts{m} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments\r\n-&usage;\r\n-}\r\n-\r\n-my $read=$opts{\'r\'};\r\n-my $pre=$opts{\'p\'};\r\n-my $mature=$opts{\'m\'};\r\n-\r\n-my $dir=$opts{\'o\'};\r\n-unless ($dir=~/\\/$/) {$dir .="/";}\r\n-if (not -d $dir) {\r\n-\tmkdir $dir;\r\n-}\r\n-\r\n-my $threads=defined $opts{\'t\'} ? $opts{\'t\'} : 1;\r\n-my $mismatch=defined $opts{\'mis\'} ? $opts{\'mis\'} : 0;\r\n-\r\n-my $upstream = 2;\r\n-my $downstream = 5;\r\n-\r\n-$upstream = $opts{\'e\'} if(defined $opts{\'e\'});\r\n-$downstream = $opts{\'f\'} if(defined $opts{\'f\'});\r\n-\r\n-my $marks=defined $opts{\'tag\'} ? $opts{\'tag\'} : "";\r\n-\r\n-my $time=Time();\r\n-if (defined $opts{\'time\'}) { $time=$opts{\'time\'};}\r\n-\r\n-my $tmpdir="${dir}/miRNA_Express_${time}";\r\n-if(not -d $tmpdir){\r\n-\tmkdir($tmpdir);\r\n-}\r\n-chdir $tmpdir;\r\n-\r\n-`cp $pre ./`;\r\n-my $pre_file_name=basename($pre);\r\n-\r\n-&mapping(); # matures align to precursors && reads align to precursors;\r\n-\r\n-my %pre_mature; # $pre_mature{pre_id}{matre_ID}{"mature"}[0]->start; $pre_mature{pre_id}{matre_ID}{"mature"}[1]->end;\r\n-&maturePosOnPre(); # acknowledge mature positions on precursor \r\n-\r\n-my %pre_read;\r\n-&readPosOnPre(); # acknowledge reads positions on precursors\r\n-\r\n-if(!(defined $opts{\'tag\'})){\r\n-\tforeach my $key (keys %pre_read) {\r\n-\t\t$pre_read{$key}[0][0]=~/:([\\d|_]+)_x(\\d+)$/;\r\n-\t\tmy @ss=split/_/,$1;\r\n-\t\tfor (my $i=1;$i<=@ss;$i++) {\r\n-\t\t\t$marks .="Smp$i;";\r\n-\t\t}\r\n-\t\tlast;\r\n-\t}\r\n-}\r\n-\r\n-my %pre;## read in precursor sequences #$pre{pre_id}="CGTA...."\r\n-&attachPre();\r\n-\r\n-my $preno=scalar (keys %pre);\r\n-print "Total Precursor Number is $preno !!!!\\n";\r\n-\r\n-my %struc; #mature star loop; $struc{$key}{"struc"}=$str; $struc{$key}{"mfe"}=$mfe;\r\n-&structure();\r\n-\r\n-\r\n-##### analysis and print out && moRs\r\n-my $aln=$dir."known_microRNA_express.aln";\r\n-my $list=$dir."known_microRNA_express.txt";\r\n-my $moRs=$dir."known_microRNA_express.moRs";\r\n-\r\n-system("ln $mature $dir/known_microRNA_mature.fa ");\r\n-system("ln $pre $dir/known_microRNA_precursor.fa ");\r\n-\r\n-open ALN,">$aln";\r\n-open LIST,">$list";\r\n-open MORS,">$moRs";\r\n-\r\n-$"="\\t"; ##### @array print in \\t\r\n-\r\n-my @marks=split/\\;/,$marks;\r\n-#print LIST "#matueID\\tpreID\\tpos1\\tpos2\\tmatureExp\\tstarExp\\ttotalExp\\n";\r\n-print LIST "#matueID\\tpreID\\tpos1\\tpos2";\r\n-for (my $i=0;$i<@marks;$i++) {\r\n-\tprint LIST "\\t",$marks[$i],"_matureExp";\r\n-}\r\n-for (my $i=0;$i<@marks;$i++) {\r\n-\tprint LIST "\\t",$marks[$i],"_starExp";\r\n-}\r\n-for (my $i=0;$i<@marks;$i++) {\r\n-\tprint LIST "\\t",$marks[$i],"_totalExp";\r\n-}\r\n-print LIST "\\n";\r\n-print ALN "#>precursor ID \\n#precursor sequence\\n#precursor structure (mfe)\\n#RNA_seq\\t@marks\\ttotal\\n";\r\n-print MORS "#>precursor ID\\tstrand\\texpress_reads\\texpress_reads\\/total_reads\\tblock_number\\tprecursor_sequence\\n#\\tblock_start\\tblock_end\\t@marks\\ttotal\\ttag_number\\tsequence\\n";\r\n-my %moRs;\r\n-\r\n-foreach my $key (keys %pre) {\r\n-\tprint ALN ">$key\\n$pre{$key}\\n$struc{$key}{struc} ($struc{$key}{mfe})\\n";\r\n-\tnext if(! (exists $pre_read{$key}));\r\n-\tmy @array=@{$pre_read{$key}};\r\n-\t@array=sort{$a->[3]<=> $b->[3]} @array;\r\n-\t\r\n-\tmy $length=length($pre{$key});\r\n-\r\n-\tmy $maxline=-1;my $max=0; ### storage the maxinum express read line\r\n-\tmy $totalReadsNo=0; \r\n-\tmy @not_over=(); ### new read format better for moRs analysis\r\n-\r\n-####print out Aln file start\r\n-\tfor (my $i=0;$i<@array;$i++) {\r\n-\t\tmy $maps=$array[$i][3]+1;\r\n-\t\tmy $mape=$array[$i][3]+length($array[$i][4]);\r\n-\t\tmy $str="";\r\n-\t\t$str .= "." x ($maps-1);\r\n-\t\t$str .=$array[$i][4];\r\n-\t\t$str .="." x ($length-$mape);\r\n-\t\t$str .=" ";\r\n-\r\n-\t\t$array[$i][0]=~/:([\\d|_]+)_x(\\d+)$/;\r\n-\t\tmy @sampl'..b') {\r\n-\t\t\tmy $a=$i; my $b=$a2b{$i};\r\n-#\t\t\tif($a>$b){\r\n-#\t\t\t\t$startpos=$b-$n+2;\r\n-#\t\t\t\t$endpos=$b-$n+2+($end-$start);\r\n-#\t\t\t}\r\n-#\t\t\tif($a<$b){\r\n-\t\t\t\t$endpos=$b+$n+2;\r\n-\t\t\t\tif($endpos>length($structure)){$endpos=length($structure);}\r\n-\t\t\t\t$startpos=$b+$n+2-($end-$start);\r\n-\t\t\t\tif($startpos<1){$startpos=1;}\r\n-#\t\t\t}\r\n-\t\t\tlast;\r\n-\t\t}\r\n-\t\t$n++;\r\n-\t}\r\n-\treturn ($startpos,$endpos);\r\n-}\r\n-sub attachPre{\r\n-\topen IN, "<$pre_file_name";\r\n-\tmy $name;\r\n-\twhile (my $aline=<IN>) {\r\n-\t\tchomp $aline;\r\n-\t\tif ($aline=~/^>(\\S+)/) {\r\n-\t\t\t$name=$1;\r\n-\t\t\tnext;\r\n-\t\t}\r\n-\t\t$pre{$name} .=$aline;\r\n-\t}\r\n-\tclose IN;\r\n-}\r\n-sub readPosOnPre{\r\n-\topen IN,"<read_mapped.bwt";\r\n-\twhile (my $aline=<IN>) {\r\n-\t\tchomp $aline;\r\n-\t\tmy @tmp=split/\\t/,$aline;\r\n-\t\tmy $id=lc($tmp[2]);\r\n-\t\tpush @{$pre_read{$tmp[2]}},[@tmp];\r\n-\t}\r\n-\tclose IN;\r\n-}\r\n-sub maturePosOnPre{\r\n-\topen IN,"<mature_mapped.bwt";\r\n-\twhile (my $aline=<IN>) {\r\n-\t\tchomp $aline;\r\n-\t\tmy @tmp=split/\\t/,$aline;\r\n-\t\tmy $mm=$tmp[0];\r\n-#\t\t$mm=~s/\\-3P|\\-5P//i;\r\n-\t\t$mm=lc($mm);\r\n-\t\tmy $pm=$tmp[2];\r\n-\t\t$pm=lc($pm);\r\n-\r\n-#\t\tnext if ($mm ne $pm);### stringent mapping let7a only allowed to map pre-let7a\r\n-\t\tnext if($mm!~/$pm/);\r\n-#\t\tprint "$tmp[2]\\t$tmp[0]\\n";\r\n-#\t\t$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=$tmp[3]-$upstream;\r\n-#\t\t$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=0 if($pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]<0);\r\n-#\t\t$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[1]=$tmp[3]+length($tmp[4])-1+$downstream;\r\n-\t\t$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=$tmp[3]+1;\r\n-\t\t$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[1]=$tmp[3]+length($tmp[4]);\r\n-\t}\r\n-\tclose IN;\r\n-}\r\n-sub mapping{\r\n- my $err;\r\n-## build bowtie index\r\n- print STDERR "building bowtie index\\n";\r\n- $err = `bowtie-build $pre_file_name miRNA_precursor`;\r\n-\r\n-## map mature sequences against precursors\r\n- print STDERR "mapping mature sequences against index\\n";\r\n-\t$err = `bowtie -p $threads -f -v 0 -a --best --strata --norc miRNA_precursor $mature mature_mapped.bwt`;\r\n-\r\n-## map reads against precursors\r\n- print STDERR "mapping read sequences against index\\n";\r\n- $err=`bowtie -p $threads -f -v $mismatch -a --best --strata --norc miRNA_precursor $read --al mirbase_mapped.fa --un mirbase_not_mapped.fa read_mapped.bwt `;\r\n-\r\n-}\r\n-\r\n-sub subseq{\r\n-\tmy $seq=shift;\r\n-\tmy $beg=shift;\r\n-\tmy $end=shift;\r\n-\tmy $strand=shift;\r\n-\t\r\n-\tmy $subseq=substr($seq,$beg-1,$end-$beg+1);\r\n-\tif ($strand eq "-") {\r\n-\t\t$subseq=revcom($subseq);\r\n-\t}\r\n-\treturn uc $subseq;\r\n-}\r\n-\r\n-sub revcom{\r\n-\tmy $seq=shift;\r\n-\t$seq=~tr/ATCGatcg/TAGCtagc/;\r\n-\t$seq=reverse $seq;\r\n-\treturn uc $seq;\r\n-}\r\n-\r\n-sub Time{\r\n- my $time=time();\r\n- my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];\r\n- $month++;\r\n- $year+=1900;\r\n- if (length($sec) == 1) {$sec = "0"."$sec";}\r\n- if (length($min) == 1) {$min = "0"."$min";}\r\n- if (length($hour) == 1) {$hour = "0"."$hour";}\r\n- if (length($day) == 1) {$day = "0"."$day";}\r\n- if (length($month) == 1) {$month = "0"."$month";}\r\n- #print "$year-$month-$day $hour:$min:$sec\\n";\r\n- return("$year-$month-$day-$hour-$min-$sec");\r\n-}\r\n-\r\n-sub usage{\r\n-print <<"USAGE";\r\n-Version $version\r\n-Usage:\r\n-$0 -r -p -m -mis -t -e -f -tag -o -time\r\n-mandatory parameters:\r\n--p precursor.fa miRNA precursor sequences from miRBase # must be absolute path\r\n--m mature.fa miRNA sequences from miRBase # must be absolute path\r\n--r reads.fa your read sequences #must be absolute path\r\n-\r\n--o output directory\r\n-\r\n-options:\r\n--mis [int] number of allowed mismatches when mapping reads to precursors, default 0\r\n--t [int] threads number,default 1\r\n--e [int] number of nucleotides upstream of the mature sequence to consider, default 2\r\n--f [int] number of nucleotides downstream of the mature sequence to consider, default 5\r\n--tag [string] sample marks# eg. sampleA;sampleB;sampleC\r\n--time sting #make directory time,default is the local time\r\n--h help\r\n-USAGE\r\n-exit(1);\r\n-}\r\n-\r\n' |
b |
diff -r 45de5e1ff487 -r 5691802f074b rfam.pl --- a/rfam.pl Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,103 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2013/7/19 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; -use File::Basename; - -my %opts; -GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","time:s","h"); -if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'i'}; -my $dir=$opts{'o'}; -unless ($dir=~/\/$/) {$dir.="/";} -my $rfam=$opts{'ref'}; -my $mis=defined $opts{'v'}? $opts{'v'} : 0; -my $index=defined $opts{'index'} ? $opts{'index'} : ""; -my $threads=defined $opts{'p'} ? $opts{'p'} : 1; - -if (not -d $dir) { - mkdir $dir; -} - - -my $time=Time(); -if (defined $opts{'time'}) { - $time=$opts{'time'}; -} -my $mapdir=$dir."/rfam_match_".$time; -if(not -d $mapdir){ - mkdir $mapdir; -} -chdir $mapdir; -###check genome index -if (-s $index.".1.ebwt") { -}else{ - &checkACGT($rfam); - `bowtie-build $rfam`; - $index="$rfam"; -} -### genome mapping -`bowtie -v $mis -f -p $threads -k 1 $index $filein --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt`; - -sub checkACGT{ - my $string; - open IN,"<$rfam"; - while (my $aline=<IN>) { - if ($aline!~/^>/) { - $aline=~s/U/T/gi; - } - $string .=$aline; - } - close IN; - $rfam=basename($rfam); - open OUT, ">$rfam"; - print OUT $string; - close OUT; -} - -sub Time{ - my $time=time(); - my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; - $month++; - $year+=1900; - if (length($sec) == 1) {$sec = "0"."$sec";} - if (length($min) == 1) {$min = "0"."$min";} - if (length($hour) == 1) {$hour = "0"."$hour";} - if (length($day) == 1) {$day = "0"."$day";} - if (length($month) == 1) {$month = "0"."$month";} - #print "$year-$month-$day $hour:$min:$sec\n"; - return("$year-$month-$day-$hour-$min-$sec"); -} -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -options: --i input file# input reads fasta/fastq file --ref input file# rfam file, which do not contain miRNAs --index file-prefix #(must be indexed by bowtie-build) The parameter - string must be the prefix of the bowtie index. For instance, if - the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then - the prefix is 'h_sapiens_37_asm'.##can be null --v <int> report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; - --p/--threads <int> number of alignment threads to launch (default: 1) - --o output directory --time sting #make directory time,default is the local time --h help -USAGE -exit(1); -} - |
b |
diff -r 45de5e1ff487 -r 5691802f074b tool_dependencies.xml --- a/tool_dependencies.xml Fri Jul 25 05:57:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,28 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="fastx_toolkit" version="0.0.13"> - <repository changeset_revision="ec66ae4c269b" name="package_fastx_toolkit_0_0_13" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> - <package name="bowtie" version="0.12.7"> - <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> - <set_environment version="1.0"> - <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable> - </set_environment> - <package name="R" version="2.11.0"> - <repository changeset_revision="8d0a55bf7aaf" name="package_r_2_11_0" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> - <package name="ViennaRNA" version="1.5"> - <install version="1.0"> - <actions> - <action type="download_by_url">http://web.mit.edu/seven/src/ViennaRNA-1.5beta.tar.gz</action> - <action type="shell_command">./configure --prefix=$INSTALL_DIR --datadir=$INSTALL_DIR</action> - <action type="shell_command">make</action> - <action type="shell_command">make install</action> - <action type="set_environment"> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - </action> - </actions> - </install> - </package> -</tool_dependency> |