diff kmersvm/kmer2meme.pl @ 7:fd740d515502 draft default tip

Uploaded revised kmer-SVM to include modules from kmer-visual.
author cafletezbrant
date Sun, 16 Jun 2013 18:06:14 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kmersvm/kmer2meme.pl	Sun Jun 16 18:06:14 2013 -0400
@@ -0,0 +1,82 @@
+# kmer2meme.pl - turn k-mers from the top and bottom of a weights file into
+# single-site motifs in MEME text format.
+#
+# Usage: perl kmer2meme.pl <weights_file> <num_kmers>
+#
+#   weights_file  tab-separated text file; the k-mer is taken from the first
+#                 column of each selected line.
+#   num_kmers     number of lines to take from each end of the file.
+#
+# The selected lines are file lines 9 .. 8+num_kmers (the first eight lines
+# are skipped; presumably they are a header - confirm against the producer
+# of the weights file) plus the last num_kmers lines.  The result is always
+# written to "kmer2meme.meme" in the current working directory.
+use strict;
+use warnings;
+
+die "Usage: $0 <weights_file> <num_kmers>\n" unless @ARGV >= 2;
+my ($weights_file, $num_kmers) = @ARGV;
+die "num_kmers must be a non-negative integer, got '$num_kmers'\n"
+	unless $num_kmers =~ /\A\d+\z/;
+$num_kmers = int($num_kmers);	# numify so the unary minus below is arithmetic
+
+open(my $w_fh, "<", $weights_file)
+	or die "Cannot open '$weights_file' for reading: $!\n";
+my @weights = <$w_fh>;
+close($w_fh);
+
+# An array slice returns undef for every index that falls outside the file,
+# so a short input leaves holes here; they are dropped in the loop below.
+my @selected = @weights[8..(7+$num_kmers), (-$num_kmers..-1)];
+
+# Keep only the first column (the k-mer).  The second column was collected
+# too until dongwon's change of 042713.
+my @kmers = ();
+for my $line (@selected) {
+	next unless defined $line;
+	$line =~ s/\r?\n\z//;	# a line without a tab would otherwise keep its newline
+	my ($kmer) = split(/\t/, $line);
+	next unless defined $kmer && length $kmer;
+	push(@kmers, $kmer);
+}
+
+my $out_file = "kmer2meme.meme";
+open(my $o_fh, ">", $out_file)
+	or die "Cannot open '$out_file' for writing: $!\n";
+
+print {$o_fh} <<'END_HEADER';
+MEME version 4
+
+ALPHABET= ACGT
+
+strands: + -
+
+Background letter frequencies (from no specific genome):
+A 0.25 C 0.25 G 0.25 T 0.25
+
+END_HEADER
+
+# One matrix row per nucleotide, columns in ALPHABET order (A C G T).
+my %row_for = (
+	A => " 1.00  0.00  0.00  0.00\n",
+	C => " 0.00  1.00  0.00  0.00\n",
+	G => " 0.00  0.00  1.00  0.00\n",
+	T => " 0.00  0.00  0.00  1.00\n",
+);
+# Any other character gets a uniform row.
+my $uniform_row = " 0.25  0.25  0.25  0.25\n";
+
+foreach my $kmer (@kmers) {
+	my $width = length($kmer);
+	print {$o_fh} "MOTIF $kmer\n";
+	print {$o_fh} "letter-probability matrix: alength= 4 w= $width nsites= 1 E= 0\n";
+	foreach my $nc (split(//, $kmer)) {
+		# Every row must go to the output file: the matrix header above
+		# declares exactly $width rows for this motif.
+		my $row = exists $row_for{$nc} ? $row_for{$nc} : $uniform_row;
+		print {$o_fh} $row;
+	}
+	print {$o_fh} "\n";
+}
+
+close($o_fh) or die "Cannot finish writing '$out_file': $!\n";