# HG changeset patch
# User cstrittmatter
# Date 1556426544 14400
# Node ID c9d98f5bc24035ec418e08a28ee53939b7431d0e
# Parent a0852bb4b09b4f3ae7e201915897e4f67d30c373
planemo upload commit 003cdb83fd17248ef57959332d58a3c96311332a-dirty
diff -r a0852bb4b09b -r c9d98f5bc240 README.md
--- a/README.md Thu Apr 25 07:59:05 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-# kmer_id
-Mitichondrial read identification by kmer database for Galaxy server
-
-File kmer_read_m3.cpp
-uses gzip library, compile with:
-g++ -O3 -std=c++0x kmer_read_m3.cpp -o kmerread -lz
-
-database files needed in same directory:
-(can create with kmer_build, but not described here yet)
-mitochondria_data.txt
-mitochondria_refkey.txt
-mitochondria_count.txt
-mitochondria_tree.txt
-mitochondria_probes.txt.gz
-1a.fasta (test input)
-
-File kmer_read_m3.py
-(Python 2.7)
-Run with:
-python kmer_read_m3.py -w [working directory] -d [output directory] -i [input filename1] [input filename2]
-
-Input files can be two paired files (.fastq, .fastq.gz, .fasta, .fasta.gz) or a single file with none as filename2
-Output is .csv file with read count and % abundance for each species.
diff -r a0852bb4b09b -r c9d98f5bc240 kmer_read_m3.py
--- a/kmer_read_m3.py Thu Apr 25 07:59:05 2019 -0400
+++ b/kmer_read_m3.py Sun Apr 28 00:42:24 2019 -0400
@@ -3,7 +3,7 @@
import re
import sys
-#import numpy as np -- removed numpy requirement
+import numpy as np
from subprocess import Popen, PIPE
if __name__ == '__main__':
@@ -65,57 +65,49 @@
if use == "1":
name_list.append(name)
factor_list.append(tested / hit / gensize)
- #factor_arr = np.array(factor_list)
+ factor_arr = np.array(factor_list)
num_targs = len(name_list)
+ print wdir
process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2])
(stdout, stderr) = process.communicate()
#read in output files
noid_list = []
- num_cols = 1 #only one sample, no matrix needed
- #m = np.zeros((num_targs,num_cols))
- m = [0.0 for _ in range(num_targs)]
+ read_ct = []
+ num_cols = 1
+ m = np.zeros((num_targs,num_cols))
col = 0
data_file = open(cname, 'r')
data = ''.join(data_file.readlines())
data_file.close()
lines = data.split('\n')
- read_ct = 0.0
+ read_ct.append(0.0)
index = 0
for line in lines:
if len(line) > 1:
t_s, count, uniq = line.split(',')
target = int(t_s)
- read_ct += float(count)
+ read_ct[col] += float(count)
if target > 0:
if in_use[target]:
- m[index] = float(count)
+ m[index, col] = float(count)
index += 1
else:
noid_list.append(int(count))
- #b = m * factor_arr[:,None] #normalize each row by kmer coverage
- #sums = np.sum(b, axis=0)
- #b = b / sums[None,:]
- #b = b * 100.0
- sum = 0.0
- b = []
- for i in range(num_targs):
- b1 = m[i] * factor_list[i]
- sum += b1
- b.append(b1)
- sum /= 100.0
- for i in range(num_targs):
- b[i] /=sum
- #rowmax = b.max(axis=1)
+ b = m * factor_arr[:,None] #normalize each row by kmer coverage
+ sums = np.sum(b, axis=0)
+ b = b / sums[None,:]
+ b = b * 100.0
+ rowmax = b.max(axis=1)
out_file = open(oname, 'w')
output = "taxid,reads,abundance\n"
out_file.write(output)
output = "total,"
- #for i in range(num_cols):
- output += str(read_ct) + ",,"
+ for i in range(num_cols):
+ output += str(read_ct[i]) + ",,"
output += "\n"
out_file.write(output)
output = "no_id,"
@@ -125,10 +117,10 @@
out_file.write(output)
for i in range(num_targs):
#l = order_row[i]
- if m[i] > 0: #only output non-zero results
+ if rowmax[i] > 0.000: #only output non-zero results
output = name_list[i]
for j in range(num_cols):
- output += ',' + "{0:.0f}".format(m[i]) + ',' + "{0:.3f}".format(b[i])
+ output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j])
output += "\n"
out_file.write(output)
out_file.close()
diff -r a0852bb4b09b -r c9d98f5bc240 mitokmer.1.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mitokmer.1.xml Sun Apr 28 00:42:24 2019 -0400
@@ -0,0 +1,82 @@
+
+ Eukaryotic abundance prediction by mitochondrial content
+
+ python
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ meh
+ }
+
+
+
\ No newline at end of file
diff -r a0852bb4b09b -r c9d98f5bc240 mitokmer.xml
--- a/mitokmer.xml Thu Apr 25 07:59:05 2019 -0400
+++ b/mitokmer.xml Sun Apr 28 00:42:24 2019 -0400
@@ -38,7 +38,10 @@
python $__tool_directory__/kmer_read_m3.py
-w $__tool_directory__
-d $__tool_directory__/output
- -i $__tool_directory__/$fwd $__tool_directory__/$rev
+ -i $__tool_directory__/$fwd $__tool_directory__/$rev;
+ echo "run cat";
+ rm $__tool_directory__/results*;
+ cat $__tool_directory__/output/mitokmer_result.csv > $__tool_directory__/results.csv;
]]>
@@ -58,13 +61,13 @@
-
+
-
+
diff -r a0852bb4b09b -r c9d98f5bc240 output/mitokmer_result.1.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/output/mitokmer_result.1.csv Sun Apr 28 00:42:24 2019 -0400
@@ -0,0 +1,8 @@
+axid,reads,abundance,test
+total,4192977.0,tes,test
+no_id,4192954,test,test
+Mammalia_Rodentia_Cricetidae_Neotomodon_alstoni,17,88.932
+Mammalia_Rodentia_Cricetidae_Peromyscus_crinitus,1,5.185
+Mesostigmatophyceae_Mesostigmatales_Mesostigmataceae_Mesostigma_viride,1,2.228
+Oligohymenophorea_Hymenostomatida_none_Ichthyophthirius_multifiliis,1,1.674
+Oligohymenophorea_Peniculida_Parameciidae_Paramecium_caudatum,1,1.980
\ No newline at end of file
diff -r a0852bb4b09b -r c9d98f5bc240 output/mitokmer_result.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/output/mitokmer_result.txt Sun Apr 28 00:42:24 2019 -0400
@@ -0,0 +1,8 @@
+axid,reads,abundance,test
+total,4192977.0,tes,test
+no_id,4192954,test,test
+Mammalia_Rodentia_Cricetidae_Neotomodon_alstoni,17,88.932
+Mammalia_Rodentia_Cricetidae_Peromyscus_crinitus,1,5.185
+Mesostigmatophyceae_Mesostigmatales_Mesostigmataceae_Mesostigma_viride,1,2.228
+Oligohymenophorea_Hymenostomatida_none_Ichthyophthirius_multifiliis,1,1.674
+Oligohymenophorea_Peniculida_Parameciidae_Paramecium_caudatum,1,1.980
\ No newline at end of file
diff -r a0852bb4b09b -r c9d98f5bc240 results.csv