Mercurial > repos > qfabrepo > metadegalaxy_pear_stats

diff pear_stats.py @ 0:ec62f17fcfe6 draft default tip
"planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
author: qfabrepo
date: Mon, 14 Sep 2020 04:50:28 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pear_stats.py	Mon Sep 14 04:50:28 2020 +0000
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+"""Summarise the read-assembly statistics of several Pear logs in one table.
+
+For every Pear log given with -i, the summary lines found at the fixed
+positions LINESTART..LINEEND are copied into one tab-separated row that
+starts with the matching sample name from -s.  The output ends with the
+average percentage of assembled reads and a workflow recommendation.
+"""
+import sys
+import argparse
+
+parser = argparse.ArgumentParser(
+    # Keep the explicit line breaks of the description in the help output.
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    description="Parse multiple Pear statistic log to a tabular format\n" +
+                "Example:\n python pear_stats.py -i \"file1.log,file2.log\" -s \"samplename1,samplename2\" -o outputfile")
+parser.add_argument("-v","--version",action="version",version="%(prog)s 1.0")
+# All three options are mandatory, so let argparse reject a call that lacks one.
+parser.add_argument("-i","--input",dest="inputfilelist",required=True,help="comma-separated list of input files")
+parser.add_argument("-s","--samplename", dest="samplename",required=True,help="comma-separated list of sample names, one per input file")
+parser.add_argument("-o","--outfile",dest="outputfile",required=True,help="Pear statistic output")
+
+
+if len(sys.argv) == 1:
+    parser.print_help(sys.stderr)
+    sys.exit()
+
+args = parser.parse_args()
+
+# tags[i] is the expected start of the (zero-based) line LINESTART+i of a log.
+tags = ['Assembled reads','Discarded reads','Not assembled reads']
+LINESTART=30
+LINEEND  =LINESTART+2
+
+
+inputfiles=args.inputfilelist.split(',')
+inputfilenames=args.samplename.split(',')
+if len(inputfilenames) != len(inputfiles):
+    # Rows are labelled by position, so both lists must have the same length.
+    parser.error("got %d sample name(s) for %d input file(s)"
+                 % (len(inputfilenames), len(inputfiles)))
+
+# Sum of the assembled-read percentages of all logs, updated by processfile().
+allAssembled = 0
+
+
+def processfile(instr):
+    """Return the summary lines of the Pear log *instr*.
+
+    Lines LINESTART..LINEEND (zero-based) are compared with the matching
+    entry of ``tags``.  Matching lines are returned with trailing whitespace
+    removed; a line that does not match is reported on stdout and skipped.
+    The percentage found on the first summary line is added to the
+    module-level ``allAssembled``.
+    """
+    global allAssembled
+    result=[]
+    with open(instr,'r') as f:
+        for linenum,line in enumerate(f):
+            if linenum < LINESTART:
+                continue
+            if linenum > LINEEND:
+                # Nothing after the summary lines is used.
+                break
+            ix = linenum-LINESTART
+            if not line.startswith(tags[ix]):
+                print("ARGH!:", line)
+                continue
+            result.append(line.rstrip())
+            if ix == 0:
+                # The percentage is the text between "(" and "%)".
+                token = line.strip().split('(')[1]
+                allAssembled += float(token.replace("%)",""))
+    return result
+
+
+# The context manager closes the output even if a log cannot be read.
+with open(args.outputfile,'w') as outputfile:
+    for samplename,inputfile in zip(inputfilenames,inputfiles):
+        output=processfile(inputfile)
+        output.insert(0,samplename)
+        outputfile.write("\t".join(output))
+        outputfile.write("\n")
+
+    averageAssembled = allAssembled / len(inputfiles)
+
+    averageAssembledOut=["The above assessment has been performed on 1000 randomly selected reads per sample file.\nAverage % of overlapping paired-end reads =",str(averageAssembled),"\nIf the average percentage is greater than 50%, you can consider using workflow 16S_biodiversity_for_overlap_PE.\nHowever, if the average percentage is less than 50%, use 16S_biodiversity_nonoverlap_PE."]
+
+    outputfile.write("\n\n\n")
+    outputfile.write("\t".join(averageAssembledOut))
author	qfabrepo
date	Mon, 14 Sep 2020 04:50:28 +0000
parents
children