annotate pear_stats.py @ 0:ec62f17fcfe6 draft default tip

"planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
author qfabrepo
date Mon, 14 Sep 2020 04:50:28 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ec62f17fcfe6 "planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
qfabrepo
parents:
diff changeset
1 #!/usr/bin/env python
ec62f17fcfe6 "planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
qfabrepo
parents:
diff changeset
2 import sys
ec62f17fcfe6 "planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
qfabrepo
parents:
diff changeset
3 import argparse
ec62f17fcfe6 "planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
qfabrepo
parents:
diff changeset
4
ec62f17fcfe6 "planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
qfabrepo
parents:
diff changeset
# Command-line interface.
# RawDescriptionHelpFormatter keeps the explicit "\n" line breaks of the
# description; argparse's default formatter would re-wrap them into one
# paragraph.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="Parse multiple Pear statistic log to a tabular format\n" +
    "Example:\n python pear_stats.py -i \"file1.log,file2.log\" -s \"samplename1,samplename2\" -o outputfile")
parser.add_argument("-v", "--version", action="version", version="%(prog)s 1.0")
# All three options are mandatory: the script splits -i and -s on commas and
# opens -o for writing, so a missing value cannot be handled meaningfully.
parser.add_argument("-i", "--input", dest="inputfilelist", required=True,
                    help="comma-separated list of Pear log files")
parser.add_argument("-s", "--samplename", dest="samplename", required=True,
                    help="comma-separated list of sample names, one per input file")
parser.add_argument("-o", "--outfile", dest="outputfile", required=True,
                    help="Pear statistic output")


# Invoked without any argument: show the help text instead of an error.
if len(sys.argv) == 1:
    parser.print_help(sys.stderr)
    sys.exit()

args = parser.parse_args()

# Prefixes of the three summary lines collected from each log, in the order
# they are expected to appear.
tags = ['Assembled reads', 'Discarded reads', 'Not assembled reads']
# Zero-based index of the first summary line within a log file, and of the
# last one (three consecutive lines in total).
# NOTE(review): assumes a fixed Pear log layout - confirm for the Pear
# version in use.
LINESTART = 30
LINEEND = LINESTART + 2


inputfiles = args.inputfilelist.split(',')
inputfilenames = args.samplename.split(',')
# Stays open for the rest of the script; it is closed after the summary has
# been written.
outputfile = open(args.outputfile, 'w')

# Running sum of the "Assembled reads" percentages, updated by processfile().
allAssembled = 0
ec62f17fcfe6 "planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
qfabrepo
parents:
diff changeset
30
ec62f17fcfe6 "planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
qfabrepo
parents:
diff changeset
def processfile(instr):
    """Collect the read-count summary lines from one Pear log file.

    Only the lines whose zero-based index lies in LINESTART..LINEEND are
    inspected. Each of them is kept (right-stripped) when it starts with the
    matching entry of ``tags``; otherwise it is reported on stdout and
    skipped. The percentage found in parentheses on the first summary line
    is added to the module-level ``allAssembled`` accumulator.

    Args:
        instr: Path of the Pear log file to read.

    Returns:
        A list with the matching summary lines, in file order. It holds
        fewer than three entries when the log is shorter than expected or a
        line does not carry the expected tag.

    Raises:
        ValueError: If the first summary line matches its tag but contains
            no parsable "(NN.NN%)" percentage.
    """
    global allAssembled
    result = []
    with open(instr, 'r') as f:
        for linenum, line in enumerate(f):
            if linenum < LINESTART:
                continue
            if linenum > LINEEND:
                # Nothing after the summary window is used; stop reading.
                break
            ix = linenum - LINESTART
            if not line.startswith(tags[ix]):
                print("ARGH!:", line)
                continue
            result.append(line.rstrip())
            if ix == 0:
                # The line ends in "(<percent>%)": take the text after the
                # first "(" and drop the closing "%)".
                try:
                    token = line.strip().split('(')[1]
                    allAssembled += float(token.replace("%)", ""))
                except (IndexError, ValueError):
                    raise ValueError(
                        "Cannot parse assembled-reads percentage in %s: %r"
                        % (instr, line.rstrip()))
    return result
ec62f17fcfe6 "planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
qfabrepo
parents:
diff changeset
47
ec62f17fcfe6 "planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
qfabrepo
parents:
diff changeset
# Every input file needs a sample name for the first output column. Check
# this before writing anything instead of failing part-way through.
if len(inputfilenames) < len(inputfiles):
    outputfile.close()
    sys.exit("pear_stats.py: %d input files but only %d sample names given"
             % (len(inputfiles), len(inputfilenames)))

try:
    # One tab-separated row per input file: sample name, then the summary
    # lines returned by processfile().
    for logpath, sample in zip(inputfiles, inputfilenames):
        output = processfile(logpath)
        output.insert(0, sample)
        outputfile.write("\t".join(output))
        outputfile.write("\n")

    # Mean of the per-file "Assembled reads" percentages accumulated in
    # allAssembled by processfile().
    averageAssembled = allAssembled / len(inputfiles)

    averageAssembledOut = ["The above assessment has been performed on 1000 randomly selected reads per sample file.\nAverage % of overlapping paired-end reads =", str(averageAssembled), "\nIf the average percentage is greater than 50%, you can consider using workflow 16S_biodiversity_for_overlap_PE.\nHowever, if the average percentage is less than 50%, use 16S_biodiversity_nonoverlap_PE."]

    outputfile.write("\n\n\n")
    outputfile.write("\t".join(averageAssembledOut))
finally:
    # Close the report even when a log file could not be read or parsed.
    outputfile.close()