Mercurial > repos > public-health-bioinformatics > fastp_json_to_tabular
comparison fastp_json_to_tabular.py @ 0:091a2fb2e7ad draft default tip
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/fastp_json_to_tabular commit 888d26702a84c2f8fd1428aff8cd869e94cc0bae"
author | public-health-bioinformatics |
---|---|
date | Thu, 10 Mar 2022 21:59:56 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:091a2fb2e7ad |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import json | |
5 | |
def main(args):
    """Print selected metrics from a fastp JSON report as a two-line table.

    The first printed line is a header of column names and the second is the
    matching row of values, both joined with ``args.delimiter``.

    Args:
        args: Object with the attributes ``fastp_json`` (path to the JSON
            report), ``sample_id`` (optional; when truthy it is emitted as a
            leading ``sample_id`` column) and ``delimiter`` (column separator).

    Raises:
        KeyError: If the report lacks one of the summary fields read below.
            Nothing is printed in that case, because both output lines are
            only written after every value has been collected.
    """
    # Binary mode lets the json module decode the bytes itself instead of
    # relying on the locale's default text encoding.
    with open(args.fastp_json, 'rb') as f:
        fastp_report = json.load(f)

    summary = fastp_report['summary']
    stages = ('before_filtering', 'after_filtering')

    # The first word of the 'sequencing' description is the read layout.
    is_paired = summary['sequencing'].split(' ')[0] == 'paired'

    # Names and values are kept together as (name, value) pairs so the header
    # and the data row can never drift out of alignment.
    columns = []
    if args.sample_id:
        columns.append(('sample_id', args.sample_id))

    for stage in stages:
        total_reads = summary[stage]['total_reads']
        columns.append(('total_reads_' + stage, total_reads))
        # Floor division keeps the halved count exact; true division would go
        # through a float and lose precision for very large counts.
        read_pairs = int(total_reads) // 2 if is_paired else 'NA'
        columns.append(('total_read_pairs_' + stage, read_pairs))

    per_stage_metrics = (
        'total_bases',
        'read1_mean_length',
        'read2_mean_length',
        'q20_bases',
        'q20_rate',
        'q30_bases',
        'q30_rate',
        'gc_content',
    )
    for metric in per_stage_metrics:
        for stage in stages:
            if metric == 'read2_mean_length' and not is_paired:
                # Not looked up at all unless the layout is 'paired'.
                value = 'NA'
            else:
                value = summary[stage][metric]
            columns.append((metric + '_' + stage, value))

    # Adapter statistics default to 0 when the report has no
    # 'adapter_cutting' section.
    for metric in ('adapter_trimmed_reads', 'adapter_trimmed_bases'):
        if 'adapter_cutting' in fastp_report:
            value = fastp_report['adapter_cutting'][metric]
        else:
            value = 0
        columns.append((metric, value))

    print(args.delimiter.join(name for name, _ in columns))
    print(args.delimiter.join(str(value) for _, value in columns))
110 | |
111 | |
if __name__ == "__main__":
    # Command-line entry point: one positional path to the JSON report, an
    # optional sample identifier, and the output delimiter (tab by default).
    cli = argparse.ArgumentParser()
    cli.add_argument('fastp_json')
    cli.add_argument('-s', '--sample-id')
    cli.add_argument('-d', '--delimiter', default='\t')
    main(cli.parse_args())