comparison fastp_json_to_tabular.py @ 0:091a2fb2e7ad draft default tip

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/fastp_json_to_tabular commit 888d26702a84c2f8fd1428aff8cd869e94cc0bae"
author public-health-bioinformatics
date Thu, 10 Mar 2022 21:59:56 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:091a2fb2e7ad
1 #!/usr/bin/env python
2
3 import argparse
4 import json
5
def main(args):
    """Print the summary section of a fastp JSON report as two delimited lines.

    The first line printed is the header (column names); the second is the
    matching row of values. Both are joined with ``args.delimiter``.

    Args:
        args: Namespace-like object with the attributes
            ``fastp_json`` (path of the JSON report to read),
            ``sample_id`` (optional; when truthy a leading ``sample_id``
            column is emitted) and ``delimiter`` (field separator).

    Raises:
        KeyError: if the report lacks ``summary`` or any of the metrics read
            from ``summary['before_filtering']`` / ``summary['after_filtering']``.
    """
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(args.fastp_json, 'r', encoding='utf-8') as f:
        fastp_report = json.load(f)

    summary = fastp_report['summary']
    before = summary['before_filtering']
    after = summary['after_filtering']

    # The first word of the 'sequencing' description is compared with 'paired'.
    reads_single_paired = summary['sequencing'].split(' ')[0]

    if reads_single_paired == 'paired':
        # Floor division keeps the arithmetic in exact integers; going through
        # a float (int(x / 2)) silently loses precision for very large counts.
        total_read_pairs_before_filtering = str(int(before['total_reads']) // 2)
        total_read_pairs_after_filtering = str(int(after['total_reads']) // 2)
        read2_mean_length_before_filtering = before['read2_mean_length']
        read2_mean_length_after_filtering = after['read2_mean_length']
    else:
        # Pair counts and read-2 lengths are only reported for paired input.
        total_read_pairs_before_filtering = 'NA'
        total_read_pairs_after_filtering = 'NA'
        read2_mean_length_before_filtering = 'NA'
        read2_mean_length_after_filtering = 'NA'

    # A report without an 'adapter_cutting' section is reported as zero trimming.
    if 'adapter_cutting' in fastp_report:
        adapter_trimmed_reads = fastp_report['adapter_cutting']['adapter_trimmed_reads']
        adapter_trimmed_bases = fastp_report['adapter_cutting']['adapter_trimmed_bases']
    else:
        adapter_trimmed_reads = 0
        adapter_trimmed_bases = 0

    # One ordered list of (column name, value) pairs, so that the header line
    # and the data line cannot drift out of sync.
    columns = []
    if args.sample_id:
        columns.append(('sample_id', args.sample_id))

    columns.extend([
        ('total_reads_before_filtering', before['total_reads']),
        ('total_read_pairs_before_filtering', total_read_pairs_before_filtering),
        ('total_reads_after_filtering', after['total_reads']),
        ('total_read_pairs_after_filtering', total_read_pairs_after_filtering),
        ('total_bases_before_filtering', before['total_bases']),
        ('total_bases_after_filtering', after['total_bases']),
        ('read1_mean_length_before_filtering', before['read1_mean_length']),
        ('read1_mean_length_after_filtering', after['read1_mean_length']),
        ('read2_mean_length_before_filtering', read2_mean_length_before_filtering),
        ('read2_mean_length_after_filtering', read2_mean_length_after_filtering),
        ('q20_bases_before_filtering', before['q20_bases']),
        ('q20_bases_after_filtering', after['q20_bases']),
        ('q20_rate_before_filtering', before['q20_rate']),
        ('q20_rate_after_filtering', after['q20_rate']),
        ('q30_bases_before_filtering', before['q30_bases']),
        ('q30_bases_after_filtering', after['q30_bases']),
        ('q30_rate_before_filtering', before['q30_rate']),
        ('q30_rate_after_filtering', after['q30_rate']),
        ('gc_content_before_filtering', before['gc_content']),
        ('gc_content_after_filtering', after['gc_content']),
        ('adapter_trimmed_reads', adapter_trimmed_reads),
        ('adapter_trimmed_bases', adapter_trimmed_bases),
    ])

    print(args.delimiter.join(name for name, _ in columns))
    print(args.delimiter.join(str(value) for _, value in columns))
110
111
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser(
        description='Print the summary of a fastp JSON report as a header line and one data line.',
    )
    parser.add_argument(
        'fastp_json',
        help='path to the fastp JSON report to read',
    )
    parser.add_argument(
        '-s', '--sample-id',
        help="optional sample identifier; when given, a leading 'sample_id' column is added",
    )
    parser.add_argument(
        '-d', '--delimiter',
        default='\t',
        help='field separator used for both output lines (default: tab)',
    )
    args = parser.parse_args()
    main(args)