Mercurial > repos > iss > eurl_vtec_wgs_pt
annotate scripts/ReMatCh/rematch.py @ 6:20ff3dca457f draft default tip
planemo upload commit 6857c749c21f580c828aba3543e294b69d32b662
author | iss |
---|---|
date | Mon, 23 Oct 2023 11:45:36 +0000 |
parents | c6bab5103a14 |
children |
rev | line source |
---|---|
0
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
2 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
3 # -*- coding: utf-8 -*- |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
4 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
5 """ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
6 rematch.py - Reads mapping against target sequences, checking mapping |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
7 and consensus sequences production |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
8 <https://github.com/B-UMMI/ReMatCh/> |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
9 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
10 Copyright (C) 2019 Miguel Machado <mpmachado@medicina.ulisboa.pt> |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
11 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
12 Last modified: August 08, 2019 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
13 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
14 This program is free software: you can redistribute it and/or modify |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
15 it under the terms of the GNU General Public License as published by |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
16 the Free Software Foundation, either version 3 of the License, or |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
17 (at your option) any later version. |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
18 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
19 This program is distributed in the hope that it will be useful, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
20 but WITHOUT ANY WARRANTY; without even the implied warranty of |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
22 GNU General Public License for more details. |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
23 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
24 You should have received a copy of the GNU General Public License |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
25 along with this program. If not, see <http://www.gnu.org/licenses/>. |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
26 """ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
27 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
28 import os |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
29 import sys |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
30 import time |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
31 import argparse |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
32 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
33 try: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
34 from __init__ import __version__ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
35 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
36 import modules.utils as utils |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
37 import modules.seqFromWebTaxon as seq_from_web_taxon |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
38 import modules.download as download |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
39 import modules.rematch_module as rematch_module |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
40 import modules.checkMLST as check_mlst |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
41 except ImportError: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
42 from ReMatCh.__init__ import __version__ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
43 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
44 from ReMatCh.modules import utils as utils |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
45 from ReMatCh.modules import seqFromWebTaxon as seq_from_web_taxon |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
46 from ReMatCh.modules import download as download |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
47 from ReMatCh.modules import rematch_module as rematch_module |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
48 from ReMatCh.modules import checkMLST as check_mlst |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
49 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
50 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
51 def search_fastq_files(directory): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
52 files_extensions = ['.fastq.gz', '.fq.gz'] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
53 pair_end_files_separation = [['_R1_001.f', '_R2_001.f'], ['_1.f', '_2.f']] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
54 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
55 list_ids = {} |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
56 directories = [d for d in os.listdir(directory) if |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
57 not d.startswith('.') and os.path.isdir(os.path.join(directory, d, ''))] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
58 for directory_found in directories: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
59 if directory_found != 'pubmlst': |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
60 directory_path = os.path.join(directory, directory_found, '') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
61 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
62 fastq_found = [] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
63 files = [f for f in os.listdir(directory_path) if |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
64 not f.startswith('.') and os.path.isfile(os.path.join(directory_path, f))] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
65 for file_found in files: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
66 if file_found.endswith(tuple(files_extensions)): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
67 fastq_found.append(file_found) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
68 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
69 if len(fastq_found) == 1: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
70 list_ids[directory_found] = [os.path.join(directory_path, f) for f in fastq_found] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
71 elif len(fastq_found) >= 2: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
72 file_pair = [] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
73 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
74 # Search pairs |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
75 for pe_separation in pair_end_files_separation: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
76 for fastq in fastq_found: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
77 if pe_separation[0] in fastq or pe_separation[1] in fastq: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
78 file_pair.append(fastq) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
79 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
80 if len(file_pair) == 2: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
81 break |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
82 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
83 file_pair = [] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
84 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
85 # Search single |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
86 if len(file_pair) == 0: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
87 for pe_separation in pair_end_files_separation: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
88 for fastq in fastq_found: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
89 if pe_separation[0] not in fastq or pe_separation[1] not in fastq: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
90 file_pair.append(fastq) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
91 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
92 if len(file_pair) >= 1: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
93 file_pair = file_pair[0] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
94 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
95 if len(file_pair) >= 1: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
96 list_ids[directory_found] = [os.path.join(directory_path, f) for f in file_pair] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
97 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
98 return list_ids |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
99 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
100 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
101 def get_list_ids_from_file(file_list_ids): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
102 list_ids = [] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
103 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
104 with open(file_list_ids, 'rtU') as lines: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
105 for line in lines: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
106 line = line.rstrip('\r\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
107 if len(line) > 0: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
108 list_ids.append(line) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
109 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
110 if len(list_ids) == 0: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
111 sys.exit('No runIDs were found in ' + file_list_ids) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
112 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
113 return list_ids |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
114 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
115 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
116 def get_taxon_run_ids(taxon_name, outputfile): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
117 seq_from_web_taxon.run_seq_from_web_taxon(taxon_name, outputfile, True, True, True, False) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
118 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
119 run_ids = [] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
120 with open(outputfile, 'rtU') as reader: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
121 for line in reader: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
122 line = line.rstrip('\r\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
123 if len(line) > 0: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
124 if not line.startswith('#'): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
125 line = line.split('\t') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
126 run_ids.append(line[0]) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
127 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
128 return run_ids |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
129 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
130 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
131 def get_list_ids(workdir, file_list_ids, taxon_name): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
132 searched_fastq_files = False |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
133 list_ids = [] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
134 if file_list_ids is None and taxon_name is None: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
135 list_ids = search_fastq_files(workdir) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
136 searched_fastq_files = True |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
137 elif file_list_ids is not None: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
138 list_ids = get_list_ids_from_file(os.path.abspath(file_list_ids)) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
139 elif taxon_name is not None and file_list_ids is None: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
140 list_ids = get_taxon_run_ids(taxon_name, os.path.join(workdir, 'IDs_list.seqFromWebTaxon.tab')) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
141 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
142 if len(list_ids) == 0: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
143 sys.exit('No IDs were found') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
144 return list_ids, searched_fastq_files |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
145 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
146 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
147 def format_gene_info(gene_specific_info, minimum_gene_coverage, minimum_gene_identity, reported_data_type, summary, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
148 sample, genes_present): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
149 info = None |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
150 if gene_specific_info['gene_coverage'] >= minimum_gene_coverage and \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
151 gene_specific_info['gene_identity'] >= minimum_gene_identity: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
152 if summary and sample not in genes_present: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
153 genes_present[sample] = {} |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
154 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
155 if gene_specific_info['gene_number_positions_multiple_alleles'] == 0: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
156 s = str(gene_specific_info[reported_data_type]) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
157 info = str(s) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
158 if summary: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
159 genes_present[sample][gene_specific_info['header']] = str(s) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
160 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
161 s = 'multiAlleles_' + str(gene_specific_info[reported_data_type]) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
162 info = str(s) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
163 if summary: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
164 genes_present[sample][gene_specific_info['header']] = str(s) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
165 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
166 info = 'absent_' + str(gene_specific_info[reported_data_type]) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
167 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
168 return info, genes_present |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
169 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
170 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
171 def write_data_by_gene(gene_list_reference, minimum_gene_coverage, sample, data_by_gene, outdir, time_str, run_times, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
172 minimum_gene_identity, reported_data_type, summary, genes_present): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
173 combined_report = \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
174 os.path.join(outdir, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
175 'combined_report.data_by_gene.' + run_times + '.' + reported_data_type + '.' + time_str + '.tab') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
176 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
177 if reported_data_type == 'coverage_depth': |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
178 reported_data_type = 'gene_mean_read_coverage' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
179 elif reported_data_type == 'sequence_coverage': |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
180 reported_data_type = 'gene_coverage' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
181 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
182 combined_report_exist = os.path.isfile(combined_report) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
183 with open(combined_report, 'at') as writer: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
184 seq_list = sorted(gene_list_reference.keys()) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
185 if not combined_report_exist: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
186 writer.write('#sample' + '\t' + '\t'.join([gene_list_reference[seq] for seq in seq_list]) + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
187 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
188 results = {} |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
189 headers = [] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
190 for i in data_by_gene: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
191 results[data_by_gene[i]['header']], genes_present = format_gene_info(data_by_gene[i], minimum_gene_coverage, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
192 minimum_gene_identity, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
193 reported_data_type, summary, sample, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
194 genes_present) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
195 headers.append(data_by_gene[i]['header']) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
196 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
197 if len(headers) != gene_list_reference: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
198 for gene in gene_list_reference: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
199 if gene not in headers: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
200 results[gene] = 'NA' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
201 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
202 writer.write(sample + '\t' + '\t'.join([results[seq] for seq in seq_list]) + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
203 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
204 return genes_present |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
205 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
206 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
207 def write_sample_report(sample, outdir, time_str, file_size, run_successfully_fastq, run_successfully_rematch_first, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
208 run_successfully_rematch_second, time_taken_fastq, time_taken_rematch_first, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
209 time_taken_rematch_second, time_taken_sample, sequencing_information, sample_data_general_first, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
210 sample_data_general_second, fastq_used): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
211 sample_report = os.path.join(outdir, 'sample_report.' + time_str + '.tab') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
212 report_exist = os.path.isfile(sample_report) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
213 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
214 header_general = ['sample', 'sample_run_successfully', 'sample_run_time', 'files_size', 'download_run_successfully', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
215 'download_run_time', 'rematch_run_successfully_first', 'rematch_run_time_first', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
216 'rematch_run_successfully_second', 'rematch_run_time_second'] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
217 header_data_general = ['number_absent_genes', 'number_genes_multiple_alleles', 'mean_sample_coverage'] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
218 header_sequencing = ['run_accession', 'instrument_platform', 'instrument_model', 'library_layout', 'library_source', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
219 'extra_run_accession', 'nominal_length', 'read_count', 'base_count', 'date_download'] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
220 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
221 with open(sample_report, 'at') as writer: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
222 if not report_exist: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
223 writer.write('#' + '\t'.join(header_general) + '\t' + '_first\t'.join(header_data_general) + '_first\t' + |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
224 '_second\t'.join(header_data_general) + '_second\t' + '\t'.join(header_sequencing) + '\t' + |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
225 'fastq_used' + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
226 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
227 writer.write('\t'.join([sample, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
228 str(all([run_successfully_fastq is not False, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
229 run_successfully_rematch_first is not False, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
230 run_successfully_rematch_second is not False])), |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
231 str(time_taken_sample), |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
232 str(file_size), |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
233 str(run_successfully_fastq), |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
234 str(time_taken_fastq), |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
235 str(run_successfully_rematch_first), |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
236 str(time_taken_rematch_first), |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
237 str(run_successfully_rematch_second), |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
238 str(time_taken_rematch_second)]) + |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
239 '\t' + '\t'.join([str(sample_data_general_first[i]) for i in header_data_general]) + |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
240 '\t' + '\t'.join([str(sample_data_general_second[i]) for i in header_data_general]) + |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
241 '\t' + '\t'.join([str(sequencing_information[i]) for i in header_sequencing]) + |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
242 '\t' + ','.join(fastq_used) + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
243 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
244 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
245 def concatenate_extra_seq_2_consensus(consensus_sequence, reference_sequence, extra_seq_length, outdir): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
246 reference_dict, ignore, ignore = rematch_module.get_sequence_information(reference_sequence, extra_seq_length) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
247 consensus_dict, genes, ignore = rematch_module.get_sequence_information(consensus_sequence, 0) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
248 number_consensus_with_sequences = 0 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
249 for k, values_consensus in list(consensus_dict.items()): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
250 for values_reference in list(reference_dict.values()): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
251 if values_reference['header'] == values_consensus['header']: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
252 if len(set(consensus_dict[k]['sequence'])) > 1: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
253 number_consensus_with_sequences += 1 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
254 if extra_seq_length <= len(values_reference['sequence']): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
255 right_extra_seq = \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
256 '' if extra_seq_length == 0 else values_reference['sequence'][-extra_seq_length:] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
257 consensus_dict[k]['sequence'] = \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
258 values_reference['sequence'][:extra_seq_length] + \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
259 consensus_dict[k]['sequence'] + \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
260 right_extra_seq |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
261 consensus_dict[k]['length'] += extra_seq_length + len(right_extra_seq) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
262 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
263 consensus_concatenated = os.path.join(outdir, 'consensus_concatenated_extraSeq.fasta') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
264 with open(consensus_concatenated, 'wt') as writer: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
265 for i in consensus_dict: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
266 writer.write('>' + consensus_dict[i]['header'] + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
267 fasta_sequence_lines = rematch_module.chunkstring(consensus_dict[i]['sequence'], 80) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
268 for line in fasta_sequence_lines: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
269 writer.write(line + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
270 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
271 return consensus_concatenated, genes, consensus_dict, number_consensus_with_sequences |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
272 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
273 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
274 def clean_headers_reference_file(reference_file, outdir, extra_seq): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
275 problematic_characters = ["|", " ", ",", ".", "(", ")", "'", "/", ":"] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
276 print('Checking if reference sequences contain ' + str(problematic_characters) + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
277 # headers_changed = False |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
278 new_reference_file = str(reference_file) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
279 sequences, genes, headers_changed = rematch_module.get_sequence_information(reference_file, extra_seq) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
280 if headers_changed: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
281 print('At least one of the those characters was found. Replacing those with _' + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
282 new_reference_file = \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
283 os.path.join(outdir, os.path.splitext(os.path.basename(reference_file))[0] + '.headers_renamed.fasta') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
284 with open(new_reference_file, 'wt') as writer: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
285 for i in sequences: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
286 writer.write('>' + sequences[i]['header'] + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
287 fasta_sequence_lines = rematch_module.chunkstring(sequences[i]['sequence'], 80) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
288 for line in fasta_sequence_lines: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
289 writer.write(line + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
290 return new_reference_file, genes, sequences |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
291 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
292 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
293 def write_mlst_report(sample, run_times, consensus_type, st, alleles_profile, loci_order, outdir, time_str): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
294 mlst_report = os.path.join(outdir, 'mlst_report.' + time_str + '.tab') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
295 mlst_report_exist = os.path.isfile(mlst_report) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
296 with open(mlst_report, 'at') as writer: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
297 if not mlst_report_exist: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
298 writer.write('\t'.join(['#sample', 'ReMatCh_run', 'consensus_type', 'ST'] + loci_order) + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
299 writer.write('\t'.join([sample, run_times, consensus_type, str(st)] + alleles_profile.split(',')) + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
300 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
301 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
302 def run_get_st(sample, mlst_dicts, consensus_sequences, mlst_consensus, run_times, outdir, time_str): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
303 if mlst_consensus == 'all': |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
304 for consensus_type in consensus_sequences: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
305 print('Searching MLST for ' + consensus_type + ' consensus') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
306 st, alleles_profile = check_mlst.get_st(mlst_dicts, consensus_sequences[consensus_type]) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
307 write_mlst_report(sample, run_times, consensus_type, st, alleles_profile, mlst_dicts[2], outdir, time_str) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
308 print('ST found: ' + str(st) + ' (' + alleles_profile + ')') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
309 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
310 st, alleles_profile = check_mlst.get_st(mlst_dicts, consensus_sequences[mlst_consensus]) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
311 write_mlst_report(sample, run_times, mlst_consensus, st, alleles_profile, mlst_dicts[2], outdir, time_str) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
312 print('ST found for ' + mlst_consensus + ' consensus: ' + str(st) + ' (' + alleles_profile + ')') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
313 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
314 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
315 def write_summary_report(outdir, reported_data_type, time_str, gene_list_reference, genes_present): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
316 with open(os.path.join(outdir, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
317 'summary.{reported_data_type}.{time_str}.tab'.format(reported_data_type=reported_data_type, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
318 time_str=time_str)), 'wt') as writer: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
319 seq_list = [] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
320 for info in list(genes_present.values()): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
321 seq_list.extend(list(info.keys())) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
322 seq_list = list(set(seq_list)) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
323 writer.write('#sample' + '\t' + '\t'.join([gene_list_reference[seq] for seq in sorted(seq_list)]) + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
324 for sample, info in list(genes_present.items()): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
325 data = [] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
326 for seq in sorted(seq_list): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
327 if seq in info: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
328 data.append(info[seq]) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
329 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
330 data.append('NF') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
331 writer.write(sample + '\t' + '\t'.join(data) + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
332 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
333 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
334 def run_rematch(args): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
335 workdir = os.path.abspath(args.workdir) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
336 if not os.path.isdir(workdir): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
337 os.makedirs(workdir) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
338 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
339 aspera_key = os.path.abspath(args.asperaKey.name) if args.asperaKey is not None else None |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
340 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
341 # Start logger |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
342 logfile, time_str = utils.start_logger(workdir) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
343 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
344 # Get general information |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
345 script_path = utils.general_information(logfile, __version__, workdir, time_str, args.doNotUseProvidedSoftware, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
346 aspera_key, args.downloadCramBam, args.SRA, args.SRAopt) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
347 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
348 # Set list_ids |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
349 list_ids, searched_fastq_files = get_list_ids(workdir, args.listIDs.name if args.listIDs is not None else None, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
350 args.taxon) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
351 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
352 mlst_sequences = None |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
353 mlst_dicts = None |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
354 if args.mlst is not None: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
355 time_taken_pub_mlst, mlst_dicts, mlst_sequences = check_mlst.download_pub_mlst_xml(args.mlst, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
356 args.mlstSchemaNumber, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
357 workdir) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
358 args.softClip_recodeRun = 'first' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
359 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
360 if args.reference is None: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
361 if args.mlst is not None: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
362 reference_file = check_mlst.check_existing_schema(args.mlst, args.mlstSchemaNumber, script_path) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
363 args.extraSeq = 200 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
364 if reference_file is None: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
365 print('It was not found provided MLST scheme sequences for ' + args.mlst) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
366 print('Trying to obtain reference MLST sequences from PubMLST') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
367 if len(mlst_sequences) > 0: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
368 reference_file = check_mlst.write_mlst_reference(args.mlst, mlst_sequences, workdir, time_str) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
369 args.extraSeq = 0 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
370 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
371 sys.exit('It was not possible to download MLST sequences from PubMLST!') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
372 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
373 print('Using provided scheme as referece: ' + reference_file) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
374 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
375 sys.exit('Need to provide at least one of the following options: "--reference" and "--mlst"') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
376 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
377 reference_file = os.path.abspath(args.reference.name) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
378 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
379 # Run ReMatCh for each sample |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
380 print('\n' + 'STARTING ReMatCh' + '\n') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
381 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
382 # Clean sequences headers |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
383 reference_file, gene_list_reference, reference_dict = clean_headers_reference_file(reference_file, workdir, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
384 args.extraSeq) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
385 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
386 if args.mlst is not None: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
387 problem_genes = False |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
388 for header in mlst_sequences: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
389 if header not in gene_list_reference: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
390 print('MLST gene {header} not found between reference sequences'.format(header=header)) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
391 problem_genes = True |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
392 if problem_genes: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
393 sys.exit('Missing MLST genes from reference sequences (at least sequences names do not match)!') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
394 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
395 if len(gene_list_reference) == 0: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
396 sys.exit('No sequences left') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
397 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
398 # To use in combined report |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
399 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
400 number_samples_successfully = 0 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
401 genes_present_coverage_depth = {} |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
402 genes_present_sequence_coverage = {} |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
403 for sample in list_ids: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
404 sample_start_time = time.time() |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
405 print('\n\n' + 'Sample ID: ' + sample) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
406 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
407 # Create sample outdir |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
408 sample_outdir = os.path.join(workdir, sample, '') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
409 if not os.path.isdir(sample_outdir): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
410 os.mkdir(sample_outdir) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
411 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
412 run_successfully_fastq = None |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
413 time_taken_fastq = 0 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
414 sequencing_information = {'run_accession': None, 'instrument_platform': None, 'instrument_model': None, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
415 'library_layout': None, 'library_source': None, 'extra_run_accession': None, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
416 'nominal_length': None, 'read_count': None, 'base_count': None, 'date_download': None} |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
417 if not searched_fastq_files: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
418 # Download Files |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
419 time_taken_fastq, run_successfully_fastq, fastq_files, sequencing_information = \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
420 download.run_download(sample, args.downloadLibrariesType, aspera_key, sample_outdir, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
421 args.downloadCramBam, args.threads, args.downloadInstrumentPlatform, args.SRA, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
422 args.SRAopt) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
423 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
424 fastq_files = list_ids[sample] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
425 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
426 file_size = None |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
427 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
428 run_successfully_rematch_first = None |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
429 run_successfully_rematch_second = None |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
430 time_taken_rematch_first = 0 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
431 time_taken_rematch_second = 0 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
432 sample_data_general_first = None |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
433 sample_data_general_second = None |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
434 if run_successfully_fastq is not False: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
435 file_size = sum(os.path.getsize(fastq) for fastq in fastq_files) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
436 # Run ReMatCh |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
437 time_taken_rematch_first, run_successfully_rematch_first, data_by_gene, sample_data_general_first, \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
438 consensus_files, consensus_sequences = \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
439 rematch_module.run_rematch_module(sample, fastq_files, reference_file, args.threads, sample_outdir, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
440 args.extraSeq, args.minCovPresence, args.minCovCall, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
441 args.minFrequencyDominantAllele, args.minGeneCoverage, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
442 args.debug, args.numMapLoc, args.minGeneIdentity, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
443 'first', args.softClip_baseQuality, args.softClip_recodeRun, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
444 reference_dict, args.softClip_cigarFlagRecode, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
445 args.bowtieAlgo, args.bowtieOPT, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
446 gene_list_reference, args.notWriteConsensus, clean_run=True) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
447 if run_successfully_rematch_first: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
448 if args.mlst is not None and (args.mlstRun == 'first' or args.mlstRun == 'all'): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
449 run_get_st(sample, mlst_dicts, consensus_sequences, args.mlstConsensus, 'first', workdir, time_str) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
450 genes_present_coverage_depth = write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
451 data_by_gene, workdir, time_str, 'first_run', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
452 args.minGeneIdentity, 'coverage_depth', args.summary, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
453 genes_present_coverage_depth) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
454 if args.reportSequenceCoverage: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
455 genes_present_sequence_coverage = write_data_by_gene(gene_list_reference, args.minGeneCoverage, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
456 sample, data_by_gene, workdir, time_str, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
457 'first_run', args.minGeneIdentity, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
458 'sequence_coverage', args.summary, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
459 genes_present_sequence_coverage) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
460 if args.doubleRun: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
461 rematch_second_outdir = os.path.join(sample_outdir, 'rematch_second_run', '') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
462 if not os.path.isdir(rematch_second_outdir): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
463 os.mkdir(rematch_second_outdir) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
464 consensus_concatenated_fasta, consensus_concatenated_gene_list, consensus_concatenated_dict, \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
465 number_consensus_with_sequences = \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
466 concatenate_extra_seq_2_consensus(consensus_files['noMatter'], reference_file, args.extraSeq, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
467 rematch_second_outdir) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
468 if len(consensus_concatenated_gene_list) > 0: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
469 if args.mlst is None or \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
470 (args.mlst is not None and number_consensus_with_sequences == len(gene_list_reference)): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
471 time_taken_rematch_second, run_successfully_rematch_second, data_by_gene, \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
472 sample_data_general_second, consensus_files, consensus_sequences = \ |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
473 rematch_module.run_rematch_module(sample, fastq_files, consensus_concatenated_fasta, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
474 args.threads, rematch_second_outdir, args.extraSeq, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
475 args.minCovPresence, args.minCovCall, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
476 args.minFrequencyDominantAllele, args.minGeneCoverage, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
477 args.debug, args.numMapLoc, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
478 args.minGeneIdentity, 'second', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
479 args.softClip_baseQuality, args.softClip_recodeRun, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
480 consensus_concatenated_dict, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
481 args.softClip_cigarFlagRecode, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
482 args.bowtieAlgo, args.bowtieOPT, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
483 gene_list_reference, args.notWriteConsensus, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
484 clean_run=True) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
485 if not args.debug: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
486 os.remove(consensus_concatenated_fasta) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
487 if run_successfully_rematch_second: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
488 if args.mlst is not None and (args.mlstRun == 'second' or args.mlstRun == 'all'): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
489 run_get_st(sample, mlst_dicts, consensus_sequences, args.mlstConsensus, 'second', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
490 workdir, time_str) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
491 _ = write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample, data_by_gene, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
492 workdir, time_str, 'second_run', args.minGeneIdentity, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
493 'coverage_depth', False, {}) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
494 if args.reportSequenceCoverage: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
495 _ = write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
496 data_by_gene, workdir, time_str, 'second_run', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
497 args.minGeneIdentity, 'sequence_coverage', False, {}) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
498 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
499 print('Some sequences missing after ReMatCh module first run. Second run will not be' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
500 ' performed') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
501 if os.path.isfile(consensus_concatenated_fasta): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
502 os.remove(consensus_concatenated_fasta) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
503 if os.path.isdir(rematch_second_outdir): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
504 utils.remove_directory(rematch_second_outdir) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
505 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
506 print('No sequences left after ReMatCh module first run. Second run will not be performed') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
507 if os.path.isfile(consensus_concatenated_fasta): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
508 os.remove(consensus_concatenated_fasta) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
509 if os.path.isdir(rematch_second_outdir): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
510 utils.remove_directory(rematch_second_outdir) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
511 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
512 if not searched_fastq_files and not args.keepDownloadedFastq and fastq_files is not None: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
513 for fastq in fastq_files: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
514 if os.path.isfile(fastq): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
515 os.remove(fastq) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
516 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
517 time_taken = utils.run_time(sample_start_time) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
518 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
519 write_sample_report(sample, workdir, time_str, file_size, run_successfully_fastq, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
520 run_successfully_rematch_first, run_successfully_rematch_second, time_taken_fastq, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
521 time_taken_rematch_first, time_taken_rematch_second, time_taken, sequencing_information, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
522 sample_data_general_first if run_successfully_rematch_first else |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
523 {'number_absent_genes': None, 'number_genes_multiple_alleles': None, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
524 'mean_sample_coverage': None}, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
525 sample_data_general_second if run_successfully_rematch_second else |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
526 {'number_absent_genes': None, 'number_genes_multiple_alleles': None, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
527 'mean_sample_coverage': None}, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
528 fastq_files if fastq_files is not None else '') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
529 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
530 if all([run_successfully_fastq is not False, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
531 run_successfully_rematch_first is not False, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
532 run_successfully_rematch_second is not False]): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
533 number_samples_successfully += 1 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
534 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
535 if args.summary: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
536 write_summary_report(workdir, 'coverage_depth', time_str, gene_list_reference, genes_present_coverage_depth) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
537 if args.reportSequenceCoverage: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
538 write_summary_report(workdir, 'sequence_coverage', time_str, gene_list_reference, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
539 genes_present_sequence_coverage) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
540 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
541 return number_samples_successfully, len(list_ids) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
542 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
543 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
544 def main(): |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
545 if sys.version_info[0] < 3: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
546 sys.exit('Must be using Python 3. Try calling "python3 rematch.py"') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
547 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
548 parser = argparse.ArgumentParser(prog='rematch.py', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
549 description='Reads mapping against target sequences, checking mapping and' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
550 ' consensus sequences production', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
551 formatter_class=argparse.ArgumentDefaultsHelpFormatter) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
552 parser.add_argument('--version', help='Version information', action='version', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
553 version='{prog} v{version}'.format(prog=parser.prog, version=__version__)) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
554 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
555 parser_optional_general = parser.add_argument_group('General facultative options') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
556 parser_optional_general.add_argument('-r', '--reference', type=argparse.FileType('r'), |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
557 metavar='/path/to/reference_sequence.fasta', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
558 help='Fasta file containing reference sequences', required=False) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
559 parser_optional_general.add_argument('-w', '--workdir', type=str, metavar='/path/to/workdir/directory/', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
560 help='Path to the directory where ReMatCh will run and produce the outputs' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
561 ' with reads (ended with fastq.gz/fq.gz and, in case of PE data, pair-end' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
562 ' direction coded as _R1_001 / _R2_001 or _1 / _2) already' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
563 ' present (organized in sample folders) or to be downloaded', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
564 required=False, default='.') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
565 parser_optional_general.add_argument('-j', '--threads', type=int, metavar='N', help='Number of threads to use', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
566 required=False, default=1) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
567 parser_optional_general.add_argument('--mlst', type=str, metavar='"Streptococcus agalactiae"', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
568 help='Species name (same as in PubMLST) to be used in MLST' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
569 ' determination. ReMatCh will use Bowtie2 very-sensitive-local mapping' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
570 ' parameters and will recode the soft clip CIGAR flags of the first run', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
571 required=False) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
572 parser_optional_general.add_argument('--doNotUseProvidedSoftware', action='store_true', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
573 help='Tells ReMatCh to not use Bowtie2, Samtools and Bcftools that are' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
574 ' provided with it') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
575 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
576 parser_optional_download_exclusive = parser.add_mutually_exclusive_group() |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
577 parser_optional_download_exclusive.add_argument('-l', '--listIDs', type=argparse.FileType('r'), |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
578 metavar='/path/to/list_IDs.txt', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
579 help='Path to list containing the IDs to be' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
580 ' downloaded (one per line)', required=False) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
581 parser_optional_download_exclusive.add_argument('-t', '--taxon', type=str, metavar='"Streptococcus agalactiae"', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
582 help='Taxon name for which ReMatCh will download fastq files', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
583 required=False) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
584 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
585 parser_optional_rematch = parser.add_argument_group('ReMatCh module facultative options') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
586 parser_optional_rematch.add_argument('--extraSeq', type=int, metavar='N', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
587 help='Sequence length added to both ends of target sequences (usefull to' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
588 ' improve reads mapping to the target one) that will be trimmed in' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
589 ' ReMatCh outputs', required=False, default=0) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
590 parser_optional_rematch.add_argument('--minCovPresence', type=int, metavar='N', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
591 help='Reference position minimum coverage depth to consider the position to be' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
592 ' present in the sample', required=False, default=5) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
593 parser_optional_rematch.add_argument('--minCovCall', type=int, metavar='N', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
594 help='Reference position minimum coverage depth to perform a base call. Lower' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
595 ' coverage will be coded as N', required=False, default=10) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
596 parser_optional_rematch.add_argument('--minFrequencyDominantAllele', type=float, metavar='0.6', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
597 help='Minimum relative frequency of the dominant allele coverage depth (value' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
598 ' between [0, 1]). Positions with lower values will be considered as' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
599 ' having multiple alleles (and will be coded as N)', required=False, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
600 default=0.6) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
601 parser_optional_rematch.add_argument('--minGeneCoverage', type=int, metavar='N', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
602 help='Minimum percentage of target reference gene sequence covered' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
603 ' by --minCovPresence to consider a gene to be present (value' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
604 ' between [0, 100])', required=False, default=70) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
605 parser_optional_rematch.add_argument('--minGeneIdentity', type=int, metavar='N', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
606 help='Minimum percentage of identity of reference gene sequence covered' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
607 ' by --minCovCall to consider a gene to be present (value' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
608 ' between [0, 100]). One INDEL will be considered as one difference', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
609 required=False, default=80) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
610 parser_optional_rematch.add_argument('--numMapLoc', type=int, metavar='N', help=argparse.SUPPRESS, required=False, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
611 default=1) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
612 # parser_optional_rematch.add_argument('--numMapLoc', type=int, metavar='N', help='Maximum number of locations to which a read can map (sometimes useful when mapping against similar sequences)', required=False, default=1) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
613 parser_optional_rematch.add_argument('--doubleRun', action='store_true', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
614 help='Tells ReMatCh to run a second time using as reference the noMatter' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
615 ' consensus sequence produced in the first run. This will improve' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
616 ' consensus sequence determination for sequences with high percentage of' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
617 ' target reference gene sequence covered') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
618 parser_optional_rematch.add_argument('--reportSequenceCoverage', action='store_true', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
619 help='Produce an extra combined_report.data_by_gene with the sequence coverage' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
620 ' instead of coverage depth') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
621 parser_optional_rematch.add_argument('--summary', action='store_true', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
622 help='Produce extra report files containing only sequences present in at least' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
623 ' one sample (usefull when using a large number of reference' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
624 ' sequences, and only for first run)') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
625 parser_optional_rematch.add_argument('--notWriteConsensus', action='store_true', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
626 help='Do not write consensus sequences') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
627 parser_optional_rematch.add_argument('--bowtieAlgo', type=str, metavar='"--very-sensitive-local"', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
628 help='Bowtie2 alignment mode. It can be an end-to-end alignment (unclipped' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
629 ' alignment) or local alignment (soft clipped alignment). Also, can' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
630 ' choose between fast or sensitive alignments. Please check Bowtie2' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
631 ' manual for extra' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
632 ' information: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml .' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
633 ' This option should be provided between quotes and starting with' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
634 ' an empty space (like --bowtieAlgo " --very-fast") or using equal' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
635 ' sign (like --bowtieAlgo="--very-fast")', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
636 required=False, default='--very-sensitive-local') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
637 parser_optional_rematch.add_argument('--bowtieOPT', type=str, metavar='"--no-mixed"', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
638 help='Extra Bowtie2 options. This option should be provided between quotes and' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
639 ' starting with an empty space (like --bowtieOPT " --no-mixed") or using' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
640 ' equal sign (like --bowtieOPT="--no-mixed")', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
641 required=False) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
642 parser_optional_rematch.add_argument('--debug', action='store_true', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
643 help='DeBug Mode: do not remove temporary files') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
644 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
645 parser_optional_mlst = parser.add_argument_group('MLST facultative options') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
646 parser_optional_rematch.add_argument('--mlstReference', action='store_true', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
647 help='If the curated scheme for MLST alleles is available, tells ReMatCh to' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
648 ' use these as reference (force Bowtie2 to run with very-sensitive-local' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
649 ' parameters, and sets --extraSeq to 200), otherwise ReMatCh uses the' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
650 ' first alleles of each MLST gene fragment in PubMLST as reference' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
651 ' sequences (force Bowtie2 to run with very-sensitive-local parameters,' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
652 ' and sets --extraSeq to 0)') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
653 parser_optional_mlst.add_argument('--mlstSchemaNumber', type=int, metavar='N', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
654 help='Number of the species PubMLST schema to be used in case of multiple schemes' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
655 ' available (by default will use the first schema)', required=False) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
656 parser_optional_mlst.add_argument('--mlstConsensus', choices=['noMatter', 'correct', 'alignment', 'all'], type=str, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
657 metavar='noMatter', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
658 help='Consensus sequence to be used in MLST' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
659 ' determination (available options: %(choices)s)', required=False, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
660 default='noMatter') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
661 parser_optional_mlst.add_argument('--mlstRun', choices=['first', 'second', 'all'], type=str, metavar='first', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
662 help='ReMatCh run outputs to be used in MLST determination (available' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
663 ' options: %(choices)s)', required=False, default='all') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
664 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
665 parser_optional_download = parser.add_argument_group('Download facultative options') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
666 parser_optional_download.add_argument('-a', '--asperaKey', type=argparse.FileType('r'), |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
667 metavar='/path/to/asperaweb_id_dsa.openssh', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
668 help='Tells ReMatCh to download fastq files from ENA using Aspera' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
669 ' Connect. With this option, the path to Private-key file' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
670 ' asperaweb_id_dsa.openssh must be provided (normaly found in' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
671 ' ~/.aspera/connect/etc/asperaweb_id_dsa.openssh).', required=False) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
672 parser_optional_download.add_argument('-k', '--keepDownloadedFastq', action='store_true', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
673 help='Tells ReMatCh to keep the fastq files downloaded') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
674 parser_optional_download.add_argument('--downloadLibrariesType', type=str, metavar='PAIRED', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
675 help='Tells ReMatCh to download files with specific library' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
676 ' layout (available options: %(choices)s)', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
677 choices=['PAIRED', 'SINGLE', 'BOTH'], required=False, default='BOTH') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
678 parser_optional_download.add_argument('--downloadInstrumentPlatform', type=str, metavar='ILLUMINA', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
679 help='Tells ReMatCh to download files with specific library layout (available' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
680 ' options: %(choices)s)', choices=['ILLUMINA', 'ALL'], required=False, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
681 default='ILLUMINA') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
682 parser_optional_download.add_argument('--downloadCramBam', action='store_true', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
683 help='Tells ReMatCh to also download cram/bam files and convert them to fastq' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
684 ' files') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
685 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
686 parser_optional_sra = parser.add_mutually_exclusive_group() |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
687 parser_optional_sra.add_argument('--SRA', action='store_true', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
688 help='Tells getSeqENA.py to download reads in fastq format only from NCBI SRA' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
689 ' database (not recommended)') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
690 parser_optional_sra.add_argument('--SRAopt', action='store_true', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
691 help='Tells getSeqENA.py to download reads from NCBI SRA if the download from ENA' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
692 ' fails') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
693 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
694 parser_optional_soft_clip = parser.add_argument_group('Soft clip facultative options') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
695 parser_optional_soft_clip.add_argument('--softClip_baseQuality', type=int, metavar='N', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
696 help='Base quality phred score in reads soft clipped regions', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
697 required=False, |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
698 default=7) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
699 parser_optional_soft_clip.add_argument('--softClip_recodeRun', type=str, metavar='first', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
700 help='ReMatCh run to recode soft clipped regions (available' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
701 ' options: %(choices)s)', choices=['first', 'second', 'both', 'none'], |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
702 required=False, default='none') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
703 parser_optional_soft_clip.add_argument('--softClip_cigarFlagRecode', type=str, metavar='M', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
704 help='CIGAR flag to recode CIGAR soft clip (available options: %(choices)s)', |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
705 choices=['M', 'I', 'X'], required=False, default='X') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
706 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
707 args = parser.parse_args() |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
708 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
709 msg = [] |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
710 if args.reference is None and not args.mlstReference: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
711 msg.append('At least --reference or --mlstReference should be provided') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
712 elif args.reference is not None and args.mlstReference: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
713 msg.append('Only --reference or --mlstReference should be provided') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
714 else: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
715 if args.mlstReference: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
716 if args.mlst is None: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
717 msg.append('Please provide species name using --mlst') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
718 if args.minFrequencyDominantAllele < 0 or args.minFrequencyDominantAllele > 1: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
719 msg.append('--minFrequencyDominantAllele should be a value between [0, 1]') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
720 if args.minGeneCoverage < 0 or args.minGeneCoverage > 100: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
721 msg.append('--minGeneCoverage should be a value between [0, 100]') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
722 if args.minGeneIdentity < 0 or args.minGeneIdentity > 100: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
723 msg.append('--minGeneIdentity should be a value between [0, 100]') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
724 if args.notWriteConsensus and args.doubleRun: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
725 msg.append('--notWriteConsensus and --doubleRun cannot be used together.' |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
726 ' Maybe you only want to use --doubleRun') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
727 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
728 if len(msg) > 0: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
729 argparse.ArgumentParser.error('\n'.join(msg)) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
730 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
731 start_time = time.time() |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
732 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
733 number_samples_successfully, samples_total_number = run_rematch(args) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
734 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
735 print('\n' + 'END ReMatCh') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
736 print('\n' + |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
737 str(number_samples_successfully) + ' samples out of ' + str(samples_total_number) + ' run successfully') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
738 time_taken = utils.run_time(start_time) |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
739 del time_taken |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
740 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
741 if number_samples_successfully == 0: |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
742 sys.exit('No samples run successfully!') |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
743 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
744 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
745 if __name__ == "__main__": |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
746 main() |