annotate scripts/ReMatCh/rematch.py @ 5:f739b302bc9a draft default tip

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Wed, 22 Jan 2020 08:12:43 -0500
parents e37910d2c794
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
1 #!/usr/bin/env python
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
2
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
3 # -*- coding: utf-8 -*-
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
4
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
5 """
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
6 rematch.py - Reads mapping against target sequences, checking mapping
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
7 and consensus sequences production
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
8 <https://github.com/B-UMMI/ReMatCh/>
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
9
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
10 Copyright (C) 2017 Miguel Machado <mpmachado@medicina.ulisboa.pt>
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
11
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
12 Last modified: April 12, 2017
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
13
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
14 This program is free software: you can redistribute it and/or modify
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
15 it under the terms of the GNU General Public License as published by
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
16 the Free Software Foundation, either version 3 of the License, or
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
17 (at your option) any later version.
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
18
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
19 This program is distributed in the hope that it will be useful,
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
22 GNU General Public License for more details.
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
23
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
24 You should have received a copy of the GNU General Public License
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
25 along with this program. If not, see <http://www.gnu.org/licenses/>.
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
26 """
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
27
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
28 import os
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
29 import sys
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
30 import time
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
31 import argparse
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
32 import modules.utils as utils
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
33 import modules.seqFromWebTaxon as seqFromWebTaxon
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
34 import modules.download as download
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
35 import modules.rematch_module as rematch_module
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
36 import modules.checkMLST as checkMLST
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
37
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
38
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
39 version = '3.2'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
40
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
41
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
42 def searchFastqFiles(directory):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
43 filesExtensions = ['.fastq.gz', '.fq.gz']
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
44 pairEnd_filesSeparation = [['_R1_001.f', '_R2_001.f'], ['_1.f', '_2.f']]
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
45
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
46 listIDs = {}
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
47 directories = [d for d in os.listdir(directory) if not d.startswith('.') and os.path.isdir(os.path.join(directory, d, ''))]
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
48 for directory_found in directories:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
49 if directory_found != 'pubmlst':
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
50 directory_path = os.path.join(directory, directory_found, '')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
51
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
52 fastqFound = []
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
53 files = [f for f in os.listdir(directory_path) if not f.startswith('.') and os.path.isfile(os.path.join(directory_path, f))]
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
54 for file_found in files:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
55 if file_found.endswith(tuple(filesExtensions)):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
56 fastqFound.append(file_found)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
57
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
58 if len(fastqFound) == 1:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
59 listIDs[directory_found] = [os.path.join(directory_path, f) for f in fastqFound]
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
60 elif len(fastqFound) >= 2:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
61 file_pair = []
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
62
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
63 # Search pairs
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
64 for PE_separation in pairEnd_filesSeparation:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
65 for fastq in fastqFound:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
66 if PE_separation[0] in fastq or PE_separation[1] in fastq:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
67 file_pair.append(fastq)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
68
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
69 if len(file_pair) == 2:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
70 break
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
71 else:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
72 file_pair = []
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
73
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
74 # Search single
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
75 if len(file_pair) == 0:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
76 for PE_separation in pairEnd_filesSeparation:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
77 for fastq in fastqFound:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
78 if PE_separation[0] not in fastq or PE_separation[1] not in fastq:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
79 file_pair.append(fastq)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
80
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
81 if len(file_pair) >= 1:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
82 file_pair = file_pair[0]
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
83
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
84 if len(file_pair) >= 1:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
85 listIDs[directory_found] = [os.path.join(directory_path, f) for f in file_pair]
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
86
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
87 return listIDs
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
88
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
89
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
90 def getListIDs_fromFile(fileListIDs):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
91 list_ids = []
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
92
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
93 with open(fileListIDs, 'rtU') as lines:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
94 for line in lines:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
95 line = line.splitlines()[0]
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
96 if len(line) > 0:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
97 list_ids.append(line)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
98
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
99 if len(list_ids) == 0:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
100 sys.exit('No runIDs were found in ' + fileListIDs)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
101
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
102 return list_ids
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
103
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
104
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
105 def getTaxonRunIDs(taxon_name, outputfile):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
106 seqFromWebTaxon.runSeqFromWebTaxon(taxon_name, outputfile, True, True, True, False)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
107
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
108 runIDs = []
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
109 with open(outputfile, 'rtU') as reader:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
110 for line in reader:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
111 line = line.splitlines()[0]
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
112 if len(line) > 0:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
113 if not line.startswith('#'):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
114 line = line.split('\t')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
115 runIDs.append(line[0])
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
116
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
117 return runIDs
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
118
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
119
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
120 def getListIDs(workdir, fileListIDs, taxon_name):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
121 searched_fastq_files = False
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
122 listIDs = []
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
123 if fileListIDs is None and taxon_name is None:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
124 listIDs = searchFastqFiles(workdir)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
125 searched_fastq_files = True
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
126 elif fileListIDs is not None:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
127 listIDs = getListIDs_fromFile(os.path.abspath(fileListIDs))
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
128 elif taxon_name is not None and fileListIDs is None:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
129 listIDs = getTaxonRunIDs(taxon_name, os.path.join(workdir, 'IDs_list.seqFromWebTaxon.tab'))
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
130
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
131 if len(listIDs) == 0:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
132 sys.exit('No IDs were found')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
133 return listIDs, searched_fastq_files
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
134
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
135
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
136 def format_gene_info(gene_specific_info, minimum_gene_coverage, minimum_gene_identity, reported_data_type):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
137 info = None
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
138 if gene_specific_info['gene_coverage'] >= minimum_gene_coverage and gene_specific_info['gene_identity'] >= minimum_gene_identity:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
139 if gene_specific_info['gene_number_positions_multiple_alleles'] == 0:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
140 info = str(gene_specific_info[reported_data_type])
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
141 else:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
142 info = 'multiAlleles_' + str(gene_specific_info[reported_data_type])
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
143 else:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
144 info = 'absent_' + str(gene_specific_info[reported_data_type])
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
145
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
146 return info
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
147
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
148
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
149 def write_data_by_gene(gene_list_reference, minimum_gene_coverage, sample, data_by_gene, outdir, time_str, run_times, minimum_gene_identity, reported_data_type):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
150 combined_report = os.path.join(outdir, 'combined_report.data_by_gene.' + run_times + '.' + reported_data_type + '.' + time_str + '.tab')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
151
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
152 if reported_data_type == 'coverage_depth':
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
153 reported_data_type = 'gene_mean_read_coverage'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
154 elif reported_data_type == 'sequence_coverage':
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
155 reported_data_type = 'gene_coverage'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
156
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
157 combined_report_exist = os.path.isfile(combined_report)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
158 with open(combined_report, 'at') as writer:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
159 seq_list = gene_list_reference.keys()
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
160 if not combined_report_exist:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
161 writer.write('#sample' + '\t' + '\t'.join([gene_list_reference[seq] for seq in seq_list]) + '\n')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
162
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
163 results = {}
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
164 headers = []
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
165 for i in data_by_gene:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
166 results[data_by_gene[i]['header']] = format_gene_info(data_by_gene[i], minimum_gene_coverage, minimum_gene_identity, reported_data_type)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
167 headers.append(data_by_gene[i]['header'])
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
168
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
169 if len(headers) != gene_list_reference:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
170 for gene in gene_list_reference:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
171 if gene not in headers:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
172 results[gene] = 'NA'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
173
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
174 writer.write(sample + '\t' + '\t'.join([results[seq] for seq in seq_list]) + '\n')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
175
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
176
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
177 def write_sample_report(sample, outdir, time_str, fileSize, run_successfully_fastq, run_successfully_rematch_first, run_successfully_rematch_second, time_taken_fastq, time_taken_rematch_first, time_taken_rematch_second, time_taken_sample, sequencingInformation, sample_data_general_first, sample_data_general_second, fastq_used):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
178 sample_report = os.path.join(outdir, 'sample_report.' + time_str + '.tab')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
179 report_exist = os.path.isfile(sample_report)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
180
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
181 header_general = ['sample', 'sample_run_successfully', 'sample_run_time', 'files_size', 'download_run_successfully', 'download_run_time', 'rematch_run_successfully_first', 'rematch_run_time_first', 'rematch_run_successfully_second', 'rematch_run_time_second']
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
182 header_data_general = ['number_absent_genes', 'number_genes_multiple_alleles', 'mean_sample_coverage']
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
183 header_sequencing = ['run_accession', 'instrument_platform', 'instrument_model', 'library_layout', 'library_source', 'extra_run_accession', 'nominal_length', 'read_count', 'base_count', 'date_download']
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
184
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
185 with open(sample_report, 'at') as writer:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
186 if not report_exist:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
187 writer.write('#' + '\t'.join(header_general) + '\t' + '_first\t'.join(header_data_general) + '_first\t' + '_second\t'.join(header_data_general) + '_second\t' + '\t'.join(header_sequencing) + '\t' + 'fastq_used' + '\n')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
188
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
189 writer.write('\t'.join([sample, str(all([run_successfully_fastq is not False, run_successfully_rematch_first is not False, run_successfully_rematch_second is not False])), str(time_taken_sample), str(fileSize), str(run_successfully_fastq), str(time_taken_fastq), str(run_successfully_rematch_first), str(time_taken_rematch_first), str(run_successfully_rematch_second), str(time_taken_rematch_second)]) + '\t' + '\t'.join([str(sample_data_general_first[i]) for i in header_data_general]) + '\t' + '\t'.join([str(sample_data_general_second[i]) for i in header_data_general]) + '\t' + '\t'.join([str(sequencingInformation[i]) for i in header_sequencing]) + '\t' + ','.join(fastq_used) + '\n')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
190
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
191
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
192 def concatenate_extraSeq_2_consensus(consensus_sequence, reference_sequence, extraSeq_length, outdir):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
193 reference_dict, ignore, ignore = rematch_module.get_sequence_information(reference_sequence, extraSeq_length)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
194 consensus_dict, genes, ignore = rematch_module.get_sequence_information(consensus_sequence, 0)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
195 for k, values_consensus in consensus_dict.items():
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
196 for values_reference in reference_dict.values():
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
197 if values_reference['header'] == values_consensus['header']:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
198 if extraSeq_length <= len(values_reference['sequence']):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
199 right_extra_seq = '' if extraSeq_length == 0 else values_reference['sequence'][-extraSeq_length:]
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
200 consensus_dict[k]['sequence'] = values_reference['sequence'][:extraSeq_length] + consensus_dict[k]['sequence'] + right_extra_seq
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
201 consensus_dict[k]['length'] += extraSeq_length + len(right_extra_seq)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
202
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
203 consensus_concatenated = os.path.join(outdir, 'consensus_concatenated_extraSeq.fasta')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
204 with open(consensus_concatenated, 'wt') as writer:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
205 for i in consensus_dict:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
206 writer.write('>' + consensus_dict[i]['header'] + '\n')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
207 fasta_sequence_lines = rematch_module.chunkstring(consensus_dict[i]['sequence'], 80)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
208 for line in fasta_sequence_lines:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
209 writer.write(line + '\n')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
210
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
211 return consensus_concatenated, genes, consensus_dict
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
212
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
213
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
214 def clean_headers_reference_file(reference_file, outdir, extraSeq):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
215 problematic_characters = ["|", " ", ",", ".", "(", ")", "'", "/", ":"]
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
216 print 'Checking if reference sequences contain ' + str(problematic_characters) + '\n'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
217 headers_changed = False
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
218 new_reference_file = str(reference_file)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
219 sequences, genes, headers_changed = rematch_module.get_sequence_information(reference_file, extraSeq)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
220 if headers_changed:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
221 print 'At least one of the those characters was found. Replacing those with _' + '\n'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
222 new_reference_file = os.path.join(outdir, os.path.splitext(os.path.basename(reference_file))[0] + '.headers_renamed.fasta')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
223 with open(new_reference_file, 'wt') as writer:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
224 for i in sequences:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
225 writer.write('>' + sequences[i]['header'] + '\n')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
226 fasta_sequence_lines = rematch_module.chunkstring(sequences[i]['sequence'], 80)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
227 for line in fasta_sequence_lines:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
228 writer.write(line + '\n')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
229 return new_reference_file, genes, sequences
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
230
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
231
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
232 def write_mlst_report(sample, run_times, consensus_type, st, alleles_profile, lociOrder, outdir, time_str):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
233 mlst_report = os.path.join(outdir, 'mlst_report.' + time_str + '.tab')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
234 mlst_report_exist = os.path.isfile(mlst_report)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
235 with open(mlst_report, 'at') as writer:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
236 if not mlst_report_exist:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
237 writer.write('\t'.join(['#sample', 'ReMatCh_run', 'consensus_type', 'ST'] + lociOrder) + '\n')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
238 writer.write('\t'.join([sample, run_times, consensus_type, str(st)] + alleles_profile.split(',')) + '\n')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
239
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
240
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
241 def run_get_st(sample, mlst_dicts, consensus_sequences, mlstConsensus, run_times, outdir, time_str):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
242 if mlstConsensus == 'all':
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
243 for consensus_type in consensus_sequences:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
244 print 'Searching MLST for ' + consensus_type + ' consensus'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
245 st, alleles_profile = checkMLST.getST(mlst_dicts, consensus_sequences[consensus_type])
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
246 write_mlst_report(sample, run_times, consensus_type, st, alleles_profile, mlst_dicts[2], outdir, time_str)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
247 print 'ST found: ' + str(st) + ' (' + alleles_profile + ')'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
248 else:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
249 st, alleles_profile = checkMLST.getST(mlst_dicts, consensus_sequences[mlstConsensus])
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
250 write_mlst_report(sample, run_times, mlstConsensus, st, alleles_profile, mlst_dicts[2], outdir, time_str)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
251 print 'ST found for ' + mlstConsensus + ' consensus: ' + str(st) + ' (' + alleles_profile + ')'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
252
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
253
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
254 def runRematch(args):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
255 workdir = os.path.abspath(args.workdir)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
256 if not os.path.isdir(workdir):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
257 os.makedirs(workdir)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
258
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
259 asperaKey = os.path.abspath(args.asperaKey.name) if args.asperaKey is not None else None
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
260
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
261 # Start logger
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
262 logfile, time_str = utils.start_logger(workdir)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
263
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
264 # Get general information
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
265 script_path = utils.general_information(logfile, version, workdir, time_str, args.doNotUseProvidedSoftware, asperaKey, args.downloadCramBam)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
266
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
267 # Set listIDs
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
268 listIDs, searched_fastq_files = getListIDs(workdir, args.listIDs.name if args.listIDs is not None else None, args.taxon)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
269
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
270 if args.mlst is not None:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
271 time_taken_PubMLST, mlst_dicts, mlst_sequences = checkMLST.downloadPubMLSTxml(args.mlst, args.mlstSchemaNumber, workdir)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
272 args.softClip_recodeRun = 'first'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
273 args.conservedSeq = False
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
274
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
275 if args.reference is None:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
276 reference_file = checkMLST.check_existing_schema(args.mlst, args.mlstSchemaNumber, script_path)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
277 args.extraSeq = 200
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
278 if reference_file is None:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
279 print 'It was not found provided MLST scheme sequences for ' + args.mlst
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
280 print 'Trying to obtain reference MLST sequences from PubMLST'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
281 if len(mlst_sequences) > 0:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
282 reference_file = checkMLST.write_mlst_reference(args.mlst, mlst_sequences, workdir, time_str)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
283 args.extraSeq = 0
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
284 else:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
285 sys.exit('It was not possible to download MLST sequences from PubMLST!')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
286 else:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
287 print 'Using provided scheme as referece: ' + reference_file
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
288 else:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
289 reference_file = os.path.abspath(args.reference.name)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
290
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
291 # Run ReMatCh for each sample
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
292 print '\n' + 'STARTING ReMatCh' + '\n'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
293
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
294 # Clean sequences headers
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
295 reference_file, gene_list_reference, reference_dict = clean_headers_reference_file(reference_file, workdir, args.extraSeq)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
296
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
297 if args.mlst is not None:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
298 problem_genes = False
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
299 for header in mlst_sequences:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
300 if header not in gene_list_reference:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
301 print 'MLST gene {header} not found between reference sequences'.format(header=header)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
302 problem_genes = True
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
303 if problem_genes:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
304 sys.exit('Missing MLST genes from reference sequences (at least sequences names do not match)!')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
305
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
306 if len(gene_list_reference) == 0:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
307 sys.exit('No sequences left')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
308
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
309 # To use in combined report
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
310
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
311 number_samples_successfully = 0
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
312 for sample in listIDs:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
313 sample_start_time = time.time()
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
314 print '\n\n' + 'Sample ID: ' + sample
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
315
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
316 # Create sample outdir
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
317 sample_outdir = os.path.join(workdir, sample, '')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
318 if not os.path.isdir(sample_outdir):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
319 os.mkdir(sample_outdir)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
320
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
321 run_successfully_fastq = None
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
322 time_taken_fastq = 0
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
323 sequencingInformation = {'run_accession': None, 'instrument_platform': None, 'instrument_model': None, 'library_layout': None, 'library_source': None, 'extra_run_accession': None, 'nominal_length': None, 'read_count': None, 'base_count': None, 'date_download': None}
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
324 if not searched_fastq_files:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
325 # Download Files
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
326 time_taken_fastq, run_successfully_fastq, fastq_files, sequencingInformation = download.runDownload(sample, args.downloadLibrariesType, asperaKey, sample_outdir, args.downloadCramBam, args.threads, args.downloadInstrumentPlatform)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
327 else:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
328 fastq_files = listIDs[sample]
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
329
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
330 fileSize = None
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
331
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
332 run_successfully_rematch_first = None
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
333 run_successfully_rematch_second = None
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
334 time_taken_rematch_first = 0
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
335 time_taken_rematch_second = 0
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
336 if run_successfully_fastq is not False:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
337 fileSize = sum(os.path.getsize(fastq) for fastq in fastq_files)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
338 # Run ReMatCh
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
339 time_taken_rematch_first, run_successfully_rematch_first, data_by_gene, sample_data_general_first, consensus_files, consensus_sequences = rematch_module.runRematchModule(sample, fastq_files, reference_file, args.threads, sample_outdir, args.extraSeq, args.minCovPresence, args.minCovCall, args.minFrequencyDominantAllele, args.minGeneCoverage, args.conservedSeq, args.debug, args.numMapLoc, args.minGeneIdentity, 'first', args.softClip_baseQuality, args.softClip_recodeRun, reference_dict, args.softClip_cigarFlagRecode, args.bowtieOPT, gene_list_reference, args.notWriteConsensus)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
340 if run_successfully_rematch_first:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
341 if args.mlst is not None and (args.mlstRun == 'first' or args.mlstRun == 'all'):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
342 run_get_st(sample, mlst_dicts, consensus_sequences, args.mlstConsensus, 'first', workdir, time_str)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
343 write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample, data_by_gene, workdir, time_str, 'first_run', args.minGeneIdentity, 'coverage_depth')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
344 if args.reportSequenceCoverage:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
345 write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample, data_by_gene, workdir, time_str, 'first_run', args.minGeneIdentity, 'sequence_coverage')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
346 if args.doubleRun:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
347 rematch_second_outdir = os.path.join(sample_outdir, 'rematch_second_run', '')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
348 if not os.path.isdir(rematch_second_outdir):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
349 os.mkdir(rematch_second_outdir)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
350 consensus_concatenated_fasta, consensus_concatenated_gene_list, consensus_concatenated_dict = concatenate_extraSeq_2_consensus(consensus_files['noMatter'], reference_file, args.extraSeq, rematch_second_outdir)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
351 if len(consensus_concatenated_gene_list) > 0:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
352 time_taken_rematch_second, run_successfully_rematch_second, data_by_gene, sample_data_general_second, consensus_files, consensus_sequences = rematch_module.runRematchModule(sample, fastq_files, consensus_concatenated_fasta, args.threads, rematch_second_outdir, args.extraSeq, args.minCovPresence, args.minCovCall, args.minFrequencyDominantAllele, args.minGeneCoverage, args.conservedSeq, args.debug, args.numMapLoc, args.minGeneIdentity, 'second', args.softClip_baseQuality, args.softClip_recodeRun, consensus_concatenated_dict, args.softClip_cigarFlagRecode, args.bowtieOPT, gene_list_reference, args.notWriteConsensus)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
353 if not args.debug:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
354 os.remove(consensus_concatenated_fasta)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
355 if run_successfully_rematch_second:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
356 if args.mlst is not None and (args.mlstRun == 'second' or args.mlstRun == 'all'):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
357 run_get_st(sample, mlst_dicts, consensus_sequences, args.mlstConsensus, 'second', workdir, time_str)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
358 write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample, data_by_gene, workdir, time_str, 'second_run', args.minGeneIdentity, 'coverage_depth')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
359 if args.reportSequenceCoverage:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
360 write_data_by_gene(gene_list_reference, args.minGeneCoverage, sample, data_by_gene, workdir, time_str, 'second_run', args.minGeneIdentity, 'sequence_coverage')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
361 else:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
362 print 'No sequences left after ReMatCh module first run. Second run will not be performed'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
363 if os.path.isfile(consensus_concatenated_fasta):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
364 os.remove(consensus_concatenated_fasta)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
365 if os.path.isdir(rematch_second_outdir):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
366 utils.removeDirectory(rematch_second_outdir)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
367
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
368 if not searched_fastq_files and not args.keepDownloadedFastq and fastq_files is not None:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
369 for fastq in fastq_files:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
370 if os.path.isfile(fastq):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
371 os.remove(fastq)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
372
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
373 time_taken = utils.runTime(sample_start_time)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
374
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
375 write_sample_report(sample, workdir, time_str, fileSize, run_successfully_fastq, run_successfully_rematch_first, run_successfully_rematch_second, time_taken_fastq, time_taken_rematch_first, time_taken_rematch_second, time_taken, sequencingInformation, sample_data_general_first if run_successfully_rematch_first else {'number_absent_genes': None, 'number_genes_multiple_alleles': None, 'mean_sample_coverage': None}, sample_data_general_second if run_successfully_rematch_second else {'number_absent_genes': None, 'number_genes_multiple_alleles': None, 'mean_sample_coverage': None}, fastq_files if fastq_files is not None else '')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
376
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
377 if all([run_successfully_fastq is not False, run_successfully_rematch_first is not False, run_successfully_rematch_second is not False]):
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
378 number_samples_successfully += 1
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
379
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
380 return number_samples_successfully, len(listIDs)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
381
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
382
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
383 def main():
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
384 parser = argparse.ArgumentParser(prog='rematch.py', description='Reads mapping against target sequences, checking mapping and consensus sequences production', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
385 parser.add_argument('--version', help='Version information', action='version', version=str('%(prog)s v' + version))
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
386
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
387 parser_optional_general = parser.add_argument_group('General facultative options')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
388 parser_optional_general.add_argument('-r', '--reference', type=argparse.FileType('r'), metavar='/path/to/reference_sequence.fasta', help='Fasta file containing reference sequences', required=False)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
389 parser_optional_general.add_argument('-w', '--workdir', type=str, metavar='/path/to/workdir/directory/', help='Path to the directory where ReMatCh will run and produce the outputs with reads (ended with fastq.gz/fq.gz and, in case of PE data, pair-end direction coded as _R1_001 / _R2_001 or _1 / _2) already present (organized in sample folders) or to be downloaded', required=False, default='.')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
390 parser_optional_general.add_argument('-j', '--threads', type=int, metavar='N', help='Number of threads to use', required=False, default=1)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
391 parser_optional_general.add_argument('--mlst', type=str, metavar='"Streptococcus agalactiae"', help='Species name (same as in PubMLST) to be used in MLST determination. ReMatCh will use Bowtie2 very-sensitive-local mapping parameters and will recode the soft clip CIGAR flags of the first run', required=False)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
392 parser_optional_general.add_argument('--doNotUseProvidedSoftware', action='store_true', help='Tells ReMatCh to not use Bowtie2, Samtools and Bcftools that are provided with it')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
393
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
394 parser_optional_rematch = parser.add_argument_group('ReMatCh module facultative options')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
395 parser_optional_rematch.add_argument('--conservedSeq', action='store_true', help=argparse.SUPPRESS)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
396 # parser_optional_rematch.add_argument('--conservedSeq', action='store_true', help='This option can be used with conserved sequences like MLST genes to speedup the analysis by alignning reads using Bowtie2 sensitive algorithm')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
397 parser_optional_rematch.add_argument('--extraSeq', type=int, metavar='N', help='Sequence length added to both ends of target sequences (usefull to improve reads mapping to the target one) that will be trimmed in ReMatCh outputs', required=False, default=0)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
398 parser_optional_rematch.add_argument('--minCovPresence', type=int, metavar='N', help='Reference position minimum coverage depth to consider the position to be present in the sample', required=False, default=5)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
399 parser_optional_rematch.add_argument('--minCovCall', type=int, metavar='N', help='Reference position minimum coverage depth to perform a base call. Lower coverage will be coded as N', required=False, default=10)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
400 parser_optional_rematch.add_argument('--minFrequencyDominantAllele', type=float, metavar='0.6', help='Minimum relative frequency of the dominant allele coverage depth (value between [0, 1]). Positions with lower values will be considered as having multiple alleles (and will be coded as N)', required=False, default=0.6)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
401 parser_optional_rematch.add_argument('--minGeneCoverage', type=int, metavar='N', help='Minimum percentage of target reference gene sequence covered by --minCovPresence to consider a gene to be present (value between [0, 100])', required=False, default=70)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
402 parser_optional_rematch.add_argument('--minGeneIdentity', type=int, metavar='N', help='Minimum percentage of identity of reference gene sequence covered by --minCovCall to consider a gene to be present (value between [0, 100]). One INDEL will be considered as one difference', required=False, default=80)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
403 parser_optional_rematch.add_argument('--numMapLoc', type=int, metavar='N', help=argparse.SUPPRESS, required=False, default=1)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
404 # parser_optional_rematch.add_argument('--numMapLoc', type=int, metavar='N', help='Maximum number of locations to which a read can map (sometimes useful when mapping against similar sequences)', required=False, default=1)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
405 parser_optional_rematch.add_argument('--doubleRun', action='store_true', help='Tells ReMatCh to run a second time using as reference the noMatter consensus sequence produced in the first run. This will improve consensus sequence determination for sequences with high percentage of target reference gene sequence covered')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
406 parser_optional_rematch.add_argument('--reportSequenceCoverage', action='store_true', help='Produce an extra combined_report.data_by_gene with the sequence coverage instead of coverage depth')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
407 parser_optional_rematch.add_argument('--notWriteConsensus', action='store_true', help='Do not write consensus sequences')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
408 parser_optional_rematch.add_argument('--bowtieOPT', type=str, metavar='"--no-mixed"', help='Extra Bowtie2 options', required=False)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
409 parser_optional_rematch.add_argument('--debug', action='store_true', help='DeBug Mode: do not remove temporary files')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
410
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
411 parser_optional_mlst = parser.add_argument_group('MLST facultative options')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
412 parser_optional_rematch.add_argument('--mlstReference', action='store_true', help='If the curated scheme for MLST alleles is available, tells ReMatCh to use these as reference (force Bowtie2 to run with very-sensitive-local parameters, and sets --extraSeq to 200), otherwise ReMatCh uses the first alleles of each MLST gene fragment in PubMLST as reference sequences (force Bowtie2 to run with very-sensitive-local parameters, and sets --extraSeq to 0)')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
413 parser_optional_mlst.add_argument('--mlstSchemaNumber', type=int, metavar='N', help='Number of the species PubMLST schema to be used in case of multiple schemes available (by default will use the first schema)', required=False)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
414 parser_optional_mlst.add_argument('--mlstConsensus', choices=['noMatter', 'correct', 'alignment', 'all'], type=str, metavar='noMatter', help='Consensus sequence to be used in MLST determination (available options: %(choices)s)', required=False, default='noMatter')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
415 parser_optional_mlst.add_argument('--mlstRun', choices=['first', 'second', 'all'], type=str, metavar='first', help='ReMatCh run outputs to be used in MLST determination (available options: %(choices)s)', required=False, default='all')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
416
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
417 parser_optional_download = parser.add_argument_group('Download facultative options')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
418 parser_optional_download.add_argument('-a', '--asperaKey', type=argparse.FileType('r'), metavar='/path/to/asperaweb_id_dsa.openssh', help='Tells ReMatCh to download fastq files from ENA using Aspera Connect. With this option, the path to Private-key file asperaweb_id_dsa.openssh must be provided (normaly found in ~/.aspera/connect/etc/asperaweb_id_dsa.openssh).', required=False)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
419 parser_optional_download.add_argument('-k', '--keepDownloadedFastq', action='store_true', help='Tells ReMatCh to keep the fastq files downloaded')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
420 parser_optional_download.add_argument('--downloadLibrariesType', type=str, metavar='PAIRED', help='Tells ReMatCh to download files with specific library layout (available options: %(choices)s)', choices=['PAIRED', 'SINGLE', 'BOTH'], required=False, default='BOTH')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
421 parser_optional_download.add_argument('--downloadInstrumentPlatform', type=str, metavar='ILLUMINA', help='Tells ReMatCh to download files with specific library layout (available options: %(choices)s)', choices=['ILLUMINA', 'ALL'], required=False, default='ILLUMINA')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
422 parser_optional_download.add_argument('--downloadCramBam', action='store_true', help='Tells ReMatCh to also download cram/bam files and convert them to fastq files')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
423
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
424 parser_optional_softClip = parser.add_argument_group('Soft clip facultative options')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
425 parser_optional_softClip.add_argument('--softClip_baseQuality', type=int, metavar='N', help='Base quality phred score in reads soft clipped regions', required=False, default=7)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
426 parser_optional_download.add_argument('--softClip_recodeRun', type=str, metavar='first', help='ReMatCh run to recode soft clipped regions (available options: %(choices)s)', choices=['first', 'second', 'both', 'none'], required=False, default='none')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
427 parser_optional_download.add_argument('--softClip_cigarFlagRecode', type=str, metavar='M', help='CIGAR flag to recode CIGAR soft clip (available options: %(choices)s)', choices=['M', 'I', 'X'], required=False, default='X')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
428
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
429 parser_optional_download_exclusive = parser.add_mutually_exclusive_group()
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
430 parser_optional_download_exclusive.add_argument('-l', '--listIDs', type=argparse.FileType('r'), metavar='/path/to/list_IDs.txt', help='Path to list containing the IDs to be downloaded (one per line)', required=False)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
431 parser_optional_download_exclusive.add_argument('-t', '--taxon', type=str, metavar='"Streptococcus agalactiae"', help='Taxon name for which ReMatCh will download fastq files', required=False)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
432
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
433 args = parser.parse_args()
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
434
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
435 if args.reference is None and not args.mlstReference:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
436 parser.error('At least --reference or --mlstReference should be provided')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
437 elif args.reference is not None and args.mlstReference:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
438 parser.error('Only --reference or --mlstReference should be provided')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
439 else:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
440 if args.mlstReference:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
441 if args.mlst is None:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
442 parser.error('Please provide species name using --mlst')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
443
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
444 if args.minFrequencyDominantAllele < 0 or args.minFrequencyDominantAllele > 1:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
445 parser.error('--minFrequencyDominantAllele should be a value between [0, 1]')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
446
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
447 if args.minGeneCoverage < 0 or args.minGeneCoverage > 100:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
448 parser.error('--minGeneCoverage should be a value between [0, 100]')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
449 if args.minGeneIdentity < 0 or args.minGeneIdentity > 100:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
450 parser.error('--minGeneIdentity should be a value between [0, 100]')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
451
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
452 start_time = time.time()
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
453
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
454 number_samples_successfully, samples_total_number = runRematch(args)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
455
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
456 print '\n' + 'END ReMatCh'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
457 print '\n' + str(number_samples_successfully) + ' samples out of ' + str(samples_total_number) + ' run successfully'
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
458 time_taken = utils.runTime(start_time)
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
459 del time_taken
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
460
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
461 if number_samples_successfully == 0:
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
462 sys.exit('No samples run successfully!')
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
463
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
464
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
465 if __name__ == "__main__":
e37910d2c794 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
466 main()