annotate yac.py @ 4:f7947c5a18b8 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit 48768de9d682fb80c4981cd52ef724fdf8f6961e
author artbio
date Mon, 08 Apr 2019 07:26:41 -0400
parents 94d67b195acd
children acbf910cd2ae
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
1 #!/usr/bin/python
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
2 # yac = yet another clipper
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
3 # v 1.2.1 - 23-08-2014 - Support FastQ output
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
4 # v 1.1.0 - 23-08-2014 - argparse implementation
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
5 # Christophe Antoniewski <drosofff@gmail.com>
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
6
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
7 import argparse
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
8 from itertools import islice
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
9
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
10
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
def Parser(argv=None):
    '''
    Build the command line parser and return the parsed arguments.

    argv: optional list of argument strings; when None (the default)
    argparse reads sys.argv[1:], which is the historical behaviour.

    Returns the argparse namespace with adapter_to_clip converted to
    upper case.
    Options whose absence always ended in a crash further down
    (input, output, adapter_to_clip, min, max) are declared required so
    that argparse reports the problem with a usage message instead.
    '''
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument(
        '--input', action="store", nargs='+', required=True,
        help="input fastq files")
    the_parser.add_argument(
        '--output', action="store", type=str, required=True,
        help="output, clipped fasta or fastq file")
    # any value other than "fasta" is written as fastq by Clip.write_output,
    # so this option is deliberately left optional and unrestricted
    the_parser.add_argument(
        '--output_format', action="store", type=str,
        help="output format, fasta or fastq")
    the_parser.add_argument(
        '--adapter_to_clip', action="store", type=str, required=True,
        help="adapter sequence to clip")
    the_parser.add_argument(
        '--min', action="store", type=int, required=True,
        help="minimal size of clipped sequence to keep")
    the_parser.add_argument(
        '--max', action="store", type=int, required=True,
        help="maximal size of clipped sequence to keep")
    the_parser.add_argument(
        '--Nmode', action="store", type=str, choices=["accept", "reject"],
        help="accept or reject Ns in clipped sequences")
    args = the_parser.parse_args(argv)
    # motifs are searched case-sensitively in the reads: normalise here
    args.adapter_to_clip = args.adapter_to_clip.upper()
    return args
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
36
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
37
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
class Clip:
    '''
    Clip a 3' adapter from the reads of one fasta or fastq file and
    append the reads that pass the size / N filters to an output file.

    Attributes set by the constructor:
      inputfile, outputfile, output_format, adapter, Nmode: as passed
      minsize, maxsize: size bounds (inclusive) converted to int
      inputformat: "fastq", "fasta", or None when the input has no line
                   starting with "@" or ">"
      adaptmotifs: adapter motifs searched in the reads (see _motives)
    '''

    def __init__(self, inputfile, outputfile, output_format,
                 adapter, minsize, maxsize, Nmode):
        self.inputfile = inputfile
        self.outputfile = outputfile
        self.output_format = output_format
        self.adapter = adapter
        self.minsize = int(minsize)
        self.maxsize = int(maxsize)
        self.Nmode = Nmode
        self.inputformat = self._detect_format(inputfile)
        self.adaptmotifs = self._motives(self.adapter)

    @staticmethod
    def _detect_format(path):
        '''
        Return "fastq" or "fasta" according to the first line of the file
        that starts with "@" or ">", or None if there is no such line.
        The scan stops at that first header line and the file is closed.
        '''
        with open(path) as handle:
            for line in handle:
                if line.startswith("@"):
                    return "fastq"
                if line.startswith(">"):
                    return "fasta"
        return None

    @staticmethod
    def _motives(sequence):
        '''
        return a list of motives for perfect (6nt) or
        imperfect (7nt with one mismatch) search.

        The first item is the first 6 nt of the adapter; it is followed,
        for each of these 6 positions, by the first 7 nt of the adapter
        with that position replaced by each alternative base (N included).
        Raises KeyError if one of the first 6 characters is not A, T, G
        or C.
        '''
        variants = [sequence[0:6]]  # perfect 6mer match comes first
        substitutions = {"A": "TGCN", "T": "AGCN", "G": "TACN", "C": "GATN"}
        for position, base in enumerate(sequence[:6]):
            for subst in substitutions[base]:
                variants.append(
                    sequence[:position] + subst + sequence[position + 1:7])
        return variants

    @staticmethod
    def _clip_position(adaptmotives, sequence):
        '''
        Return the start of the rightmost occurrence of the first motif
        found in sequence (motifs are tried in list order), or None when
        no motif matches. None is chosen so that value[:None] leaves the
        sequence and its quality string untouched.
        '''
        for motif in adaptmotives:
            position = sequence.rfind(motif)
            if position != -1:
                return position
        return None

    def _is_kept(self, trimmed_read):
        '''True if the clipped read passes the size and N filters.'''
        if not self.minsize <= len(trimmed_read) <= self.maxsize:
            return False
        return not (self.Nmode == "reject" and "N" in trimmed_read)

    def scanadapt(self, adaptmotives=(), sequence="", qscore=""):
        '''
        scans sequence for adapter motives

        Returns (clipped sequence, clipped qscore) when qscore is a
        non-empty string, the clipped sequence alone otherwise.
        Without any match (or without motifs) nothing is clipped.
        '''
        cut = self._clip_position(adaptmotives, sequence)
        if qscore:
            return sequence[:cut], qscore[:cut]
        return sequence[:cut]

    def write_output(self, id, read, qscore, output):
        '''
        Write one record to the open file object output: fasta when
        self.output_format is "fasta", fastq for any other value.
        '''
        if self.output_format == "fasta":
            block = ">{0}\n{1}\n".format(id, read)
        else:
            block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore)
        output.write(block)

    def fasta_in_write_output(self, id, read, output):
        '''Write one fasta record to the open file object output.'''
        output.write(">{0}\n{1}\n".format(id, read))

    def handle_io_fastq(self):
        '''Open input fastq file, pass read sequence and read qscore to
        scanadapt function. Pass clipped read and qscore to output function.

        Every second line of the input is read, alternately as a sequence
        and as its quality string. The output is opened in append mode.
        Records whose quality line is empty are skipped.'''
        written = 0
        with open(self.outputfile, "a") as output:
            with open(self.inputfile, "r") as handle:
                payload = islice(handle, 1, None, 2)
                read = ""
                for index, line in enumerate(payload):
                    if index % 2 == 0:
                        read = line.rstrip()
                        continue
                    qscore = line.rstrip()
                    if not qscore:
                        # scanadapt would return a bare string here, which
                        # cannot be unpacked reliably into (read, qscore)
                        continue
                    trimmed_read, trimmed_qscore = self.scanadapt(
                        self.adaptmotifs, read, qscore)
                    if not self._is_kept(trimmed_read):
                        continue
                    written += 1
                    self.write_output(
                        written, trimmed_read, trimmed_qscore, output)

    def handle_io_fasta(self):
        '''Open input fasta file, pass read sequence to scanadapt
        function. Pass clipped read to output function.

        Every second line of the input is read as a sequence. The output
        is opened in append mode and is always written as fasta.'''
        written = 0
        with open(self.outputfile, "a") as output:
            with open(self.inputfile, "r") as handle:
                for line in islice(handle, 1, None, 2):
                    trimmed_read = self.scanadapt(
                        self.adaptmotifs, line.rstrip())
                    if not self._is_kept(trimmed_read):
                        continue
                    written += 1
                    self.fasta_in_write_output(written, trimmed_read, output)
0
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
142
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
143
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
def main(*argv):
    '''
    Build a Clip instance from the positional arguments and run the
    handler matching the detected input format: the fasta handler for
    "fasta", the fastq handler for anything else.
    '''
    clipper = Clip(*argv)
    handler = (clipper.handle_io_fasta if clipper.inputformat == "fasta"
               else clipper.handle_io_fastq)
    handler()
0
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
150
1
7c913274e22a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit 7edb6f8c9744ec9bccee42aecf80207f0984330c
artbio
parents: 0
diff changeset
151
0
ad6b978daa2e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
artbio
parents:
diff changeset
if __name__ == "__main__":
    args = Parser()
    # every input file is clipped with the same settings and sent to the
    # same output file
    shared_options = (args.output, args.output_format, args.adapter_to_clip,
                      args.min, args.max, args.Nmode)
    for inputfile in args.input:
        main(inputfile, *shared_options)