Mercurial > repos > iuc > extract_genomic_dna
annotate extract_genomic_dna_utils.py @ 11:80414c33a59a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 6db2d98b513e4980788fcba49d809c91e5750296
author | iuc |
---|---|
date | Thu, 21 Nov 2024 07:20:29 +0000 |
parents | 3088e7e70888 |
children |
rev | line source |
---|---|
0
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
1 import copy |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
2 import os |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
3 import subprocess |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
4 import sys |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
5 import tempfile |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
6 |
3
b71579ad576c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit bd4fbe120288bf8452e479cbd82aa1bbf5c4bd31
iuc
parents:
2
diff
changeset
|
7 from bx.intervals.io import Comment, GenomicInterval, Header |
0
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
8 from bx.intervals.io import GenomicIntervalReader, NiceReaderWrapper, ParseError |
7
3088e7e70888
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 4a3c9f195ba5d899b1a1ce5e80281cdf230f456a
iuc
parents:
3
diff
changeset
|
9 from six import Iterator |
0
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
10 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
# Column indices used by default for a BED file, in the order
# (chrom, start, end, strand).
BED_DEFAULT_COLS = (0, 1, 2, 5)
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
13 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
14 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
class GFFInterval(GenomicInterval):
    """
    A GFF interval, including attributes. If the file is strictly a GFF file,
    the only attribute is 'group'.
    """

    def __init__(self, reader, fields, chrom_col=0, feature_col=2, start_col=3, end_col=4,
                 strand_col=6, score_col=5, default_strand='.', fix_strand=False):
        """
        Build an interval from the already-split columns of one GFF row.

        reader -- reader object handed through to GenomicInterval.
        fields -- list of column values for the row; the strand entry may be
            rewritten temporarily while the base class is initialised.
        chrom_col, feature_col, start_col, end_col, strand_col, score_col --
            column indices into ``fields``.
        default_strand, fix_strand -- passed through to GenomicInterval.

        Missing feature, score or attributes columns are reported through
        stop_err (defined elsewhere in this file).
        """
        # GFF format allows '.' for strand but GenomicInterval does not. To get around this,
        # temporarily set strand and then unset after initing GenomicInterval.
        # The bounds check keeps a row without a strand column from raising a
        # bare IndexError here, before the base class has looked at the row.
        unknown_strand = (
            not fix_strand
            and strand_col < len(fields)
            and fields[strand_col] == '.'
        )
        if unknown_strand:
            fields[strand_col] = '+'
        GenomicInterval.__init__(self, reader, fields, chrom_col, start_col, end_col,
                                 strand_col, default_strand, fix_strand=fix_strand)
        if unknown_strand:
            # Restore the original "unknown" marker on both the parsed value
            # and the stored column.
            self.strand = '.'
            self.fields[strand_col] = '.'

        # Handle feature, score column.
        self.feature_col = feature_col
        if self.feature_col >= self.nfields:
            stop_err("No field for feature_col (%d)" % feature_col)
        self.feature = self.fields[self.feature_col]
        self.score_col = score_col
        if self.score_col >= self.nfields:
            stop_err("No field for score_col (%d)" % score_col)
        self.score = self.fields[self.score_col]

        # GFF attributes live in the ninth column (index 8). Check for it the
        # same way as the feature and score columns instead of letting a short
        # row fail with an unexplained IndexError.
        attributes_col = 8
        if attributes_col >= len(fields):
            stop_err("No field for attributes column (%d)" % attributes_col)
        self.attributes = parse_gff_attributes(fields[attributes_col])

    def copy(self):
        """
        Return a new GFFInterval built from a shallow copy of this interval's
        fields and the same column settings. The current strand is passed as
        the new interval's default strand; fix_strand is left at its default.
        """
        return GFFInterval(self.reader, list(self.fields), self.chrom_col, self.feature_col,
                           self.start_col, self.end_col, self.strand_col, self.score_col, self.strand)
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
49 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
50 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
class GFFFeature(GFFInterval):
    """
    A GFF feature, which can include multiple intervals.

    The feature's own columns are taken from a copy of the first interval's
    fields; its start and end are widened to span every interval it holds.
    """

    def __init__(self, reader, chrom_col=0, feature_col=2, start_col=3, end_col=4, strand_col=6,
                 score_col=5, default_strand='.', fix_strand=False, intervals=None, raw_size=0):
        """
        Build a feature from a non-empty sequence of intervals.

        reader -- reader object handed through to GFFInterval.
        chrom_col .. fix_strand -- passed through to GFFInterval.
        intervals -- indexable, non-empty sequence of interval objects; the
            sequence itself (not a copy) is stored as ``self.intervals``.
        raw_size -- stored unchanged as ``self.raw_size``.

        Raises IndexError when ``intervals`` is missing or empty (the same
        exception type the unchecked ``intervals[0]`` lookup used to produce).
        An interval on a different chrom is reported through stop_err.
        """
        # None replaces a mutable [] default; an empty feature is never valid
        # because the first interval supplies the feature's fields.
        if intervals is None or len(intervals) == 0:
            raise IndexError("GFFFeature requires at least one interval")

        # Use copy so that first interval and feature do not share fields.
        GFFInterval.__init__(self, reader, copy.deepcopy(intervals[0].fields), chrom_col, feature_col,
                             start_col, end_col, strand_col, score_col, default_strand,
                             fix_strand=fix_strand)
        self.intervals = intervals
        self.raw_size = raw_size

        # Use intervals to set feature attributes.
        for interval in self.intervals:
            # Error checking. NOTE: intervals need not share the same strand.
            if interval.chrom != self.chrom:
                stop_err("interval chrom does not match self chrom: %s != %s" % (interval.chrom, self.chrom))
            # Widen the feature so it covers this interval.
            if interval.start < self.start:
                self.start = interval.start
            if interval.end > self.end:
                self.end = interval.end

    def name(self):
        """
        Returns feature's name, or None when no naming attribute is present.
        """
        # Preference for name:
        # GTF: 'gene_id', 'transcript_id'
        # GFF3: 'ID', 'id'
        # GFF: 'group'
        for attr_name in ('gene_id', 'transcript_id', 'ID', 'id', 'group'):
            name = self.attributes.get(attr_name, None)
            if name is not None:
                return name
        return None

    def copy(self):
        """
        Return a new GFFFeature holding copies of every interval.

        The current strand is passed as the copy's default strand, and
        raw_size is carried over so the copy does not silently reset it to 0.
        """
        intervals_copy = [interval.copy() for interval in self.intervals]
        return GFFFeature(self.reader, self.chrom_col, self.feature_col, self.start_col, self.end_col,
                          self.strand_col, self.score_col, self.strand, intervals=intervals_copy,
                          raw_size=self.raw_size)

    def lines(self):
        """
        Return one tab-joined string per interval, built from its fields.
        """
        return ['\t'.join(interval.fields) for interval in self.intervals]
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
101 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
102 |
7
3088e7e70888
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 4a3c9f195ba5d899b1a1ce5e80281cdf230f456a
iuc
parents:
3
diff
changeset
|
103 class GFFReaderWrapper(Iterator, NiceReaderWrapper): |
0
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
104 """ |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
105 Reader wrapper for GFF files which has two major functions: |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
106 1. group entries for GFF file (via group column), GFF3 (via id attribute), |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
107 or GTF (via gene_id/transcript id); |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
108 2. convert coordinates from GFF format--starting and ending coordinates |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
109 are 1-based, closed--to the 'traditional'/BED interval format--0 based, |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
110 half-open. This is useful when using GFF files as inputs to tools that |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
111 expect traditional interval format. |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
112 """ |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
113 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
def __init__(self, reader, chrom_col=0, feature_col=2, start_col=3, end_col=4, strand_col=6,
             score_col=5, fix_strand=False, convert_to_bed_coord=False, **kwargs):
    """
    Set up the wrapper around ``reader``.

    The chrom/start/end/strand column indices, ``fix_strand`` and any extra
    keyword arguments are forwarded to NiceReaderWrapper; the remaining
    GFF-specific settings are kept on the instance.
    """
    # Collect everything the base reader needs; the named parameters can
    # never also appear in kwargs, so merging cannot overwrite them.
    base_options = dict(chrom_col=chrom_col, start_col=start_col, end_col=end_col,
                        strand_col=strand_col, fix_strand=fix_strand)
    base_options.update(kwargs)
    NiceReaderWrapper.__init__(self, reader, **base_options)

    # GFF-specific column settings and coordinate-conversion flag.
    self.feature_col = feature_col
    self.score_col = score_col
    self.convert_to_bed_coord = convert_to_bed_coord

    # Iteration bookkeeping, starting out empty/zero.
    # NOTE(review): these are presumably consumed by __next__ - confirm there.
    self.last_line = None
    self.cur_offset = 0
    self.seed_interval = None
    self.seed_interval_line_len = 0
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
125 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
def parse_row(self, line):
    """
    Split ``line`` on tabs and return the GFFInterval built from the
    resulting columns, using this reader's column and strand settings.
    """
    columns = line.split("\t")
    return GFFInterval(
        self,
        columns,
        self.chrom_col,
        self.feature_col,
        self.start_col,
        self.end_col,
        self.strand_col,
        self.score_col,
        self.default_strand,
        fix_strand=self.fix_strand,
    )
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
131 |
7
3088e7e70888
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 4a3c9f195ba5d899b1a1ce5e80281cdf230f456a
iuc
parents:
3
diff
changeset
|
# Method (takes ``self``); its enclosing class header is not part of this excerpt.
def __next__(self):
    """
    Returns next GFFFeature.

    A "seed" interval is read first (or reused from the previous call), then
    every following interval that shares the seed's ``group`` (GFF), ``ID`` /
    ``Parent`` (GFF3) or ``transcript_id`` (GTF) attribute is collected into
    the same feature. The first interval that does not belong becomes the
    seed for the next call.

    If the seed turns out to be a Header or Comment it is returned as-is,
    with ``raw_size`` set to the length of the current line.

    An exception other than ParseError raised while looking for a seed
    (e.g. StopIteration from the underlying reader) is not caught here;
    this is where the iterator ends.
    """

    def handle_parse_error(e):
        """
        Actions to take when ParseError found.
        """
        if self.outstream:
            if self.print_delegate and callable(self.print_delegate):
                self.print_delegate(self.outstream, e, self)
        self.skipped += 1
        # No reason to stuff an entire bad file into memory.
        if self.skipped < 10:
            self.skipped_lines.append((self.linenum, self.current_line, str(e)))

    # Get next GFFFeature.
    # Start from the size of the seed line carried over from the previous call.
    raw_size = self.seed_interval_line_len

    # If there is no seed interval, set one. Also, if there are no more
    # intervals to read, this is where iterator dies.
    # NOTE(review): this calls the parent's ``next`` (not ``__next__``);
    # confirm the installed bx-python exposes that name.
    while not self.seed_interval:
        try:
            self.seed_interval = GenomicIntervalReader.next(self)
        except ParseError as e:
            handle_parse_error(e)
        finally:
            # Count the line whether or not it parsed.
            raw_size += len(self.current_line)

    # If header or comment, clear seed interval and return it with its size.
    if isinstance(self.seed_interval, (Header, Comment)):
        return_val = self.seed_interval
        return_val.raw_size = len(self.current_line)
        self.seed_interval = None
        self.seed_interval_line_len = 0
        return return_val

    # Initialize feature identifier from seed.
    seed_attributes = self.seed_interval.attributes
    # For GFF.
    feature_group = seed_attributes.get('group', None)
    # For GFF3.
    feature_id = seed_attributes.get('ID', None)
    # For GTF.
    feature_transcript_id = seed_attributes.get('transcript_id', None)

    # Read all intervals associated with seed.
    feature_intervals = [self.seed_interval]
    while True:
        try:
            interval = GenomicIntervalReader.next(self)
            raw_size += len(self.current_line)
        except StopIteration:
            # No more intervals to read, but last feature needs to be
            # returned.
            interval = None
            raw_size += len(self.current_line)
            break
        except ParseError as e:
            handle_parse_error(e)
            raw_size += len(self.current_line)
            continue

        # Ignore comments.
        if isinstance(interval, Comment):
            continue

        # Determine if interval is part of feature.
        part_of = False
        # GFF test:
        group = interval.attributes.get('group', None)
        if group and feature_group == group:
            part_of = True
        # GFF3 test:
        parent_id = interval.attributes.get('Parent', None)
        cur_id = interval.attributes.get('ID', None)
        if (cur_id and cur_id == feature_id) or (parent_id and parent_id == feature_id):
            part_of = True
        # GTF test:
        transcript_id = interval.attributes.get('transcript_id', None)
        if transcript_id and transcript_id == feature_transcript_id:
            part_of = True

        # If interval is not part of feature, clean up and break.
        if not part_of:
            # Adjust raw size because current line is not part of feature.
            raw_size -= len(self.current_line)
            break

        # Interval associated with feature.
        feature_intervals.append(interval)

    # Last interval read is the seed for the next interval
    # (None when the input is exhausted).
    self.seed_interval = interval
    self.seed_interval_line_len = len(self.current_line)

    # Return feature.
    feature = GFFFeature(self, self.chrom_col, self.feature_col, self.start_col,
                         self.end_col, self.strand_col, self.score_col,
                         self.default_strand, fix_strand=self.fix_strand,
                         intervals=feature_intervals, raw_size=raw_size)

    # Convert to BED coords?
    if self.convert_to_bed_coord:
        convert_gff_coords_to_bed(feature)
    return feature
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
228 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
229 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
def convert_bed_coords_to_gff(interval):
    """
    Converts an interval object's coordinates from BED format to GFF format.
    Accepted object types include GFFFeature, GenomicInterval, and list (where
    the first element in the list is the interval's start, and the second
    element is the interval's end).

    The start is incremented by one, in place; the end is left untouched.
    For a GFFFeature, each of its sub-intervals is converted as well.
    Objects of any other type are returned unchanged.

    Returns the same object that was passed in.
    """
    # Plain [start, end] pair: only the start moves.
    if isinstance(interval, list):
        interval[0] += 1
        return interval
    if isinstance(interval, GenomicInterval):
        interval.start += 1
        # A feature also carries its component intervals; shift those too.
        if isinstance(interval, GFFFeature):
            for subinterval in interval.intervals:
                convert_bed_coords_to_gff(subinterval)
    return interval
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
245 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
246 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
def convert_gff_coords_to_bed(interval):
    """
    Converts an interval object's coordinates from GFF format to BED format.
    Accepted object types include GFFFeature, GenomicInterval, and list (where
    the first element in the list is the interval's start, and the second
    element is the interval's end).

    The start is decremented by one, in place; the end is left untouched.
    For a GFFFeature, each of its sub-intervals is converted as well.
    Objects of any other type are returned unchanged.

    Returns the same object that was passed in.
    """
    # Plain [start, end] pair: only the start moves.
    if isinstance(interval, list):
        interval[0] -= 1
        return interval
    if isinstance(interval, GenomicInterval):
        interval.start -= 1
        # A feature also carries its component intervals; shift those too.
        if isinstance(interval, GFFFeature):
            for subinterval in interval.intervals:
                convert_gff_coords_to_bed(subinterval)
    return interval
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
262 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
263 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
def convert_to_twobit(reference_genome):
    """
    Create a 2bit file from a history fasta dataset by running faToTwoBit.

    reference_genome is the path of the fasta file handed to faToTwoBit as
    its input argument.

    Returns the path of the 2bit file, a freshly generated temporary file
    name in the current working directory.

    If faToTwoBit exits with a non-zero status, stop_err is called with the
    text the command wrote to stderr.  Any other exception raised while
    running the command is reported through stop_err with the prefix
    'Error running faToTwoBit. '.  stop_err is defined elsewhere in this
    module; presumably it terminates the process - verify there.
    """
    try:
        # Only the generated name is used; faToTwoBit creates the file itself.
        seq_path = tempfile.NamedTemporaryFile(dir=".").name
        # Pass the arguments as a list (no shell) so that paths containing
        # spaces or shell metacharacters reach faToTwoBit unchanged.
        cmd = ["faToTwoBit", str(reference_genome), seq_path]
        stderr = ''
        # Send stderr to a temporary file rather than a pipe so a very large
        # amount of output cannot block the child process.  The context
        # manager removes the file on success as well as on failure.
        with tempfile.TemporaryFile(dir=".") as tmp_stderr:
            proc = subprocess.Popen(args=cmd, stderr=tmp_stderr)
            returncode = proc.wait()
            if returncode != 0:
                tmp_stderr.seek(0)
                # The file holds bytes; decode before building the message.
                stderr = tmp_stderr.read().decode("utf-8", errors="replace")
        if returncode != 0:
            stop_err(stderr)
        return seq_path
    except Exception as e:
        stop_err('Error running faToTwoBit. ' + str(e))
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
294 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
295 |
2
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
iuc
parents:
0
diff
changeset
|
def get_bedtools_getfasta_default_header(chrom, start, end, strand, includes_strand_col):
    """
    Build the default fasta header emitted by the bedtools getfasta tool
    when "force strandedness" is assumed: <chrom>:<start>-<end>(strand).

    The strand shown in the header is the given strand only when the input
    data has a strand column and the value is '+' or '-'.  In every other
    case (no strand column, or any other strand value) it is '.'.
    """
    strand_is_usable = includes_strand_col and strand in ('+', '-')
    strand_val = strand if strand_is_usable else '.'
    return '%s:%s-%s(%s)' % (chrom, start, end, strand_val)
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
iuc
parents:
0
diff
changeset
|
311 |
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
iuc
parents:
0
diff
changeset
|
312 |
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
iuc
parents:
0
diff
changeset
|
def get_fasta_header_delimiter(delimiter):
    """
    Translate a delimiter name into the fasta header delimiter character.

    Recognized names are 'underscore', 'semicolon', 'comma', 'tilde' and
    'vertical_bar'.  Any other value falls back to the underscore.
    """
    named_delimiters = (
        ('underscore', '_'),
        ('semicolon', ';'),
        ('comma', ','),
        ('tilde', '~'),
        ('vertical_bar', '|'),
    )
    # First matching name wins; the default is the underscore.
    return next((char for name, char in named_delimiters if delimiter == name), '_')
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
iuc
parents:
0
diff
changeset
|
327 |
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
iuc
parents:
0
diff
changeset
|
328 |
0
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
def get_lines(feature):
    """
    Get feature's line(s).

    For a GFFFeature the result is whatever feature.lines() returns.  Any
    other feature is treated as a single line of text: trailing carriage
    return / newline characters are stripped and the line is returned in a
    one-element list.
    """
    if not isinstance(feature, GFFFeature):
        stripped_line = feature.rstrip('\r\n')
        return [stripped_line]
    return feature.lines()
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
335 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
336 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
def gff_attributes_to_str(attrs, gff_format):
    """
    Convert GFF attributes to string. Supported formats are GFF3, GTF.

    attrs is a mapping of attribute names to values; it is not modified.
    gff_format is either 'GTF' (pairs rendered as name "value") or 'GFF3'
    (pairs rendered as name=value).  Any other value raises ValueError.

    For GTF, the first of the 'group' (GFF), 'ID' or 'Parent' (GFF3)
    attributes that is present is also emitted as transcript_id and
    gene_id.

    Returns the rendered pairs joined with " ; ".
    """
    if gff_format == 'GTF':
        format_string = '%s "%s"'
    elif gff_format == 'GFF3':
        format_string = '%s=%s'
    else:
        # Fail with a clear message rather than an unbound-variable error.
        raise ValueError(
            "Unsupported GFF format %r; expected 'GTF' or 'GFF3'" % (gff_format,)
        )
    # Work on a copy so the caller's attributes are left untouched.
    attrs = dict(attrs)
    if gff_format == 'GTF':
        # Convert group (GFF) and ID, parent (GFF3) attributes to
        # transcript_id, gene_id.
        for id_attr in ('group', 'ID', 'Parent'):
            if id_attr in attrs:
                attrs['transcript_id'] = attrs['gene_id'] = attrs[id_attr]
                break
    return " ; ".join(format_string % (name, value) for name, value in attrs.items())
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
360 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
361 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
def parse_cols_arg(cols):
    """
    Parse a columns command line argument into a list of column indexes.

    cols is a comma-separated string of one-based column numbers; each is
    converted to a zero-based index.  When no strand column is included the
    string ends with a comma (something like 1,2,3,) and the missing entry
    becomes -1.  An empty cols value yields BED_DEFAULT_COLS.
    """
    if not cols:
        return BED_DEFAULT_COLS
    if cols.endswith(','):
        # A trailing comma means no strand column: pad with 0 so that the
        # one-based to zero-based shift below turns it into -1.
        cols = cols + '0'
    return [int(column) - 1 for column in cols.split(",")]
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
375 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
376 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
377 def parse_gff_attributes(attr_str): |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
378 """ |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
379 Parses a GFF/GTF attribute string and returns a dictionary of name-value |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
380 pairs. The general format for a GFF3 attributes string is |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
381 name1=value1;name2=value2 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
382 The general format for a GTF attribute string is |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
383 name1 "value1" ; name2 "value2" |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
384 The general format for a GFF attribute string is a single string that |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
385 denotes the interval's group; in this case, method returns a dictionary |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
386 with a single key-value pair, and key name is 'group'. |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
387 """ |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
388 attributes_list = attr_str.split(";") |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
389 attributes = {} |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
390 for name_value_pair in attributes_list: |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
391 # Try splitting by '=' (GFF3) first because spaces are allowed in GFF3 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
392 # attribute; next, try double quotes for GTF. |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
393 pair = name_value_pair.strip().split("=") |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
394 if len(pair) == 1: |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
395 pair = name_value_pair.strip().split("\"") |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
396 if len(pair) == 1: |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
397 # Could not split for some reason. |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
398 continue |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
399 if pair == '': |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
400 continue |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
401 name = pair[0].strip() |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
402 if name == '': |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
403 continue |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
404 # Need to strip double quote from values |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
405 value = pair[1].strip(" \"") |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
406 attributes[name] = value |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
407 if len(attributes) == 0: |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
408 # Could not split attributes string, so entire string must be |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
409 # 'group' attribute. This is the case for strictly GFF files. |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
410 attributes['group'] = attr_str |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
411 return attributes |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
412 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
413 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
def reverse_complement(s):
    """
    Return the reverse complement of the DNA sequence s.

    Case is preserved (an upper-case base maps to an upper-case complement,
    a lower-case base to a lower-case one).  In addition to A, C, G, T and N,
    the IUPAC ambiguity codes (R, Y, K, M, S, W, B, D, H, V) are complemented
    so that sequences containing them no longer fail on the minus strand.

    A KeyError is raised for any symbol that is not an IUPAC DNA code.
    """
    complement_dna = {
        "A": "T", "T": "A", "C": "G", "G": "C",
        "a": "t", "t": "a", "c": "g", "g": "c",
        "N": "N", "n": "n",
        # IUPAC ambiguity codes: each maps to the code that stands for the
        # complements of the bases it represents.
        "R": "Y", "Y": "R", "K": "M", "M": "K",
        "S": "S", "W": "W",
        "B": "V", "V": "B", "D": "H", "H": "D",
        "r": "y", "y": "r", "k": "m", "m": "k",
        "s": "s", "w": "w",
        "b": "v", "v": "b", "d": "h", "h": "d",
    }
    # Complement in reading order first, then reverse the result.
    complemented = [complement_dna[base] for base in s]
    return "".join(complemented[::-1])
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
421 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
422 |
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
iuc
parents:
diff
changeset
|
def stop_err(msg):
    """
    Write msg to standard error and terminate the process with exit
    status 1.  The message is written as given; no newline is appended.
    """
    sys.stderr.write(msg)
    # Raising SystemExit directly is what sys.exit(1) does internally.
    raise SystemExit(1)