Mercurial > repos > devteam > intersect
annotate utils/gff_util.py @ 5:33b3f3688db4 draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
author | devteam |
---|---|
date | Thu, 22 Jun 2017 18:52:23 -0400 |
parents | 8ddabc73af92 |
children |
rev | line source |
---|---|
4
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
1 """ |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
2 Provides utilities for working with GFF files. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
3 """ |
5
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
4 import copy |
4
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
5 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
6 from bx.intervals.io import GenomicInterval, GenomicIntervalReader, MissingFieldError, NiceReaderWrapper |
5
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
7 from bx.tabular.io import Comment, Header, ParseError |
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
8 |
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
9 from .odict import odict |
4
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
10 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
11 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
class GFFInterval(GenomicInterval):
    """
    A GFF interval, including attributes. If file is strictly a GFF file,
    only attribute is 'group.'

    On top of what GenomicInterval.__init__ sets up, an instance carries:

    * ``feature_col`` / ``feature`` -- column index and raw value of the
      feature field;
    * ``score_col`` / ``score`` -- column index and raw (unconverted) value
      of the score field;
    * ``attributes`` -- result of ``parse_gff_attributes`` applied to the
      attributes column.
    """

    # Index of the attributes (group) column in a GFF line.
    _ATTRIBUTES_COL = 8

    def __init__(self, reader, fields, chrom_col=0, feature_col=2, start_col=3, end_col=4,
                 strand_col=6, score_col=5, default_strand='.', fix_strand=False):
        """
        Build an interval from the already-split ``fields`` of one GFF line.

        ``fields`` is modified in place only transiently (see HACK below).

        Raises MissingFieldError when the line has no feature, score or
        attributes column. GenomicInterval.__init__ runs first and may raise
        its own errors.
        """
        # HACK: GFF format allows '.' for strand but GenomicInterval does not. To get around this,
        # temporarily set strand and then unset after initing GenomicInterval.
        unknown_strand = False
        # Only peek at the strand field when the line is long enough to have
        # one; a too-short line must not die here with a bare IndexError but
        # be left to GenomicInterval and the column checks below.
        if not fix_strand and strand_col < len(fields) and fields[strand_col] == '.':
            unknown_strand = True
            fields[strand_col] = '+'
        GenomicInterval.__init__(self, reader, fields, chrom_col, start_col, end_col, strand_col,
                                 default_strand, fix_strand=fix_strand)
        if unknown_strand:
            self.strand = '.'
            self.fields[strand_col] = '.'

        # Handle feature, score column.
        self.feature_col = feature_col
        if self.feature_col >= self.nfields:
            raise MissingFieldError("No field for feature_col (%d)" % feature_col)
        self.feature = self.fields[self.feature_col]
        self.score_col = score_col
        if self.score_col >= self.nfields:
            raise MissingFieldError("No field for score_col (%d)" % score_col)
        self.score = self.fields[self.score_col]

        # GFF attributes. Report a missing column the same way as the
        # feature/score columns above instead of raising IndexError.
        if self._ATTRIBUTES_COL >= self.nfields:
            raise MissingFieldError("No field for attributes column (%d)" % self._ATTRIBUTES_COL)
        self.attributes = parse_gff_attributes(fields[self._ATTRIBUTES_COL])

    def copy(self):
        """
        Return a new GFFInterval built from a shallow copy of this interval's
        fields and the same column layout; the current strand is passed as
        the copy's default strand.
        """
        return GFFInterval(self.reader, list(self.fields), self.chrom_col, self.feature_col, self.start_col,
                           self.end_col, self.strand_col, self.score_col, self.strand)
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
47 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
48 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
class GFFFeature(GFFInterval):
    """
    A GFF feature, which can include multiple intervals.

    The feature itself is initialised from (a copy of) the first interval's
    fields; its ``start``/``end`` are then widened to span every interval.
    ``raw_size`` is stored as given by the caller.
    """

    def __init__(self, reader, chrom_col=0, feature_col=2, start_col=3, end_col=4,
                 strand_col=6, score_col=5, default_strand='.', fix_strand=False, intervals=None,
                 raw_size=0):
        """
        Build a feature from a non-empty sequence of intervals.

        Raises IndexError if ``intervals`` is missing or empty, and
        ValueError if any interval lies on a different chrom than the first.
        """
        # A feature cannot exist without at least one interval. IndexError is
        # kept (rather than ValueError) because that is what indexing the old
        # empty-list default raised.
        if not intervals:
            raise IndexError("GFFFeature requires at least one interval")
        # Use copy so that first interval and feature do not share fields.
        GFFInterval.__init__(self, reader, copy.deepcopy(intervals[0].fields), chrom_col, feature_col,
                             start_col, end_col, strand_col, score_col, default_strand,
                             fix_strand=fix_strand)
        self.intervals = intervals
        self.raw_size = raw_size
        # Use intervals to set feature attributes.
        for interval in self.intervals:
            # Error checking. NOTE: intervals need not share the same strand.
            if interval.chrom != self.chrom:
                raise ValueError("interval chrom does not match self chrom: %s != %s" %
                                 (interval.chrom, self.chrom))
            # Set start, end of interval.
            if interval.start < self.start:
                self.start = interval.start
            if interval.end > self.end:
                self.end = interval.end

    def name(self):
        """ Returns feature's name, or None if no naming attribute is present. """
        name = None
        # Preference for name: GTF, GFF3, GFF.
        for attr_name in ['gene_id', 'transcript_id',  # GTF
                          'ID', 'id',  # GFF3
                          'group']:  # GFF (TODO)
            name = self.attributes.get(attr_name, None)
            if name is not None:
                break
        return name

    def copy(self):
        """
        Return a new GFFFeature whose intervals are copies of this feature's
        intervals; column layout, strand and raw_size are carried over.
        """
        intervals_copy = [interval.copy() for interval in self.intervals]
        # raw_size must be forwarded explicitly, otherwise the copy would
        # fall back to the constructor default of 0.
        return GFFFeature(self.reader, self.chrom_col, self.feature_col, self.start_col, self.end_col,
                          self.strand_col, self.score_col, self.strand, intervals=intervals_copy,
                          raw_size=self.raw_size)

    def lines(self):
        """ Return one tab-joined line (no trailing newline) per interval. """
        return ['\t'.join(interval.fields) for interval in self.intervals]
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
98 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
99 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
class GFFIntervalToBEDReaderWrapper(NiceReaderWrapper):
    """
    Reader wrapper for GFF lines: each row is parsed into an interval and
    passed through ``convert_gff_coords_to_bed`` before being handed back.
    """

    def parse_row(self, line):
        """
        Split ``line`` on tabs, build a GenomicInterval using this reader's
        column settings, and return the result of converting it with
        ``convert_gff_coords_to_bed``.
        """
        # HACK: a GFF interval would be the natural return type here, but
        # bx-python operations need plain GenomicInterval objects --
        # subclasses will not work.
        columns = line.split("\t")
        gff_interval = GenomicInterval(
            self,
            columns,
            self.chrom_col,
            self.start_col,
            self.end_col,
            self.strand_col,
            self.default_strand,
            fix_strand=self.fix_strand,
        )
        return convert_gff_coords_to_bed(gff_interval)
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
114 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
115 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
116 class GFFReaderWrapper( NiceReaderWrapper ): |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
117 """ |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
118 Reader wrapper for GFF files. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
119 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
120 Wrapper has two major functions: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
121 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
122 1. group entries for GFF file (via group column), GFF3 (via id attribute), |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
123 or GTF (via gene_id/transcript id); |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
124 2. convert coordinates from GFF format--starting and ending coordinates |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
125 are 1-based, closed--to the 'traditional'/BED interval format--0 based, |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
126 half-open. This is useful when using GFF files as inputs to tools that |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
127 expect traditional interval format. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
128 """ |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
129 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
def __init__(self, reader, chrom_col=0, feature_col=2, start_col=3,
             end_col=4, strand_col=6, score_col=5, fix_strand=False,
             convert_to_bed_coord=False, **kwargs):
    """
    Set up the wrapper around `reader`.

    The chrom/start/end/strand column indices, `fix_strand` and any extra
    keyword arguments are handed to NiceReaderWrapper; `feature_col`,
    `score_col` and `convert_to_bed_coord` are only stored on the
    instance. The remaining attributes are bookkeeping that starts out
    empty (no last line, zero offset, no seed interval).
    """
    # Bookkeeping state, reset to its "nothing read yet" values.
    # NOTE(review): seed_interval presumably holds the first interval of the
    # next feature between calls to __next__ -- confirm against that method.
    self.seed_interval_line_len = 0
    self.seed_interval = None
    self.cur_offset = 0
    self.last_line = None

    # GFF-specific settings that the base reader is not told about.
    self.convert_to_bed_coord = convert_to_bed_coord
    self.score_col = score_col
    self.feature_col = feature_col

    # Base-class initialisation; called explicitly on NiceReaderWrapper,
    # exactly as the wrapper has always done.
    NiceReaderWrapper.__init__(
        self,
        reader,
        chrom_col=chrom_col,
        start_col=start_col,
        end_col=end_col,
        strand_col=strand_col,
        fix_strand=fix_strand,
        **kwargs
    )
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
141 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
def parse_row(self, line):
    """
    Turn one line of the wrapped file into a GFFInterval.

    The line is split on tab characters and the resulting fields are
    passed to GFFInterval together with this reader and its configured
    column indices, default strand and fix_strand setting.
    """
    fields = line.split("\t")
    return GFFInterval(
        self,
        fields,
        self.chrom_col,
        self.feature_col,
        self.start_col,
        self.end_col,
        self.strand_col,
        self.score_col,
        self.default_strand,
        fix_strand=self.fix_strand,
    )
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
147 |
5
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
148 def __next__( self ): |
4
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
149 """ Returns next GFFFeature. """ |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
150 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
151 # |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
152 # Helper function. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
153 # |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
154 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
def handle_parse_error( parse_error ):
    """
    Actions to take when a ParseError is found.

    parse_error -- the exception raised while parsing the current line.

    Uses `self` from the enclosing method: when an output stream and a
    callable print delegate are configured, the delegate is invoked with
    the stream, the error and the reader. The skipped-line counter is
    always incremented, and details of the line are kept only while the
    counter is below 10.
    """
    # Report the exception that was passed in, not a name captured from the
    # caller's scope.
    if self.outstream:
        if self.print_delegate and callable( self.print_delegate ):
            self.print_delegate( self.outstream, parse_error, self )
    self.skipped += 1
    # No reason to stuff an entire bad file into memory.
    if self.skipped < 10:
        self.skipped_lines.append( ( self.linenum, self.current_line, str( parse_error ) ) )

    # For debugging, uncomment this to propagate parsing exceptions up.
    # I.e. the underlying reason for an unexpected StopIteration exception
    # can be found by uncommenting this.
    # raise parse_error
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
169 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
170 # |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
171 # Get next GFFFeature |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
172 # |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
173 raw_size = self.seed_interval_line_len |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
174 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
175 # If there is no seed interval, set one. Also, if there are no more |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
176 # intervals to read, this is where iterator dies. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
177 if not self.seed_interval: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
178 while not self.seed_interval: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
179 try: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
180 self.seed_interval = GenomicIntervalReader.next( self ) |
5
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
181 except ParseError as e: |
4
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
182 handle_parse_error( e ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
183 # TODO: When no longer supporting python 2.4 use finally: |
5
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
184 # finally: |
4
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
185 raw_size += len( self.current_line ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
186 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
187 # If header or comment, clear seed interval and return it with its size. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
188 if isinstance( self.seed_interval, ( Header, Comment ) ): |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
189 return_val = self.seed_interval |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
190 return_val.raw_size = len( self.current_line ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
191 self.seed_interval = None |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
192 self.seed_interval_line_len = 0 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
193 return return_val |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
194 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
195 # Initialize feature identifier from seed. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
196 feature_group = self.seed_interval.attributes.get( 'group', None ) # For GFF |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
197 # For GFF3 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
198 feature_id = self.seed_interval.attributes.get( 'ID', None ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
199 # For GTF. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
200 feature_transcript_id = self.seed_interval.attributes.get( 'transcript_id', None ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
201 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
202 # Read all intervals associated with seed. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
203 feature_intervals = [] |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
204 feature_intervals.append( self.seed_interval ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
205 while True: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
206 try: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
207 interval = GenomicIntervalReader.next( self ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
208 raw_size += len( self.current_line ) |
5
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
209 except StopIteration as e: |
4
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
210 # No more intervals to read, but last feature needs to be |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
211 # returned. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
212 interval = None |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
213 raw_size += len( self.current_line ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
214 break |
5
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
215 except ParseError as e: |
4
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
216 handle_parse_error( e ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
217 raw_size += len( self.current_line ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
218 continue |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
219 # TODO: When no longer supporting python 2.4 use finally: |
5
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
220 # finally: |
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
221 # raw_size += len( self.current_line ) |
4
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
222 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
223 # Ignore comments. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
224 if isinstance( interval, Comment ): |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
225 continue |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
226 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
227 # Determine if interval is part of feature. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
228 part_of = False |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
229 group = interval.attributes.get( 'group', None ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
230 # GFF test: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
231 if group and feature_group == group: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
232 part_of = True |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
233 # GFF3 test: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
234 parent_id = interval.attributes.get( 'Parent', None ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
235 cur_id = interval.attributes.get( 'ID', None ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
236 if ( cur_id and cur_id == feature_id ) or ( parent_id and parent_id == feature_id ): |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
237 part_of = True |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
238 # GTF test: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
239 transcript_id = interval.attributes.get( 'transcript_id', None ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
240 if transcript_id and transcript_id == feature_transcript_id: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
241 part_of = True |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
242 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
243 # If interval is not part of feature, clean up and break. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
244 if not part_of: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
245 # Adjust raw size because current line is not part of feature. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
246 raw_size -= len( self.current_line ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
247 break |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
248 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
249 # Interval associated with feature. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
250 feature_intervals.append( interval ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
251 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
252 # Last interval read is the seed for the next interval. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
253 self.seed_interval = interval |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
254 self.seed_interval_line_len = len( self.current_line ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
255 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
256 # Return feature. |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
257 feature = GFFFeature( self, self.chrom_col, self.feature_col, self.start_col, |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
258 self.end_col, self.strand_col, self.score_col, |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
259 self.default_strand, fix_strand=self.fix_strand, |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
260 intervals=feature_intervals, raw_size=raw_size ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
261 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
262 # Convert to BED coords? |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
263 if self.convert_to_bed_coord: |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
264 convert_gff_coords_to_bed( feature ) |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
265 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
266 return feature |
5
33b3f3688db4
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
devteam
parents:
4
diff
changeset
|
267 next = __next__ # This line should be removed once the bx-python port to Python3 is finished |
4
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
268 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
269 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
def convert_bed_coords_to_gff( interval ):
    """
    Converts an interval object's coordinates from BED format to GFF format
    by adding 1 to the interval's start; the end is left untouched.

    Accepted object types include GFFFeature, GenomicInterval, and list (where
    the first element in the list is the interval's start, and the second
    element is the interval's end). For a GFFFeature, the feature's own start
    and the start of every interval in its ``intervals`` are shifted.

    The object is modified in place and is also returned. Objects of any
    other type are returned unchanged.
    """
    if isinstance( interval, GenomicInterval ):
        interval.start += 1
        # The feature check is nested here, so it only applies to objects that
        # already passed the GenomicInterval check; sub-intervals are
        # converted recursively.
        if isinstance( interval, GFFFeature ):
            for subinterval in interval.intervals:
                convert_bed_coords_to_gff( subinterval )
    elif isinstance( interval, list ):
        # isinstance (rather than an exact type comparison) so that list
        # subclasses are converted too instead of being silently ignored.
        interval[ 0 ] += 1
    return interval
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
285 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
286 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
def convert_gff_coords_to_bed( interval ):
    """
    Converts an interval object's coordinates from GFF format to BED format
    by subtracting 1 from the interval's start; the end is left untouched.

    Accepted object types include GFFFeature, GenomicInterval, and list (where
    the first element in the list is the interval's start, and the second
    element is the interval's end). For a GFFFeature, the feature's own start
    and the start of every interval in its ``intervals`` are shifted.

    The object is modified in place and is also returned. Objects of any
    other type are returned unchanged.
    """
    if isinstance( interval, GenomicInterval ):
        interval.start -= 1
        # The feature check is nested here, so it only applies to objects that
        # already passed the GenomicInterval check; sub-intervals are
        # converted recursively.
        if isinstance( interval, GFFFeature ):
            for subinterval in interval.intervals:
                convert_gff_coords_to_bed( subinterval )
    elif isinstance( interval, list ):
        # isinstance (rather than an exact type comparison) so that list
        # subclasses are converted too instead of being silently ignored.
        interval[ 0 ] -= 1
    return interval
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
302 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
303 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
def parse_gff_attributes( attr_str ):
    """
    Parses a GFF/GTF attribute string and returns a dictionary of name-value
    pairs. The general format for a GFF3 attributes string is

        name1=value1;name2=value2

    The general format for a GTF attribute string is

        name1 "value1" ; name2 "value2"

    The general format for a GFF attribute string is a single string that
    denotes the interval's group; in this case, method returns a dictionary
    with a single key-value pair, and key name is 'group'

    Names are stripped of surrounding whitespace; values are stripped of
    surrounding spaces and double quotes. Pairs that cannot be split, or
    whose name is empty, are skipped. If the same name occurs more than once,
    the last value wins.
    """
    attributes = {}
    for name_value_pair in attr_str.split(";"):
        stripped_pair = name_value_pair.strip()
        # Try splitting by '=' (GFF3) first because spaces are allowed in GFF3
        # attribute; next, try double quotes for GTF. Only the first '='
        # separates name from value, so a value that itself contains '='
        # (e.g. Note=a=b) is kept whole instead of being truncated.
        name, separator, value = stripped_pair.partition("=")
        if not separator:
            pair = stripped_pair.split("\"")
            if len( pair ) == 1:
                # Could not split for some reason -- skip this piece.
                continue
            name, value = pair[0], pair[1]
        name = name.strip()
        if name == '':
            continue
        # Need to strip double quote from values
        attributes[ name ] = value.strip(" \"")

    if not attributes:
        # Could not split attributes string, so entire string must be
        # 'group' attribute. This is the case for strictly GFF files.
        attributes['group'] = attr_str
    return attributes
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
344 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
345 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
def gff_attributes_to_str( attrs, gff_format ):
    """
    Convert GFF attributes to string. Supported formats are GFF3, GTF.

    ``attrs`` is a dictionary of attribute names to values and ``gff_format``
    is either 'GTF' or 'GFF3'. Pairs are rendered as ``name "value"`` for GTF
    and ``name=value`` for GFF3, and joined with " ; ".

    NOTE: for GTF output, ``attrs`` is modified in place: when a 'group',
    'ID' or 'Parent' attribute is present (checked in that order), its value
    is stored under both 'transcript_id' and 'gene_id', overwriting any
    existing values for those keys.

    Raises ValueError if ``gff_format`` is not a supported format.
    """
    if gff_format == 'GTF':
        format_string = '%s "%s"'
        # Convert group (GFF) and ID, parent (GFF3) attributes to transcript_id, gene_id
        id_attr = None
        if 'group' in attrs:
            id_attr = 'group'
        elif 'ID' in attrs:
            id_attr = 'ID'
        elif 'Parent' in attrs:
            id_attr = 'Parent'
        if id_attr:
            attrs['transcript_id'] = attrs['gene_id'] = attrs[id_attr]
    elif gff_format == 'GFF3':
        format_string = '%s=%s'
    else:
        # Fail with a clear message instead of an unbound-variable error
        # further down when the format is not recognized.
        raise ValueError( "Unsupported GFF format: %r (expected 'GTF' or 'GFF3')" % ( gff_format, ) )
    attrs_strs = [ format_string % ( name, value ) for name, value in attrs.items() ]
    return " ; ".join( attrs_strs )
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
368 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
369 |
8ddabc73af92
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/intersect commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff
changeset
|
def read_unordered_gtf( iterator, strict=False ):
    """
    Returns GTF features found in an iterator. GTF lines need not be ordered
    or clustered for reader to work.

    Lines are grouped into features using transcript_id only (strict=True) or
    chromosome + transcript_id (strict=False, the default). The whole iterator
    is consumed before anything is yielded. Comment objects for every line
    starting with '#' are yielded first, followed by GFFFeature objects sorted
    by chrom and then by start position. The intervals inside each feature are
    sorted by start position as well.

    :param iterator: iterable of GTF lines (tab-separated strings).
    :param strict: if True, group lines by transcript_id only; otherwise group
                   by chromosome + transcript_id.
    """

    # -- Get function that generates line/feature key. --

    def get_transcript_id(fields):
        # Column 9 (index 8) of a GTF line holds the attributes.
        return parse_gff_attributes( fields[8] )[ 'transcript_id' ]

    if strict:
        # Strict GTF parsing uses transcript_id only to group lines into feature.
        key_fn = get_transcript_id
    else:
        # Use lenient parsing where chromosome + transcript_id is the key. This allows
        # transcripts with same ID on different chromosomes; this occurs in some popular
        # datasources, such as RefGenes in UCSC.
        def key_fn(fields):
            return fields[0] + '_' + get_transcript_id( fields )

    # Aggregate intervals by feature key and collect comments. An ordered
    # mapping keeps features in order of first appearance, so that ties in the
    # (stable) sorts below are resolved by input order.
    feature_intervals = odict()
    comments = []
    for line in iterator:
        if line.startswith( '#' ):
            comments.append( Comment( line ) )
            continue

        # Split once and reuse the fields for both the key and the interval.
        fields = line.split( '\t' )
        line_key = key_fn( fields )
        if line_key not in feature_intervals:
            feature_intervals[ line_key ] = []
        feature_intervals[ line_key ].append( GFFInterval( None, fields ) )

    # Create features, bucketed by chromosome.
    chroms_features = {}
    for intervals in feature_intervals.values():
        # Sort intervals by start position. A key function is used instead of
        # a cmp-style comparator so this works on both Python 2 and Python 3.
        intervals.sort( key=lambda interval: interval.start )
        feature = GFFFeature( None, intervals=intervals )
        if feature.chrom not in chroms_features:
            chroms_features[ feature.chrom ] = []
        chroms_features[ feature.chrom ].append( feature )

    # Sort features by chrom, start position. The dict keys are the features'
    # chrom values, so ordering the keys orders the per-chromosome lists.
    chroms_features_sorted = [ chroms_features[ chrom ] for chrom in sorted( chroms_features ) ]
    for features in chroms_features_sorted:
        features.sort( key=lambda f: f.start )

    # Yield comments first, then features.
    # FIXME: comments can appear anywhere in file, not just the beginning.
    # Ideally, then comments would be associated with features and output
    # just before feature/line.
    for comment in comments:
        yield comment

    for chrom_features in chroms_features_sorted:
        for feature in chrom_features:
            yield feature