Repository 'flanking_features'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/flanking_features

Changeset 1:8307665c4b6c (2015-11-11)
Previous changeset 0:90100b587723 (2014-04-01) Next changeset 2:a09d13b108fd (2017-06-22)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit a1517c9d22029095120643bbe2c8fa53754dd2b7
modified:
flanking_features.py
tool_dependencies.xml
utils/gff_util.py
utils/odict.py
b
diff -r 90100b587723 -r 8307665c4b6c flanking_features.py
--- a/flanking_features.py Tue Apr 01 10:52:59 2014 -0400
+++ b/flanking_features.py Wed Nov 11 12:48:18 2015 -0500
[
b'@@ -10,21 +10,23 @@\n     -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval\n """\n \n-import sys, traceback, fileinput\n-from warnings import warn\n+import fileinput\n+import sys\n from bx.cookbook import doc_optparse\n-from galaxy.tools.util.galaxyops import *\n-from bx.intervals.io import *\n+from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper\n from bx.intervals.operations import quicksect\n-from utils.gff_util import *\n+from bx.tabular.io import ParseError\n+from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped\n+from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper\n \n assert sys.version_info[:2] >= ( 2, 4 )\n \n-def get_closest_feature (node, direction, threshold_up, threshold_down, report_func_up, report_func_down):\n+\n+def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down):\n     #direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases\n     #threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand\n     #threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand\n-    if direction == 1: \n+    if direction == 1:\n         if node.maxend <= threshold_up:\n             if node.end == node.maxend:\n                 report_func_up(node)\n@@ -60,10 +62,11 @@\n             if node.right:\n                 get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)\n \n+\n def proximal_region_finder(readers, region, comments=True):\n     """\n-    Returns an iterator that yields elements of the form [ <original_interval>, <closest_feature> ]. \n-    Intervals are GenomicInterval objects. \n+    Returns an iterator that yields elements of the form [ <original_interval>, <closest_feature> ].\n+    Intervals are GenomicInterval objects.\n     """\n     primary = readers[0]\n     features = readers[1]\n@@ -76,13 +79,13 @@\n         up, down = True, True\n         if region == \'Either\':\n             either = True\n-        \n+\n     # Read features into memory:\n     rightTree = quicksect.IntervalTree()\n     for item in features:\n         if type( item ) is GenomicInterval:\n             rightTree.insert( item, features.linenum, item )\n-            \n+\n     for interval in primary:\n         if type( interval ) is Header:\n             yield interval\n@@ -96,33 +99,33 @@\n             if chrom not in rightTree.chroms:\n                 continue\n             else:\n-                root = rightTree.chroms[chrom]    #root node for the chrom tree\n+                root = rightTree.chroms[chrom]  # root node for the chrom tree\n                 result_up = []\n                 result_down = []\n-                if (strand == \'+\' and up) or (strand == \'-\' and down): \n+                if (strand == \'+\' and up) or (strand == \'-\' and down):\n                     #upstream +ve strand and downstream -ve strand cases\n-                    get_closest_feature (root, 1, start, None, lambda node: result_up.append( node ), None)\n-                    \n+                    get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None)\n+\n                 if (strand == \'+\' and down) or (strand == \'-\' and up):\n                     #downstream +ve strand and upstream -ve strand case\n-                    get_closest_feature (root, 0, None, end-1, None, lambda node: result_down.append( node ))\n-                \n+                    get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node ))\n+\n                 if result_up:\n-                    if len(result_up) > 1: #The results_up list has a list of intervals upstream to the given interval. \n+                    if len(result_up) > 1:  # The results_up list has a list of intervals upstream to the given interval.\n                         ends = ['..b'               \n-                if result_down:    \n+\n+                if result_down:\n                     if not(either):\n                         #The last element of result_down will be the closest element to the given interval\n-                        yield [ interval, result_down[-1].other ] \n-                \n+                        yield [ interval, result_down[-1].other ]\n+\n                 if either and (result_up or result_down):\n                     iter_val = []\n                     if result_up and result_down:\n@@ -137,7 +140,8 @@\n                         #The last element of result_down will be the closest element to the given interval\n                         iter_val = [ interval, result_down[-1].other ]\n                     yield iter_val\n-                        \n+\n+\n def main():\n     options, args = doc_optparse.parse( __doc__ )\n     try:\n@@ -148,7 +152,7 @@\n         in_fname, in2_fname, out_fname, direction = args\n     except:\n         doc_optparse.exception()\n-        \n+\n     # Set readers to handle either GFF or default format.\n     if in1_gff_format:\n         in1_reader_wrapper = GFFIntervalToBEDReaderWrapper\n@@ -160,22 +164,22 @@\n         in2_reader_wrapper = NiceReaderWrapper\n \n     g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ),\n-                            chrom_col=chr_col_1,\n-                            start_col=start_col_1,\n-                            end_col=end_col_1,\n-                            strand_col=strand_col_1,\n-                            fix_strand=True )\n+                             chrom_col=chr_col_1,\n+                             start_col=start_col_1,\n+                             end_col=end_col_1,\n+                             strand_col=strand_col_1,\n+                             fix_strand=True )\n     g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ),\n-                            chrom_col=chr_col_2,\n-                            start_col=start_col_2,\n-                            end_col=end_col_2,\n-                            strand_col=strand_col_2,\n-                            fix_strand=True )\n+                             chrom_col=chr_col_2,\n+                             start_col=start_col_2,\n+                             end_col=end_col_2,\n+                             strand_col=strand_col_2,\n+                             fix_strand=True )\n \n     # Find flanking features.\n     out_file = open( out_fname, "w" )\n     try:\n-        for result in proximal_region_finder([g1,g2], direction):\n+        for result in proximal_region_finder([g1, g2], direction):\n             if type( result ) is list:\n                 line, closest_feature = result\n                 # Need to join outputs differently depending on file types.\n@@ -185,12 +189,12 @@\n                     # Invervals are in BED coordinates; need to convert to GFF.\n                     line = convert_bed_coords_to_gff( line )\n                     closest_feature = convert_bed_coords_to_gff( closest_feature )\n-                    \n+\n                     # Replace double quotes with single quotes in closest feature\'s attributes.\n-                    out_file.write( "%s closest_feature \\"%s\\" \\n" % \n-                                    ( "\\t".join( line.fields ), \\\n+                    out_file.write( "%s closest_feature \\"%s\\" \\n" %\n+                                    ( "\\t".join( line.fields ),\n                                       "\\t".join( closest_feature.fields ).replace( "\\"", "\\\\\\"" )\n-                                     ) )\n+                                      ) )\n                 else:\n                     # Output is BED + closest feature fields.\n                     output_line_fields = []\n@@ -202,7 +206,7 @@\n     except ParseError, exc:\n         fail( "Invalid file format: %s" % str( exc ) )\n \n-    print "Direction: %s" %(direction)\n+    print "Direction: %s" % (direction)\n     if g1.skipped > 0:\n         print skipped( g1, filedesc=" of 1st dataset" )\n     if g2.skipped > 0:\n'
b
diff -r 90100b587723 -r 8307665c4b6c tool_dependencies.xml
--- a/tool_dependencies.xml Tue Apr 01 10:52:59 2014 -0400
+++ b/tool_dependencies.xml Wed Nov 11 12:48:18 2015 -0500
b
@@ -1,9 +1,9 @@
 <?xml version="1.0"?>
 <tool_dependency>
   <package name="bx-python" version="0.7.1">
-      <repository changeset_revision="41eb9d9f667d" name="package_bx_python_0_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+      <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <package name="galaxy-ops" version="1.0.0">
-      <repository changeset_revision="4e39032e4ec6" name="package_galaxy_ops_1_0_0" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+      <repository changeset_revision="9cbb20b85c01" name="package_galaxy_ops_1_0_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>
b
diff -r 90100b587723 -r 8307665c4b6c utils/gff_util.py
--- a/utils/gff_util.py Tue Apr 01 10:52:59 2014 -0400
+++ b/utils/gff_util.py Wed Nov 11 12:48:18 2015 -0500
[
b'@@ -3,16 +3,17 @@\n """\n \n import copy\n-from bx.intervals.io import *\n-from bx.tabular.io import Header, Comment\n+from bx.intervals.io import GenomicInterval, GenomicIntervalReader, MissingFieldError, NiceReaderWrapper\n+from bx.tabular.io import Header, Comment, ParseError\n from utils.odict import odict\n \n+\n class GFFInterval( GenomicInterval ):\n     """\n     A GFF interval, including attributes. If file is strictly a GFF file,\n     only attribute is \'group.\'\n     """\n-    def __init__( self, reader, fields, chrom_col=0, feature_col=2, start_col=3, end_col=4, \\\n+    def __init__( self, reader, fields, chrom_col=0, feature_col=2, start_col=3, end_col=4,\n                   strand_col=6, score_col=5, default_strand=\'.\', fix_strand=False ):\n         # HACK: GFF format allows \'.\' for strand but GenomicInterval does not. To get around this,\n         # temporarily set strand and then unset after initing GenomicInterval.\n@@ -20,7 +21,7 @@\n         if not fix_strand and fields[ strand_col ] == \'.\':\n             unknown_strand = True\n             fields[ strand_col ] = \'+\'\n-        GenomicInterval.__init__( self, reader, fields, chrom_col, start_col, end_col, strand_col, \\\n+        GenomicInterval.__init__( self, reader, fields, chrom_col, start_col, end_col, strand_col,\n                                   default_strand, fix_strand=fix_strand )\n         if unknown_strand:\n             self.strand = \'.\'\n@@ -43,16 +44,17 @@\n         return GFFInterval(self.reader, list( self.fields ), self.chrom_col, self.feature_col, self.start_col,\n                            self.end_col, self.strand_col, self.score_col, self.strand)\n \n+\n class GFFFeature( GFFInterval ):\n     """\n     A GFF feature, which can include multiple intervals.\n     """\n-    def __init__( self, reader, chrom_col=0, feature_col=2, start_col=3, end_col=4, \\\n-                  strand_col=6, score_col=5, default_strand=\'.\', fix_strand=False, intervals=[], \\\n+    def __init__( self, reader, chrom_col=0, feature_col=2, start_col=3, end_col=4,\n+                  strand_col=6, score_col=5, default_strand=\'.\', fix_strand=False, intervals=[],\n                   raw_size=0 ):\n         # Use copy so that first interval and feature do not share fields.\n-        GFFInterval.__init__( self, reader, copy.deepcopy( intervals[0].fields ), chrom_col, feature_col, \\\n-                              start_col, end_col, strand_col, score_col, default_strand, \\\n+        GFFInterval.__init__( self, reader, copy.deepcopy( intervals[0].fields ), chrom_col, feature_col,\n+                              start_col, end_col, strand_col, score_col, default_strand,\n                               fix_strand=fix_strand )\n         self.intervals = intervals\n         self.raw_size = raw_size\n@@ -60,7 +62,7 @@\n         for interval in self.intervals:\n             # Error checking. NOTE: intervals need not share the same strand.\n             if interval.chrom != self.chrom:\n-                raise ValueError( "interval chrom does not match self chrom: %s != %s" % \\\n+                raise ValueError( "interval chrom does not match self chrom: %s != %s" %\n                                   ( interval.chrom, self.chrom ) )\n             # Set start, end of interval.\n             if interval.start < self.start:\n@@ -72,13 +74,9 @@\n         """ Returns feature\'s name. """\n         name = None\n         # Preference for name: GTF, GFF3, GFF.\n-        for attr_name in [\n-                           # GTF:\n-                           \'gene_id\', \'transcript_id\',\n-                           # GFF3:\n-                           \'ID\', \'id\',\n-                           # GFF (TODO):\n-                           \'group\' ]:\n+        for attr_name in [\'gene_id\', \'transcript_id\',  # GTF\n+                          \'ID\', \'id\',  # GFF3\n+                          \'group\' ]:  # GFF (TODO)\n             name = self.attributes.get( attr_name, None )\n             if name is not None:\n                 break\n@@ -107,12 +105,13 @@\n     def parse_row( se'..b'ibutes.get( \'Parent\', None )\n         # For GTF.\n-        feature_gene_id = self.seed_interval.attributes.get( \'gene_id\', None )\n         feature_transcript_id = self.seed_interval.attributes.get( \'transcript_id\', None )\n \n         # Read all intervals associated with seed.\n@@ -256,9 +253,9 @@\n         self.seed_interval_line_len = len( self.current_line )\n \n         # Return feature.\n-        feature = GFFFeature( self, self.chrom_col, self.feature_col, self.start_col, \\\n-                              self.end_col, self.strand_col, self.score_col, \\\n-                              self.default_strand, fix_strand=self.fix_strand, \\\n+        feature = GFFFeature( self, self.chrom_col, self.feature_col, self.start_col,\n+                              self.end_col, self.strand_col, self.score_col,\n+                              self.default_strand, fix_strand=self.fix_strand,\n                               intervals=feature_intervals, raw_size=raw_size )\n \n         # Convert to BED coords?\n@@ -267,6 +264,7 @@\n \n         return feature\n \n+\n def convert_bed_coords_to_gff( interval ):\n     """\n     Converts an interval object\'s coordinates from BED format to GFF format.\n@@ -279,10 +277,11 @@\n         if isinstance( interval, GFFFeature ):\n             for subinterval in interval.intervals:\n                 convert_bed_coords_to_gff( subinterval )\n-    elif type ( interval ) is list:\n+    elif type( interval ) is list:\n         interval[ 0 ] += 1\n     return interval\n \n+\n def convert_gff_coords_to_bed( interval ):\n     """\n     Converts an interval object\'s coordinates from GFF format to BED format.\n@@ -295,10 +294,11 @@\n         if isinstance( interval, GFFFeature ):\n             for subinterval in interval.intervals:\n                 convert_gff_coords_to_bed( subinterval )\n-    elif type ( interval ) is list:\n+    elif type( interval ) is list:\n         interval[ 0 ] -= 1\n     return interval\n \n+\n def parse_gff_attributes( attr_str ):\n     """\n     Parses a GFF/GTF attribute string and returns a dictionary of name-value\n@@ -340,6 +340,7 @@\n         attributes[\'group\'] = attr_str\n     return attributes\n \n+\n def gff_attributes_to_str( attrs, gff_format ):\n     """\n     Convert GFF attributes to string. Supported formats are GFF3, GTF.\n@@ -363,6 +364,7 @@\n         attrs_strs.append( format_string % ( name, value ) )\n     return " ; ".join( attrs_strs )\n \n+\n def read_unordered_gtf( iterator, strict=False ):\n     """\n     Returns GTF features found in an iterator. GTF lines need not be ordered\n@@ -382,7 +384,6 @@\n         # datasources, such as RefGenes in UCSC.\n         key_fn = lambda fields: fields[0] + \'_\' + get_transcript_id( fields )\n \n-\n     # Aggregate intervals by transcript_id and collect comments.\n     feature_intervals = odict()\n     comments = []\n@@ -403,7 +404,7 @@\n     chroms_features = {}\n     for count, intervals in enumerate( feature_intervals.values() ):\n         # Sort intervals by start position.\n-        intervals.sort( lambda a,b: cmp( a.start, b.start ) )\n+        intervals.sort( lambda a, b: cmp( a.start, b.start ) )\n         feature = GFFFeature( None, intervals=intervals )\n         if feature.chrom not in chroms_features:\n             chroms_features[ feature.chrom ] = []\n@@ -413,9 +414,9 @@\n     chroms_features_sorted = []\n     for chrom_features in chroms_features.values():\n         chroms_features_sorted.append( chrom_features )\n-    chroms_features_sorted.sort( lambda a,b: cmp( a[0].chrom, b[0].chrom ) )\n+    chroms_features_sorted.sort( lambda a, b: cmp( a[0].chrom, b[0].chrom ) )\n     for features in chroms_features_sorted:\n-        features.sort( lambda a,b: cmp( a.start, b.start ) )\n+        features.sort( lambda a, b: cmp( a.start, b.start ) )\n \n     # Yield comments first, then features.\n     # FIXME: comments can appear anywhere in file, not just the beginning.\n@@ -427,4 +428,3 @@\n     for chrom_features in chroms_features_sorted:\n         for feature in chrom_features:\n             yield feature\n-\n'
b
diff -r 90100b587723 -r 8307665c4b6c utils/odict.py
--- a/utils/odict.py Tue Apr 01 10:52:59 2014 -0400
+++ b/utils/odict.py Wed Nov 11 12:48:18 2015 -0500
[
@@ -4,6 +4,7 @@
 
 from UserDict import UserDict
 
+
 class odict(UserDict):
     """
     http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747
@@ -12,7 +13,7 @@
     added. Calling keys(), values(), items(), etc. will return results in this
     order.
     """
-    def __init__( self, dict = None ):
+    def __init__( self, dict=None ):
         self._keys = []
         UserDict.__init__( self, dict )