Previous changeset 0:90100b587723 (2014-04-01) Next changeset 2:a09d13b108fd (2017-06-22) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit a1517c9d22029095120643bbe2c8fa53754dd2b7 |
modified:
flanking_features.py tool_dependencies.xml utils/gff_util.py utils/odict.py |
b |
diff -r 90100b587723 -r 8307665c4b6c flanking_features.py --- a/flanking_features.py Tue Apr 01 10:52:59 2014 -0400 +++ b/flanking_features.py Wed Nov 11 12:48:18 2015 -0500 |
[ |
b'@@ -10,21 +10,23 @@\n -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval\n """\n \n-import sys, traceback, fileinput\n-from warnings import warn\n+import fileinput\n+import sys\n from bx.cookbook import doc_optparse\n-from galaxy.tools.util.galaxyops import *\n-from bx.intervals.io import *\n+from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper\n from bx.intervals.operations import quicksect\n-from utils.gff_util import *\n+from bx.tabular.io import ParseError\n+from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped\n+from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper\n \n assert sys.version_info[:2] >= ( 2, 4 )\n \n-def get_closest_feature (node, direction, threshold_up, threshold_down, report_func_up, report_func_down):\n+\n+def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down):\n #direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases\n #threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand\n #threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand\n- if direction == 1: \n+ if direction == 1:\n if node.maxend <= threshold_up:\n if node.end == node.maxend:\n report_func_up(node)\n@@ -60,10 +62,11 @@\n if node.right:\n get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)\n \n+\n def proximal_region_finder(readers, region, comments=True):\n """\n- Returns an iterator that yields elements of the form [ <original_interval>, <closest_feature> ]. \n- Intervals are GenomicInterval objects. \n+ Returns an iterator that yields elements of the form [ <original_interval>, <closest_feature> ].\n+ Intervals are GenomicInterval objects.\n """\n primary = readers[0]\n features = readers[1]\n@@ -76,13 +79,13 @@\n up, down = True, True\n if region == \'Either\':\n either = True\n- \n+\n # Read features into memory:\n rightTree = quicksect.IntervalTree()\n for item in features:\n if type( item ) is GenomicInterval:\n rightTree.insert( item, features.linenum, item )\n- \n+\n for interval in primary:\n if type( interval ) is Header:\n yield interval\n@@ -96,33 +99,33 @@\n if chrom not in rightTree.chroms:\n continue\n else:\n- root = rightTree.chroms[chrom] #root node for the chrom tree\n+ root = rightTree.chroms[chrom] # root node for the chrom tree\n result_up = []\n result_down = []\n- if (strand == \'+\' and up) or (strand == \'-\' and down): \n+ if (strand == \'+\' and up) or (strand == \'-\' and down):\n #upstream +ve strand and downstream -ve strand cases\n- get_closest_feature (root, 1, start, None, lambda node: result_up.append( node ), None)\n- \n+ get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None)\n+\n if (strand == \'+\' and down) or (strand == \'-\' and up):\n #downstream +ve strand and upstream -ve strand case\n- get_closest_feature (root, 0, None, end-1, None, lambda node: result_down.append( node ))\n- \n+ get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node ))\n+\n if result_up:\n- if len(result_up) > 1: #The results_up list has a list of intervals upstream to the given interval. \n+ if len(result_up) > 1: # The results_up list has a list of intervals upstream to the given interval.\n ends = ['..b' \n- if result_down: \n+\n+ if result_down:\n if not(either):\n #The last element of result_down will be the closest element to the given interval\n- yield [ interval, result_down[-1].other ] \n- \n+ yield [ interval, result_down[-1].other ]\n+\n if either and (result_up or result_down):\n iter_val = []\n if result_up and result_down:\n@@ -137,7 +140,8 @@\n #The last element of result_down will be the closest element to the given interval\n iter_val = [ interval, result_down[-1].other ]\n yield iter_val\n- \n+\n+\n def main():\n options, args = doc_optparse.parse( __doc__ )\n try:\n@@ -148,7 +152,7 @@\n in_fname, in2_fname, out_fname, direction = args\n except:\n doc_optparse.exception()\n- \n+\n # Set readers to handle either GFF or default format.\n if in1_gff_format:\n in1_reader_wrapper = GFFIntervalToBEDReaderWrapper\n@@ -160,22 +164,22 @@\n in2_reader_wrapper = NiceReaderWrapper\n \n g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ),\n- chrom_col=chr_col_1,\n- start_col=start_col_1,\n- end_col=end_col_1,\n- strand_col=strand_col_1,\n- fix_strand=True )\n+ chrom_col=chr_col_1,\n+ start_col=start_col_1,\n+ end_col=end_col_1,\n+ strand_col=strand_col_1,\n+ fix_strand=True )\n g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ),\n- chrom_col=chr_col_2,\n- start_col=start_col_2,\n- end_col=end_col_2,\n- strand_col=strand_col_2,\n- fix_strand=True )\n+ chrom_col=chr_col_2,\n+ start_col=start_col_2,\n+ end_col=end_col_2,\n+ strand_col=strand_col_2,\n+ fix_strand=True )\n \n # Find flanking features.\n out_file = open( out_fname, "w" )\n try:\n- for result in proximal_region_finder([g1,g2], direction):\n+ for result in proximal_region_finder([g1, g2], direction):\n if type( result ) is list:\n line, closest_feature = result\n # Need to join outputs differently depending on file types.\n@@ -185,12 +189,12 @@\n # Invervals are in BED coordinates; need to convert to GFF.\n line = convert_bed_coords_to_gff( line )\n closest_feature = convert_bed_coords_to_gff( closest_feature )\n- \n+\n # Replace double quotes with single quotes in closest feature\'s attributes.\n- out_file.write( "%s closest_feature \\"%s\\" \\n" % \n- ( "\\t".join( line.fields ), \\\n+ out_file.write( "%s closest_feature \\"%s\\" \\n" %\n+ ( "\\t".join( line.fields ),\n "\\t".join( closest_feature.fields ).replace( "\\"", "\\\\\\"" )\n- ) )\n+ ) )\n else:\n # Output is BED + closest feature fields.\n output_line_fields = []\n@@ -202,7 +206,7 @@\n except ParseError, exc:\n fail( "Invalid file format: %s" % str( exc ) )\n \n- print "Direction: %s" %(direction)\n+ print "Direction: %s" % (direction)\n if g1.skipped > 0:\n print skipped( g1, filedesc=" of 1st dataset" )\n if g2.skipped > 0:\n' |
b |
diff -r 90100b587723 -r 8307665c4b6c tool_dependencies.xml --- a/tool_dependencies.xml Tue Apr 01 10:52:59 2014 -0400 +++ b/tool_dependencies.xml Wed Nov 11 12:48:18 2015 -0500 |
b |
@@ -1,9 +1,9 @@ <?xml version="1.0"?> <tool_dependency> <package name="bx-python" version="0.7.1"> - <repository changeset_revision="41eb9d9f667d" name="package_bx_python_0_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> <package name="galaxy-ops" version="1.0.0"> - <repository changeset_revision="4e39032e4ec6" name="package_galaxy_ops_1_0_0" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="9cbb20b85c01" name="package_galaxy_ops_1_0_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency> |
b |
diff -r 90100b587723 -r 8307665c4b6c utils/gff_util.py --- a/utils/gff_util.py Tue Apr 01 10:52:59 2014 -0400 +++ b/utils/gff_util.py Wed Nov 11 12:48:18 2015 -0500 |
[ |
b'@@ -3,16 +3,17 @@\n """\n \n import copy\n-from bx.intervals.io import *\n-from bx.tabular.io import Header, Comment\n+from bx.intervals.io import GenomicInterval, GenomicIntervalReader, MissingFieldError, NiceReaderWrapper\n+from bx.tabular.io import Header, Comment, ParseError\n from utils.odict import odict\n \n+\n class GFFInterval( GenomicInterval ):\n """\n A GFF interval, including attributes. If file is strictly a GFF file,\n only attribute is \'group.\'\n """\n- def __init__( self, reader, fields, chrom_col=0, feature_col=2, start_col=3, end_col=4, \\\n+ def __init__( self, reader, fields, chrom_col=0, feature_col=2, start_col=3, end_col=4,\n strand_col=6, score_col=5, default_strand=\'.\', fix_strand=False ):\n # HACK: GFF format allows \'.\' for strand but GenomicInterval does not. To get around this,\n # temporarily set strand and then unset after initing GenomicInterval.\n@@ -20,7 +21,7 @@\n if not fix_strand and fields[ strand_col ] == \'.\':\n unknown_strand = True\n fields[ strand_col ] = \'+\'\n- GenomicInterval.__init__( self, reader, fields, chrom_col, start_col, end_col, strand_col, \\\n+ GenomicInterval.__init__( self, reader, fields, chrom_col, start_col, end_col, strand_col,\n default_strand, fix_strand=fix_strand )\n if unknown_strand:\n self.strand = \'.\'\n@@ -43,16 +44,17 @@\n return GFFInterval(self.reader, list( self.fields ), self.chrom_col, self.feature_col, self.start_col,\n self.end_col, self.strand_col, self.score_col, self.strand)\n \n+\n class GFFFeature( GFFInterval ):\n """\n A GFF feature, which can include multiple intervals.\n """\n- def __init__( self, reader, chrom_col=0, feature_col=2, start_col=3, end_col=4, \\\n- strand_col=6, score_col=5, default_strand=\'.\', fix_strand=False, intervals=[], \\\n+ def __init__( self, reader, chrom_col=0, feature_col=2, start_col=3, end_col=4,\n+ strand_col=6, score_col=5, default_strand=\'.\', fix_strand=False, intervals=[],\n raw_size=0 ):\n # Use copy so that first interval and feature do not share fields.\n- GFFInterval.__init__( self, reader, copy.deepcopy( intervals[0].fields ), chrom_col, feature_col, \\\n- start_col, end_col, strand_col, score_col, default_strand, \\\n+ GFFInterval.__init__( self, reader, copy.deepcopy( intervals[0].fields ), chrom_col, feature_col,\n+ start_col, end_col, strand_col, score_col, default_strand,\n fix_strand=fix_strand )\n self.intervals = intervals\n self.raw_size = raw_size\n@@ -60,7 +62,7 @@\n for interval in self.intervals:\n # Error checking. NOTE: intervals need not share the same strand.\n if interval.chrom != self.chrom:\n- raise ValueError( "interval chrom does not match self chrom: %s != %s" % \\\n+ raise ValueError( "interval chrom does not match self chrom: %s != %s" %\n ( interval.chrom, self.chrom ) )\n # Set start, end of interval.\n if interval.start < self.start:\n@@ -72,13 +74,9 @@\n """ Returns feature\'s name. """\n name = None\n # Preference for name: GTF, GFF3, GFF.\n- for attr_name in [\n- # GTF:\n- \'gene_id\', \'transcript_id\',\n- # GFF3:\n- \'ID\', \'id\',\n- # GFF (TODO):\n- \'group\' ]:\n+ for attr_name in [\'gene_id\', \'transcript_id\', # GTF\n+ \'ID\', \'id\', # GFF3\n+ \'group\' ]: # GFF (TODO)\n name = self.attributes.get( attr_name, None )\n if name is not None:\n break\n@@ -107,12 +105,13 @@\n def parse_row( se'..b'ibutes.get( \'Parent\', None )\n # For GTF.\n- feature_gene_id = self.seed_interval.attributes.get( \'gene_id\', None )\n feature_transcript_id = self.seed_interval.attributes.get( \'transcript_id\', None )\n \n # Read all intervals associated with seed.\n@@ -256,9 +253,9 @@\n self.seed_interval_line_len = len( self.current_line )\n \n # Return feature.\n- feature = GFFFeature( self, self.chrom_col, self.feature_col, self.start_col, \\\n- self.end_col, self.strand_col, self.score_col, \\\n- self.default_strand, fix_strand=self.fix_strand, \\\n+ feature = GFFFeature( self, self.chrom_col, self.feature_col, self.start_col,\n+ self.end_col, self.strand_col, self.score_col,\n+ self.default_strand, fix_strand=self.fix_strand,\n intervals=feature_intervals, raw_size=raw_size )\n \n # Convert to BED coords?\n@@ -267,6 +264,7 @@\n \n return feature\n \n+\n def convert_bed_coords_to_gff( interval ):\n """\n Converts an interval object\'s coordinates from BED format to GFF format.\n@@ -279,10 +277,11 @@\n if isinstance( interval, GFFFeature ):\n for subinterval in interval.intervals:\n convert_bed_coords_to_gff( subinterval )\n- elif type ( interval ) is list:\n+ elif type( interval ) is list:\n interval[ 0 ] += 1\n return interval\n \n+\n def convert_gff_coords_to_bed( interval ):\n """\n Converts an interval object\'s coordinates from GFF format to BED format.\n@@ -295,10 +294,11 @@\n if isinstance( interval, GFFFeature ):\n for subinterval in interval.intervals:\n convert_gff_coords_to_bed( subinterval )\n- elif type ( interval ) is list:\n+ elif type( interval ) is list:\n interval[ 0 ] -= 1\n return interval\n \n+\n def parse_gff_attributes( attr_str ):\n """\n Parses a GFF/GTF attribute string and returns a dictionary of name-value\n@@ -340,6 +340,7 @@\n attributes[\'group\'] = attr_str\n return attributes\n \n+\n def gff_attributes_to_str( attrs, gff_format ):\n """\n Convert GFF attributes to string. Supported formats are GFF3, GTF.\n@@ -363,6 +364,7 @@\n attrs_strs.append( format_string % ( name, value ) )\n return " ; ".join( attrs_strs )\n \n+\n def read_unordered_gtf( iterator, strict=False ):\n """\n Returns GTF features found in an iterator. GTF lines need not be ordered\n@@ -382,7 +384,6 @@\n # datasources, such as RefGenes in UCSC.\n key_fn = lambda fields: fields[0] + \'_\' + get_transcript_id( fields )\n \n-\n # Aggregate intervals by transcript_id and collect comments.\n feature_intervals = odict()\n comments = []\n@@ -403,7 +404,7 @@\n chroms_features = {}\n for count, intervals in enumerate( feature_intervals.values() ):\n # Sort intervals by start position.\n- intervals.sort( lambda a,b: cmp( a.start, b.start ) )\n+ intervals.sort( lambda a, b: cmp( a.start, b.start ) )\n feature = GFFFeature( None, intervals=intervals )\n if feature.chrom not in chroms_features:\n chroms_features[ feature.chrom ] = []\n@@ -413,9 +414,9 @@\n chroms_features_sorted = []\n for chrom_features in chroms_features.values():\n chroms_features_sorted.append( chrom_features )\n- chroms_features_sorted.sort( lambda a,b: cmp( a[0].chrom, b[0].chrom ) )\n+ chroms_features_sorted.sort( lambda a, b: cmp( a[0].chrom, b[0].chrom ) )\n for features in chroms_features_sorted:\n- features.sort( lambda a,b: cmp( a.start, b.start ) )\n+ features.sort( lambda a, b: cmp( a.start, b.start ) )\n \n # Yield comments first, then features.\n # FIXME: comments can appear anywhere in file, not just the beginning.\n@@ -427,4 +428,3 @@\n for chrom_features in chroms_features_sorted:\n for feature in chrom_features:\n yield feature\n-\n' |
b |
diff -r 90100b587723 -r 8307665c4b6c utils/odict.py --- a/utils/odict.py Tue Apr 01 10:52:59 2014 -0400 +++ b/utils/odict.py Wed Nov 11 12:48:18 2015 -0500 |
[ |
@@ -4,6 +4,7 @@ from UserDict import UserDict + class odict(UserDict): """ http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747 @@ -12,7 +13,7 @@ added. Calling keys(), values(), items(), etc. will return results in this order. """ - def __init__( self, dict = None ): + def __init__( self, dict=None ): self._keys = [] UserDict.__init__( self, dict ) |