Mercurial > repos > devteam > flanking_features
changeset 2:a09d13b108fd draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
author | devteam |
---|---|
date | Thu, 22 Jun 2017 18:41:16 -0400 |
parents | 8307665c4b6c |
children | 9f12b7e500f1 |
files | flanking_features.py flanking_features.xml macros.xml tool_dependencies.xml utils/__init__.pyc utils/gff_util.py utils/gff_util.pyc utils/odict.pyc |
diffstat | 8 files changed, 139 insertions(+), 121 deletions(-) [+] |
line wrap: on
line diff
--- a/flanking_features.py Wed Nov 11 12:48:18 2015 -0500 +++ b/flanking_features.py Thu Jun 22 18:41:16 2017 -0400 @@ -1,5 +1,5 @@ #!/usr/bin/env python -#By: Guruprasad Ananda +# By: Guruprasad Ananda """ Fetch closest up/downstream interval from features corresponding to every interval in primary @@ -9,23 +9,26 @@ -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval """ +from __future__ import print_function import fileinput import sys + from bx.cookbook import doc_optparse from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper from bx.intervals.operations import quicksect from bx.tabular.io import ParseError from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped + from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper assert sys.version_info[:2] >= ( 2, 4 ) def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down): - #direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases - #threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand - #threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand + # direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases + # threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand + # threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand if direction == 1: if node.maxend <= threshold_up: if node.end == node.maxend: @@ -103,11 +106,11 @@ result_up = [] result_down = [] if (strand == '+' and up) or (strand == '-' and down): - #upstream +ve strand and downstream -ve strand cases + # 
upstream +ve strand and downstream -ve strand cases get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None) if (strand == '+' and down) or (strand == '-' and up): - #downstream +ve strand and upstream -ve strand case + # downstream +ve strand and upstream -ve strand case get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node )) if result_up: @@ -123,7 +126,7 @@ if result_down: if not(either): - #The last element of result_down will be the closest element to the given interval + # The last element of result_down will be the closest element to the given interval yield [ interval, result_down[-1].other ] if either and (result_up or result_down): @@ -132,12 +135,12 @@ if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)): iter_val = [ interval, result_up[res_ind].other ] else: - #The last element of result_down will be the closest element to the given interval + # The last element of result_down will be the closest element to the given interval iter_val = [ interval, result_down[-1].other ] elif result_up: iter_val = [ interval, result_up[res_ind].other ] elif result_down: - #The last element of result_down will be the closest element to the given interval + # The last element of result_down will be the closest element to the given interval iter_val = [ interval, result_down[-1].other ] yield iter_val @@ -203,14 +206,15 @@ out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) ) else: out_file.write( "%s\n" % result ) - except ParseError, exc: + except ParseError as exc: fail( "Invalid file format: %s" % str( exc ) ) - print "Direction: %s" % (direction) + print("Direction: %s" % (direction)) if g1.skipped > 0: - print skipped( g1, filedesc=" of 1st dataset" ) + print(skipped( g1, filedesc=" of 1st dataset" )) if g2.skipped > 0: - print skipped( g2, filedesc=" of 2nd dataset" ) + print(skipped( g2, filedesc=" of 2nd dataset" )) + if __name__ == "__main__": main()
--- a/flanking_features.xml Wed Nov 11 12:48:18 2015 -0500 +++ b/flanking_features.xml Thu Jun 22 18:41:16 2017 -0400 @@ -1,86 +1,87 @@ <tool id="flanking_features_1" name="Fetch closest non-overlapping feature" version="4.0.1"> - <description> for every interval</description> - <requirements> - <requirement type="package" version="0.7.1">bx-python</requirement> - <requirement type="package" version="1.0.0">galaxy-ops</requirement> - </requirements> - <command interpreter="python"> - flanking_features.py $input1 $input2 $out_file1 $direction - - #if isinstance( $input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__): - -1 1,4,5,7 --gff1 - #else: - -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} - #end if - - #if isinstance( $input2.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__): - -2 1,4,5,7 --gff2 - #else: - -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} - #end if - </command> - <inputs> - <param format="interval,gff" name="input1" type="data" label="For every interval in"/> - <param format="interval,gff" name="input2" type="data" label="Fetch closest feature(s) from"/> - <param name="direction" type="select" label="Located"> - <option value="Either">Either Upstream or Downstream</option> - <option value="Both">Both Upstream and Downstream</option> - <option value="Upstream">Upstream</option> - <option value="Downstream">Downstream</option> - </param> - </inputs> - <outputs> - <data format="input" name="out_file1" metadata_source="input1"/> - </outputs> - <tests> - <test> - <param name="input1" value="4_windows.bed"/> - <param name="input2" value="4_windows_2.bed"/> - <param name="direction" value="Either"/> - <output name="out_file1" file="closest_features_either.interval"/> - </test> - <test> - <param name="input1" value="4_windows.bed"/> - <param name="input2" 
value="4_windows_2.bed"/> - <param name="direction" value="Both"/> - <output name="out_file1" file="closest_features.interval"/> - </test> - <test> - <param name="input1" value="4_windows.bed"/> - <param name="input2" value="4_windows_2.bed"/> - <param name="direction" value="Upstream"/> - <output name="out_file1" file="closest_features_up.interval"/> - </test> - <test> - <param name="input1" value="4_windows.bed"/> - <param name="input2" value="4_windows_2.bed"/> - <param name="direction" value="Downstream"/> - <output name="out_file1" file="closest_features_down.interval"/> - </test> - <test> - <param name="input1" value="4_windows.bed"/> - <param name="input2" value="4_windows_3.bed"/> - <param name="direction" value="Both"/> - <output name="out_file1" file="closest_features_both.interval"/> - </test> - <!-- Tests for GFF functionality. --> + <description>for every interval</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <command><![CDATA[ +python '$__tool_directory__/flanking_features.py' +'$input1' +'$input2' +'$out_file1' +$direction + +#if $input1.is_of_type('gff') + -1 1,4,5,7 --gff1 +#else: + -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} +#end if - <test> - <param name="input1" value="4_windows.bed"/> - <param name="input2" value="4_windows_2.gff"/> - <param name="direction" value="Either"/> - <output name="out_file1" file="closest_features_both.gff"/> - </test> - <test> - <param name="input1" value="4_windows.gff"/> - <param name="input2" value="4_windows_2.gff"/> - <param name="direction" value="Either"/> - <output name="out_file1" file="closest_features_both2.gff"/> - </test> - - </tests> - <help> - +#if $input2.is_of_type('gff') + -2 1,4,5,7 --gff2 +#else: + -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} +#end if + ]]></command> + <inputs> + <param name="input1" 
type="data" format="interval,gff" label="For every interval in"/> + <param name="input2" type="data" format="interval,gff" label="Fetch closest feature(s) from"/> + <param name="direction" type="select" label="Located"> + <option value="Either">Either Upstream or Downstream</option> + <option value="Both">Both Upstream and Downstream</option> + <option value="Upstream">Upstream</option> + <option value="Downstream">Downstream</option> + </param> + </inputs> + <outputs> + <data name="out_file1" format_source="input1" metadata_source="input1"/> + </outputs> + <tests> + <test> + <param name="input1" value="4_windows.bed"/> + <param name="input2" value="4_windows_2.bed"/> + <param name="direction" value="Either"/> + <output name="out_file1" file="closest_features_either.interval"/> + </test> + <test> + <param name="input1" value="4_windows.bed"/> + <param name="input2" value="4_windows_2.bed"/> + <param name="direction" value="Both"/> + <output name="out_file1" file="closest_features.interval"/> + </test> + <test> + <param name="input1" value="4_windows.bed"/> + <param name="input2" value="4_windows_2.bed"/> + <param name="direction" value="Upstream"/> + <output name="out_file1" file="closest_features_up.interval"/> + </test> + <test> + <param name="input1" value="4_windows.bed"/> + <param name="input2" value="4_windows_2.bed"/> + <param name="direction" value="Downstream"/> + <output name="out_file1" file="closest_features_down.interval"/> + </test> + <test> + <param name="input1" value="4_windows.bed"/> + <param name="input2" value="4_windows_3.bed"/> + <param name="direction" value="Both"/> + <output name="out_file1" file="closest_features_both.interval"/> + </test> + <!-- Tests for GFF functionality. 
--> + <test> + <param name="input1" value="4_windows.bed"/> + <param name="input2" value="4_windows_2.gff" ftype="gff" /> + <param name="direction" value="Either"/> + <output name="out_file1" file="closest_features_both.gff"/> + </test> + <test> + <param name="input1" value="4_windows.gff" ftype="gff" /> + <param name="input2" value="4_windows_2.gff" ftype="gff" /> + <param name="direction" value="Either"/> + <output name="out_file1" file="closest_features_both2.gff"/> + </test> + </tests> + <help><![CDATA[ .. class:: infomark **What it does** @@ -91,7 +92,7 @@ .. class:: warningmark -**Note:** +**Note:** Every line should contain at least 3 columns: chromosome number, start and stop coordinates. If any of these columns is missing or if start and stop coordinates are not numerical, the lines will be treated as invalid and skipped. The number of skipped lines is documented in the resulting history item as a "data issue". @@ -124,8 +125,5 @@ chr1 500 1000 Query1.2 chr1 2000 2204 Query2.4 chr1 1100 1250 Query1.3 chr1 580 1050 Query2.3 chr1 1100 1250 Query1.3 chr1 2000 2204 Query2.4 - -</help> - - -</tool> \ No newline at end of file + ]]></help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Jun 22 18:41:16 2017 -0400
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<macros>
+ <xml name="requirements">
+ <requirements>
+ <requirement type="package" version="0.7.1">bx-python</requirement>
+ <requirement type="package" version="1.0.0">galaxy-ops</requirement>
+ </requirements>
+ </xml>
+ <token name="@SCREENCASTS@">
+-----
+
+**Screencasts!**
+
+See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
+
+.. _Screencasts: https://galaxyproject.org/learn/interval-operations/
+
+-----
+ </token>
+</macros>
--- a/tool_dependencies.xml Wed Nov 11 12:48:18 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
- <package name="bx-python" version="0.7.1">
- <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
- </package>
- <package name="galaxy-ops" version="1.0.0">
- <repository changeset_revision="9cbb20b85c01" name="package_galaxy_ops_1_0_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
- </package>
-</tool_dependency>
--- a/utils/gff_util.py Wed Nov 11 12:48:18 2015 -0500 +++ b/utils/gff_util.py Thu Jun 22 18:41:16 2017 -0400 @@ -1,11 +1,12 @@ """ Provides utilities for working with GFF files. """ +import copy -import copy from bx.intervals.io import GenomicInterval, GenomicIntervalReader, MissingFieldError, NiceReaderWrapper -from bx.tabular.io import Header, Comment, ParseError -from utils.odict import odict +from bx.tabular.io import Comment, Header, ParseError + +from .odict import odict class GFFInterval( GenomicInterval ): @@ -144,7 +145,7 @@ self.default_strand, fix_strand=self.fix_strand ) return interval - def next( self ): + def __next__( self ): """ Returns next GFFFeature. """ # @@ -177,10 +178,10 @@ while not self.seed_interval: try: self.seed_interval = GenomicIntervalReader.next( self ) - except ParseError, e: + except ParseError as e: handle_parse_error( e ) # TODO: When no longer supporting python 2.4 use finally: - #finally: + # finally: raw_size += len( self.current_line ) # If header or comment, clear seed interval and return it with its size. @@ -205,19 +206,19 @@ try: interval = GenomicIntervalReader.next( self ) raw_size += len( self.current_line ) - except StopIteration, e: + except StopIteration as e: # No more intervals to read, but last feature needs to be # returned. interval = None raw_size += len( self.current_line ) break - except ParseError, e: + except ParseError as e: handle_parse_error( e ) raw_size += len( self.current_line ) continue # TODO: When no longer supporting python 2.4 use finally: - #finally: - #raw_size += len( self.current_line ) + # finally: + # raw_size += len( self.current_line ) # Ignore comments. if isinstance( interval, Comment ): @@ -263,6 +264,7 @@ convert_gff_coords_to_bed( feature ) return feature + next = __next__ # This line should be removed once the bx-python port to Python3 is finished def convert_bed_coords_to_gff( interval ): @@ -374,7 +376,9 @@ # -- Get function that generates line/feature key. 
-- - get_transcript_id = lambda fields: parse_gff_attributes( fields[8] )[ 'transcript_id' ] + def get_transcript_id(fields): + return parse_gff_attributes( fields[8] )[ 'transcript_id' ] + if strict: # Strict GTF parsing uses transcript_id only to group lines into feature. key_fn = get_transcript_id @@ -382,7 +386,8 @@ # Use lenient parsing where chromosome + transcript_id is the key. This allows # transcripts with same ID on different chromosomes; this occurs in some popular # datasources, such as RefGenes in UCSC. - key_fn = lambda fields: fields[0] + '_' + get_transcript_id( fields ) + def key_fn(fields): + return fields[0] + '_' + get_transcript_id( fields ) # Aggregate intervals by transcript_id and collect comments. feature_intervals = odict()