# HG changeset patch
# User galaxyp
# Date 1570479279 14400
# Node ID 9c4a48f5d4e737ea32830755492b50297b24ae8a
# Parent da1b538b87e52f3d48fddb5fe2d1d20d1b284c97
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 6babd357845126292cb202aaea0f70ff68819525"
diff -r da1b538b87e5 -r 9c4a48f5d4e7 retrieve_ensembl_bed.py
--- a/retrieve_ensembl_bed.py Mon Jan 22 13:13:47 2018 -0500
+++ b/retrieve_ensembl_bed.py Mon Oct 07 16:14:39 2019 -0400
@@ -34,6 +34,12 @@
help='Restrict Ensembl retrieval to regions e.g.:'
+ ' X,2:20000-25000,3:100-500+')
parser.add_argument(
+ '-i', '--interval_file', default=None,
+ help='Regions from a bed, gff, or interval file')
+ parser.add_argument(
+ '-f', '--interval_format', choices=['bed','gff','interval'], default='interval',
+ help='Interval format has TAB-separated columns: Seq, Start, End, Strand')
+ parser.add_argument(
'-B', '--biotypes', action='append', default=[],
help='Restrict Ensembl biotypes to retrieve')
parser.add_argument(
@@ -75,6 +81,27 @@
if args.debug:
print("selected_regions: %s" % selected_regions, file=sys.stderr)
+    if args.interval_file:
+        # Default 'interval' layout: Seq[, Start[, End[, Strand]]], TAB-separated.
+        pat = r'^(?:chr)?([^\t]+)(?:\t(\d+)(?:\t(\d+)(?:\t([+-])?)?)?)?.*'
+        if args.interval_format == 'bed':
+            # BED: chrom, start, end, then two skipped columns before strand.
+            pat = r'^(?:chr)?([^\t]+)\t(\d+)\t(\d+)(?:(?:\t[^\t]+\t[^\t]+\t)([+-]))?.*'
+        elif args.interval_format == 'gff':
+            # GFF/GTF: seqid, source, type, start, end, score, strand (cols 4, 5, 7).
+            pat = r'^(?:chr)?([^\t]+)\t[^\t]+\t[^\t]+\t(\d+)\t(\d+)(?:\t[^\t]+\t([+-]))?.*'
+        with open(args.interval_file, 'r') as fh:
+            for line in fh:
+                if line.startswith('#'):  # skip comment/header lines
+                    continue
+                m = re.match(pat, line.rstrip())
+                if m and m.group(1):  # lines that do not fit the format are ignored
+                    (chrom, start, end, strand) = m.groups()
+                    selected_regions.setdefault(chrom, []).append([start, end, strand])
+        if args.debug:
+            print("selected_regions: %s" % selected_regions, file=sys.stderr)
+
+
def retrieve_region(species, ref, start, stop, strand):
transcript_count = 0
regions = list(range(start, stop, max_region))
diff -r da1b538b87e5 -r 9c4a48f5d4e7 retrieve_ensembl_bed.xml
--- a/retrieve_ensembl_bed.xml Mon Jan 22 13:13:47 2018 -0500
+++ b/retrieve_ensembl_bed.xml Mon Oct 07 16:14:39 2019 -0400
@@ -20,6 +20,16 @@
#if $regions:
--regions '$regions'
#end if
+ #if $interval_file:
+ #if $interval_file.ext.find('bed') > -1
+ --interval_format bed
+ #elif $interval_file.ext in ['gff','gtf','gff3']
+ --interval_format gff
+ #else
+ --interval_format interval
+ #end if
+ --interval_file '$interval_file'
+ #end if
'$transcript_bed'
]]>
@@ -40,6 +50,7 @@
Each region is specifed as: chr or chr:pos or chr:from-to
^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$
+
@@ -60,6 +71,16 @@
+
+
+
+
+
+