# HG changeset patch
# User galaxyp
# Date 1570479279 14400
# Node ID 9c4a48f5d4e737ea32830755492b50297b24ae8a
# Parent  da1b538b87e52f3d48fddb5fe2d1d20d1b284c97
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 6babd357845126292cb202aaea0f70ff68819525"
diff -r da1b538b87e5 -r 9c4a48f5d4e7 retrieve_ensembl_bed.py
--- a/retrieve_ensembl_bed.py	Mon Jan 22 13:13:47 2018 -0500
+++ b/retrieve_ensembl_bed.py	Mon Oct 07 16:14:39 2019 -0400
@@ -34,6 +34,12 @@
         help='Restrict Ensembl retrieval to regions e.g.:'
              + ' X,2:20000-25000,3:100-500+')
     parser.add_argument(
+        '-i', '--interval_file', default=None,
+        help='Regions from a bed, gff, or interval file')
+    parser.add_argument(
+        '-f', '--interval_format', choices=['bed','gff','interval'], default='interval',
+        help='Interval format has TAB-separated columns: Seq, Start, End, Strand')
+    parser.add_argument(
         '-B', '--biotypes', action='append', default=[],
         help='Restrict Ensembl biotypes to retrieve')
     parser.add_argument(
@@ -75,6 +81,27 @@
         if args.debug:
             print("selected_regions: %s" % selected_regions, file=sys.stderr)
 
+    if args.interval_file:
+        # Each pattern captures (Seq, Start, End, Strand); a leading 'chr' is dropped.
+        # interval: Seq[, Start[, End[, Strand]]]
+        pat = r'^(?:chr)?([^\t]+)(?:\t(\d+)(?:\t(\d+)(?:\t([+-])?)?)?)?.*'
+        if args.interval_format == 'bed':  # Seq, Start, End[, name, score, Strand]
+            pat = r'^(?:chr)?([^\t]+)\t(\d+)\t(\d+)(?:(?:\t[^\t]+\t[^\t]+\t)([+-]))?.*'
+        elif args.interval_format == 'gff':  # Seq, source, type, Start, End[, score, Strand]
+            pat = r'^(?:chr)?([^\t]+)\t[^\t]*\t[^\t]*\t(\d+)\t(\d+)(?:\t[^\t]*\t([+-]))?.*'
+        with open(args.interval_file, 'r') as fh:
+            for line in fh:
+                if line.startswith('#'):
+                    continue
+                m = re.match(pat, line.rstrip())
+                if m:  # lines that do not match the pattern are skipped
+                    (chrom, start, end, strand) = m.groups()
+                    # chrom is non-empty on any match: [^\t]+ needs at least one char
+                    selected_regions.setdefault(chrom, []).append([start, end, strand])
+        if args.debug:
+            print("selected_regions: %s" % selected_regions, file=sys.stderr)
+               
+
     def retrieve_region(species, ref, start, stop, strand):
         transcript_count = 0
         regions = list(range(start, stop, max_region))
diff -r da1b538b87e5 -r 9c4a48f5d4e7 retrieve_ensembl_bed.xml
--- a/retrieve_ensembl_bed.xml	Mon Jan 22 13:13:47 2018 -0500
+++ b/retrieve_ensembl_bed.xml	Mon Oct 07 16:14:39 2019 -0400
@@ -20,6 +20,16 @@
             #if $regions:
                 --regions '$regions'
             #end if
+            #if $interval_file:
+                #if $interval_file.ext.find('bed') > -1
+                    --interval_format bed
+                #elif $interval_file.ext in ['gff','gtf','gff3']
+                    --interval_format gff
+                #else
+                    --interval_format interval
+                #end if
+                --interval_file '$interval_file' 
+            #end if
             '$transcript_bed'
     ]]>
     
@@ -40,6 +50,7 @@
             Each region is specifed as: chr or chr:pos or chr:from-to
             ^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$
         
+        
     
     
         
@@ -60,6 +71,16 @@
                 
             
         
+        
+            
+            
+            
+            
+