Mercurial > repos > iuc > extract_genomic_dna

--- a/extract_genomic_dna.py	Mon Jul 03 07:19:41 2017 -0400
+++ b/extract_genomic_dna.py	Mon Oct 23 13:26:18 2017 -0400
@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+from __future__ import print_function
+
 import argparse
 import os

@@ -81,7 +83,7 @@
                     start, end = egdu.convert_gff_coords_to_bed([start, end])
                 if includes_strand_col:
                     strand = fields[strand_col]
-            except:
+            except Exception:
                 warning = "Invalid chrom, start or end column values. "
                 warnings.append(warning)
                 if not invalid_lines:
@@ -129,7 +131,7 @@
                     sequence += twobitfile[interval.chrom][interval.start:interval.end]
             else:
                 sequence = twobitfile[chrom][start:end]
-        except:
+        except Exception:
             warning = "Unable to fetch the sequence from '%d' to '%d' for chrom '%s'. " % (start, end - start, chrom)
             warnings.append(warning)
             if not invalid_lines:
@@ -156,8 +158,6 @@
     if includes_strand_col and strand == "-":
         sequence = egdu.reverse_complement(sequence)
     if args.output_format == "fasta":
-        l = len(sequence)
-        c = 0
         if input_is_gff:
             start, end = egdu.convert_bed_coords_to_gff([start, end])
         if args.fasta_header_type == "bedtools_getfasta_default":
@@ -175,8 +175,10 @@
                 out.write(">%s %s\n" % (meta_data, name))
             else:
                 out.write(">%s\n" % meta_data)
-        while c < l:
-            b = min(c + 50, l)
+        c = 0
+        sequence_length = len(sequence)
+        while c < sequence_length:
+            b = min(c + 50, sequence_length)
             out.write("%s\n" % str(sequence[c:b]))
             c = b
     else:
@@ -209,10 +211,10 @@
 if warnings:
     warn_msg = "%d warnings, 1st is: " % len(warnings)
     warn_msg += warnings[0]
-    print warn_msg
+    print(warn_msg)
 if skipped_lines:
     # Error message includes up to the first 10 skipped lines.
-    print 'Skipped %d invalid lines, 1st is #%d, "%s"' % (skipped_lines, first_invalid_line, '\n'.join(invalid_lines[:10]))
+    print('Skipped %d invalid lines, 1st is #%d, "%s"' % (skipped_lines, first_invalid_line, '\n'.join(invalid_lines[:10])))

 if args.reference_genome_source == "history":
     os.remove(seq_path)
--- a/extract_genomic_dna.xml	Mon Jul 03 07:19:41 2017 -0400
+++ b/extract_genomic_dna.xml	Mon Oct 23 13:26:18 2017 -0400
@@ -2,6 +2,7 @@
     <description>using coordinates from assembled/unassembled genomes</description>
     <requirements>
         <requirement type="package" version="0.7.1">bx-python</requirement>
+        <requirement type="package" version="1.10.0">six</requirement>
         <requirement type="package" version="324">ucsc-fatotwobit</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
--- a/extract_genomic_dna_utils.py	Mon Jul 03 07:19:41 2017 -0400
+++ b/extract_genomic_dna_utils.py	Mon Oct 23 13:26:18 2017 -0400
@@ -6,6 +6,7 @@

 from bx.intervals.io import Comment, GenomicInterval, Header
 from bx.intervals.io import GenomicIntervalReader, NiceReaderWrapper, ParseError
+from six import Iterator

 # Default chrom, start, end, strand cols for a bed file
 BED_DEFAULT_COLS = 0, 1, 2, 5
@@ -99,7 +100,7 @@
         return lines


-class GFFReaderWrapper(NiceReaderWrapper):
+class GFFReaderWrapper(Iterator, NiceReaderWrapper):
     """
     Reader wrapper for GFF files which has two major functions:
     1. group entries for GFF file (via group column), GFF3 (via id attribute),
@@ -128,12 +129,12 @@
                                fix_strand=self.fix_strand)
         return interval

-    def next(self):
+    def __next__(self):
         """
         Returns next GFFFeature.
         """

-        def handle_parse_error(parse_error):
+        def handle_parse_error(e):
             """
             Actions to take when ParseError found.
             """
@@ -367,7 +368,7 @@
         # looks something like 1,2,3,
         if cols.endswith(','):
             cols += '0'
-        col_list = map(lambda x: int(x) - 1, cols.split(","))
+        col_list = [int(x) - 1 for x in cols.split(",")]
         return col_list
     else:
         return BED_DEFAULT_COLS