Repository 'extract_genomic_dna'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/extract_genomic_dna

Changeset 11:80414c33a59a (2024-11-21)
Previous changeset 10:5cc8e93ee98f (2019-12-06)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 6db2d98b513e4980788fcba49d809c91e5750296
modified:
extract_genomic_dna.py
extract_genomic_dna.xml
extract_genomic_dna_utils.py
b
diff -r 5cc8e93ee98f -r 80414c33a59a extract_genomic_dna.py
--- a/extract_genomic_dna.py Fri Dec 06 15:24:00 2019 -0500
+++ b/extract_genomic_dna.py Thu Nov 21 07:20:29 2024 +0000
[
b'@@ -11,24 +11,47 @@\n import extract_genomic_dna_utils as egdu  # noqa: I100,I202\n \n parser = argparse.ArgumentParser()\n-parser.add_argument(\'--input_format\', dest=\'input_format\', help="Input dataset format")\n-parser.add_argument(\'--input\', dest=\'input\', help="Input dataset")\n-parser.add_argument(\'--genome\', dest=\'genome\', help="Input dataset genome build")\n-parser.add_argument(\'--interpret_features\', dest=\'interpret_features\', default=None, help="Interpret features if input format is gff")\n-parser.add_argument(\'--columns\', dest=\'columns\', help="Columns to use in input file")\n-parser.add_argument(\'--reference_genome_source\', dest=\'reference_genome_source\', help="Source of reference genome file")\n-parser.add_argument(\'--reference_genome\', dest=\'reference_genome\', help="Reference genome file")\n-parser.add_argument(\'--output_format\', dest=\'output_format\', help="Output format")\n-parser.add_argument(\'--fasta_header_type\', dest=\'fasta_header_type\', default=None, help="Fasta header format")\n-parser.add_argument(\'--fasta_header_delimiter\', dest=\'fasta_header_delimiter\', default=None, help="Fasta header field delimiter")\n-parser.add_argument(\'--output\', dest=\'output\', help="Output dataset")\n+parser.add_argument("--input_format", dest="input_format", help="Input dataset format")\n+parser.add_argument("--input", dest="input", help="Input dataset")\n+parser.add_argument("--genome", dest="genome", help="Input dataset genome build")\n+parser.add_argument(\n+    "--interpret_features",\n+    dest="interpret_features",\n+    default=None,\n+    help="Interpret features if input format is gff",\n+)\n+parser.add_argument("--columns", dest="columns", help="Columns to use in input file")\n+parser.add_argument(\n+    "--reference_genome_source",\n+    dest="reference_genome_source",\n+    help="Source of reference genome file",\n+)\n+parser.add_argument(\n+    "--reference_genome", dest="reference_genome", help="Reference genome file"\n+)\n+parser.add_argument("--output_format", dest="output_format", help="Output format")\n+parser.add_argument(\n+    "--fasta_header_type",\n+    dest="fasta_header_type",\n+    default=None,\n+    help="Fasta header format",\n+)\n+parser.add_argument(\n+    "--fasta_header_delimiter",\n+    dest="fasta_header_delimiter",\n+    default=None,\n+    help="Fasta header field delimiter",\n+)\n+parser.add_argument("--output", dest="output", help="Output dataset")\n args = parser.parse_args()\n \n-input_is_gff = args.input_format == \'gff\'\n+input_is_gff = args.input_format == "gff"\n interpret_features = input_is_gff and args.interpret_features == "yes"\n-if len(args.columns.split(\',\')) == 5:\n+if len(args.columns.split(",")) == 5:\n     # Bed file.\n-    chrom_col, start_col, end_col, strand_col, name_col = egdu.parse_cols_arg(args.columns)\n+    chrom_col, start_col, end_col, strand_col, name_col = egdu.parse_cols_arg(\n+        args.columns\n+    )\n else:\n     # Gff file.\n     chrom_col, start_col, end_col, strand_col = egdu.parse_cols_arg(args.columns)\n@@ -47,13 +70,13 @@\n first_invalid_line = 0\n invalid_lines = []\n warnings = []\n-warning = \'\'\n+warning = ""\n twobitfile = None\n line_count = 1\n file_iterator = open(args.input)\n if interpret_features:\n     file_iterator = egdu.GFFReaderWrapper(file_iterator, fix_strand=False)\n-out = open(args.output, \'wt\')\n+out = open(args.output, "wt")\n \n for feature in file_iterator:\n     # Ignore comments, headers.\n@@ -70,9 +93,9 @@\n         strand = feature.strand\n     else:\n         # Processing lines, either interval or GFF format.\n-        line = feature.rstrip(\'\\r\\n\')\n+        line = feature.rstrip("\\r\\n")\n         if line and not line.startswith("#"):\n-            fields = line.split(\'\\t\')\n+            fields = line.split("\\t")\n             try:\n                 chrom = fields[chrom_col]\n                 start = int(fields[start_col])\n@@ -99,9 +122,9 @@\n                     first_invalid_line = line_count\n                 skipped_lines += len(invalid_lines)\n                 continue\n-            if strand not in [\'+\', \'-'..b'    args.genome,\n+        )\n         warnings.append(warning)\n         if not invalid_lines:\n             invalid_lines = egdu.get_lines(feature)\n             first_invalid_line = line_count\n         skipped_lines += len(invalid_lines)\n         continue\n-    if sequence == \'\':\n-        warning = "Chrom: \'%s\', start: \'%d\', end: \'%d\' is either invalid or not present in build \'%s\'. " % (chrom, start, end, args.genome)\n+    if sequence == "":\n+        warning = (\n+            "Chrom: \'%s\', start: \'%d\', end: \'%d\' is either invalid or not present in build \'%s\'. "\n+            % (chrom, start, end, args.genome)\n+        )\n         warnings.append(warning)\n         if not invalid_lines:\n             invalid_lines = egdu.get_lines(feature)\n@@ -161,15 +200,18 @@\n         if input_is_gff:\n             start, end = egdu.convert_bed_coords_to_gff([start, end])\n         if args.fasta_header_type == "bedtools_getfasta_default":\n-            out.write(">%s\\n" % egdu.get_bedtools_getfasta_default_header(str(chrom),\n-                                                                          str(start),\n-                                                                          str(end),\n-                                                                          strand,\n-                                                                          includes_strand_col))\n+            out.write(\n+                ">%s\\n"\n+                % egdu.get_bedtools_getfasta_default_header(\n+                    str(chrom), str(start), str(end), strand, includes_strand_col\n+                )\n+            )\n         else:\n             # args.fasta_header_type == "char_delimited":\n             fields = [args.genome, str(chrom), str(start), str(end), strand]\n-            field_delimiter = egdu.get_fasta_header_delimiter(args.fasta_header_delimiter)\n+            field_delimiter = egdu.get_fasta_header_delimiter(\n+                args.fasta_header_delimiter\n+            )\n             meta_data = field_delimiter.join(fields)\n             if name.strip():\n                 out.write(">%s %s\\n" % (meta_data, name))\n@@ -184,20 +226,24 @@\n     else:\n         # output_format == "interval".\n         if interpret_features:\n-            meta_data = "\\t".join([feature.chrom,\n-                                   "galaxy_extract_genomic_dna",\n-                                   "interval",\n-                                   str(feature.start),\n-                                   str(feature.end),\n-                                   feature.score,\n-                                   feature.strand,\n-                                   ".",\n-                                   egdu.gff_attributes_to_str(feature.attributes, "GTF")])\n+            meta_data = "\\t".join(\n+                [\n+                    feature.chrom,\n+                    "galaxy_extract_genomic_dna",\n+                    "interval",\n+                    str(feature.start),\n+                    str(feature.end),\n+                    feature.score,\n+                    feature.strand,\n+                    ".",\n+                    egdu.gff_attributes_to_str(feature.attributes, "GTF"),\n+                ]\n+            )\n         else:\n             # Here fields was set up around line 73.\n             meta_data = "\\t".join(fields)\n         if input_is_gff:\n-            format_str = "%s seq \\"%s\\";\\n"\n+            format_str = \'%s seq "%s";\\n\'\n         else:\n             format_str = "%s\\t%s\\n"\n         out.write(format_str % (meta_data, str(sequence)))\n@@ -214,7 +260,10 @@\n     print(warn_msg)\n if skipped_lines:\n     # Error message includes up to the first 10 skipped lines.\n-    print(\'Skipped %d invalid lines, 1st is #%d, "%s"\' % (skipped_lines, first_invalid_line, \'\\n\'.join(invalid_lines[:10])))\n+    print(\n+        \'Skipped %d invalid lines, 1st is #%d, "%s"\'\n+        % (skipped_lines, first_invalid_line, "\\n".join(invalid_lines[:10]))\n+    )\n \n if args.reference_genome_source == "history":\n     os.remove(seq_path)\n'
b
diff -r 5cc8e93ee98f -r 80414c33a59a extract_genomic_dna.xml
--- a/extract_genomic_dna.xml Fri Dec 06 15:24:00 2019 -0500
+++ b/extract_genomic_dna.xml Thu Nov 21 07:20:29 2024 +0000
[
@@ -1,10 +1,13 @@
-<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.3+galaxy2">
+<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.3+galaxy3">
     <description>using coordinates from assembled/unassembled genomes</description>
     <requirements>
         <requirement type="package" version="0.7.1">bx-python</requirement>
         <requirement type="package" version="1.13.0">six</requirement>
         <requirement type="package" version="377">ucsc-fatotwobit</requirement>
     </requirements>
+    <required_files>
+        <include path="extract_genomic_dna_utils.py" />
+    </required_files>
     <command detect_errors="exit_code"><![CDATA[
 #set genome = $input.metadata.dbkey
 #set datatype = $input.datatype
@@ -104,7 +107,7 @@
             <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
             <param name="interpret_features" value="no"/>
             <param name="reference_genome_source" value="history"/>
-            <param name="reference_genome" value="mm9.fasta"/>
+            <param name="reference_genome" value="mm9.fasta" dbkey="mm9" ftype="fasta"/>
             <param name="output_format" value="fasta"/>
             <param name="fasta_header_type" value="char_delimited"/>
             <param name="fasta_header_delimiter" value="tilde"/>
@@ -114,7 +117,7 @@
             <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
             <param name="interpret_features" value="yes"/>
             <param name="reference_genome_source" value="history"/>
-            <param name="reference_genome" value="mm9.fasta"/>
+            <param name="reference_genome" value="mm9.fasta" dbkey="mm9"/>
             <param name="output_format" value="fasta"/>
             <param name="fasta_header_type" value="bedtools_getfasta_default"/>
             <output name="output" file="extract_genomic_dna_out2.fasta" compare="contains" />
@@ -123,7 +126,7 @@
             <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
             <param name="interpret_features" value="no"/>
             <param name="reference_genome_source" value="history"/>
-            <param name="reference_genome" value="mm9.fasta"/>
+            <param name="reference_genome" value="mm9.fasta" dbkey="mm9"/>
             <param name="output_format" value="interval"/>
             <output name="output" file="extract_genomic_dna_out3.gff" compare="contains" />
         </test>
@@ -131,7 +134,7 @@
             <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
             <param name="interpret_features" value="yes"/>
             <param name="reference_genome_source" value="history"/>
-            <param name="reference_genome" value="mm9.fasta"/>
+            <param name="reference_genome" value="mm9.fasta" dbkey="mm9"/>
             <param name="output_format" value="interval"/>
             <output name="output" file="extract_genomic_dna_out4.gff" compare="contains" />
         </test>
b
diff -r 5cc8e93ee98f -r 80414c33a59a extract_genomic_dna_utils.py
--- a/extract_genomic_dna_utils.py Fri Dec 06 15:24:00 2019 -0500
+++ b/extract_genomic_dna_utils.py Thu Nov 21 07:20:29 2024 +0000
b
@@ -178,7 +178,7 @@
             try:
                 interval = GenomicIntervalReader.next(self)
                 raw_size += len(self.current_line)
-            except StopIteration as e:
+            except StopIteration:
                 # No more intervals to read, but last feature needs to be
                 # returned.
                 interval = None