Previous changeset 10:5cc8e93ee98f (2019-12-06) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 6db2d98b513e4980788fcba49d809c91e5750296 |
modified:
extract_genomic_dna.py extract_genomic_dna.xml extract_genomic_dna_utils.py |
b |
diff -r 5cc8e93ee98f -r 80414c33a59a extract_genomic_dna.py --- a/extract_genomic_dna.py Fri Dec 06 15:24:00 2019 -0500 +++ b/extract_genomic_dna.py Thu Nov 21 07:20:29 2024 +0000 |
[ |
b'@@ -11,24 +11,47 @@\n import extract_genomic_dna_utils as egdu # noqa: I100,I202\n \n parser = argparse.ArgumentParser()\n-parser.add_argument(\'--input_format\', dest=\'input_format\', help="Input dataset format")\n-parser.add_argument(\'--input\', dest=\'input\', help="Input dataset")\n-parser.add_argument(\'--genome\', dest=\'genome\', help="Input dataset genome build")\n-parser.add_argument(\'--interpret_features\', dest=\'interpret_features\', default=None, help="Interpret features if input format is gff")\n-parser.add_argument(\'--columns\', dest=\'columns\', help="Columns to use in input file")\n-parser.add_argument(\'--reference_genome_source\', dest=\'reference_genome_source\', help="Source of reference genome file")\n-parser.add_argument(\'--reference_genome\', dest=\'reference_genome\', help="Reference genome file")\n-parser.add_argument(\'--output_format\', dest=\'output_format\', help="Output format")\n-parser.add_argument(\'--fasta_header_type\', dest=\'fasta_header_type\', default=None, help="Fasta header format")\n-parser.add_argument(\'--fasta_header_delimiter\', dest=\'fasta_header_delimiter\', default=None, help="Fasta header field delimiter")\n-parser.add_argument(\'--output\', dest=\'output\', help="Output dataset")\n+parser.add_argument("--input_format", dest="input_format", help="Input dataset format")\n+parser.add_argument("--input", dest="input", help="Input dataset")\n+parser.add_argument("--genome", dest="genome", help="Input dataset genome build")\n+parser.add_argument(\n+ "--interpret_features",\n+ dest="interpret_features",\n+ default=None,\n+ help="Interpret features if input format is gff",\n+)\n+parser.add_argument("--columns", dest="columns", help="Columns to use in input file")\n+parser.add_argument(\n+ "--reference_genome_source",\n+ dest="reference_genome_source",\n+ help="Source of reference genome file",\n+)\n+parser.add_argument(\n+ "--reference_genome", dest="reference_genome", help="Reference genome file"\n+)\n+parser.add_argument("--output_format", dest="output_format", help="Output format")\n+parser.add_argument(\n+ "--fasta_header_type",\n+ dest="fasta_header_type",\n+ default=None,\n+ help="Fasta header format",\n+)\n+parser.add_argument(\n+ "--fasta_header_delimiter",\n+ dest="fasta_header_delimiter",\n+ default=None,\n+ help="Fasta header field delimiter",\n+)\n+parser.add_argument("--output", dest="output", help="Output dataset")\n args = parser.parse_args()\n \n-input_is_gff = args.input_format == \'gff\'\n+input_is_gff = args.input_format == "gff"\n interpret_features = input_is_gff and args.interpret_features == "yes"\n-if len(args.columns.split(\',\')) == 5:\n+if len(args.columns.split(",")) == 5:\n # Bed file.\n- chrom_col, start_col, end_col, strand_col, name_col = egdu.parse_cols_arg(args.columns)\n+ chrom_col, start_col, end_col, strand_col, name_col = egdu.parse_cols_arg(\n+ args.columns\n+ )\n else:\n # Gff file.\n chrom_col, start_col, end_col, strand_col = egdu.parse_cols_arg(args.columns)\n@@ -47,13 +70,13 @@\n first_invalid_line = 0\n invalid_lines = []\n warnings = []\n-warning = \'\'\n+warning = ""\n twobitfile = None\n line_count = 1\n file_iterator = open(args.input)\n if interpret_features:\n file_iterator = egdu.GFFReaderWrapper(file_iterator, fix_strand=False)\n-out = open(args.output, \'wt\')\n+out = open(args.output, "wt")\n \n for feature in file_iterator:\n # Ignore comments, headers.\n@@ -70,9 +93,9 @@\n strand = feature.strand\n else:\n # Processing lines, either interval or GFF format.\n- line = feature.rstrip(\'\\r\\n\')\n+ line = feature.rstrip("\\r\\n")\n if line and not line.startswith("#"):\n- fields = line.split(\'\\t\')\n+ fields = line.split("\\t")\n try:\n chrom = fields[chrom_col]\n start = int(fields[start_col])\n@@ -99,9 +122,9 @@\n first_invalid_line = line_count\n skipped_lines += len(invalid_lines)\n continue\n- if strand not in [\'+\', \'-'..b' args.genome,\n+ )\n warnings.append(warning)\n if not invalid_lines:\n invalid_lines = egdu.get_lines(feature)\n first_invalid_line = line_count\n skipped_lines += len(invalid_lines)\n continue\n- if sequence == \'\':\n- warning = "Chrom: \'%s\', start: \'%d\', end: \'%d\' is either invalid or not present in build \'%s\'. " % (chrom, start, end, args.genome)\n+ if sequence == "":\n+ warning = (\n+ "Chrom: \'%s\', start: \'%d\', end: \'%d\' is either invalid or not present in build \'%s\'. "\n+ % (chrom, start, end, args.genome)\n+ )\n warnings.append(warning)\n if not invalid_lines:\n invalid_lines = egdu.get_lines(feature)\n@@ -161,15 +200,18 @@\n if input_is_gff:\n start, end = egdu.convert_bed_coords_to_gff([start, end])\n if args.fasta_header_type == "bedtools_getfasta_default":\n- out.write(">%s\\n" % egdu.get_bedtools_getfasta_default_header(str(chrom),\n- str(start),\n- str(end),\n- strand,\n- includes_strand_col))\n+ out.write(\n+ ">%s\\n"\n+ % egdu.get_bedtools_getfasta_default_header(\n+ str(chrom), str(start), str(end), strand, includes_strand_col\n+ )\n+ )\n else:\n # args.fasta_header_type == "char_delimited":\n fields = [args.genome, str(chrom), str(start), str(end), strand]\n- field_delimiter = egdu.get_fasta_header_delimiter(args.fasta_header_delimiter)\n+ field_delimiter = egdu.get_fasta_header_delimiter(\n+ args.fasta_header_delimiter\n+ )\n meta_data = field_delimiter.join(fields)\n if name.strip():\n out.write(">%s %s\\n" % (meta_data, name))\n@@ -184,20 +226,24 @@\n else:\n # output_format == "interval".\n if interpret_features:\n- meta_data = "\\t".join([feature.chrom,\n- "galaxy_extract_genomic_dna",\n- "interval",\n- str(feature.start),\n- str(feature.end),\n- feature.score,\n- feature.strand,\n- ".",\n- egdu.gff_attributes_to_str(feature.attributes, "GTF")])\n+ meta_data = "\\t".join(\n+ [\n+ feature.chrom,\n+ "galaxy_extract_genomic_dna",\n+ "interval",\n+ str(feature.start),\n+ str(feature.end),\n+ feature.score,\n+ feature.strand,\n+ ".",\n+ egdu.gff_attributes_to_str(feature.attributes, "GTF"),\n+ ]\n+ )\n else:\n # Here fields was set up around line 73.\n meta_data = "\\t".join(fields)\n if input_is_gff:\n- format_str = "%s seq \\"%s\\";\\n"\n+ format_str = \'%s seq "%s";\\n\'\n else:\n format_str = "%s\\t%s\\n"\n out.write(format_str % (meta_data, str(sequence)))\n@@ -214,7 +260,10 @@\n print(warn_msg)\n if skipped_lines:\n # Error message includes up to the first 10 skipped lines.\n- print(\'Skipped %d invalid lines, 1st is #%d, "%s"\' % (skipped_lines, first_invalid_line, \'\\n\'.join(invalid_lines[:10])))\n+ print(\n+ \'Skipped %d invalid lines, 1st is #%d, "%s"\'\n+ % (skipped_lines, first_invalid_line, "\\n".join(invalid_lines[:10]))\n+ )\n \n if args.reference_genome_source == "history":\n os.remove(seq_path)\n' |
b |
diff -r 5cc8e93ee98f -r 80414c33a59a extract_genomic_dna.xml --- a/extract_genomic_dna.xml Fri Dec 06 15:24:00 2019 -0500 +++ b/extract_genomic_dna.xml Thu Nov 21 07:20:29 2024 +0000 |
[ |
@@ -1,10 +1,13 @@ -<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.3+galaxy2"> +<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.3+galaxy3"> <description>using coordinates from assembled/unassembled genomes</description> <requirements> <requirement type="package" version="0.7.1">bx-python</requirement> <requirement type="package" version="1.13.0">six</requirement> <requirement type="package" version="377">ucsc-fatotwobit</requirement> </requirements> + <required_files> + <include path="extract_genomic_dna_utils.py" /> + </required_files> <command detect_errors="exit_code"><![CDATA[ #set genome = $input.metadata.dbkey #set datatype = $input.datatype @@ -104,7 +107,7 @@ <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> <param name="interpret_features" value="no"/> <param name="reference_genome_source" value="history"/> - <param name="reference_genome" value="mm9.fasta"/> + <param name="reference_genome" value="mm9.fasta" dbkey="mm9" ftype="fasta"/> <param name="output_format" value="fasta"/> <param name="fasta_header_type" value="char_delimited"/> <param name="fasta_header_delimiter" value="tilde"/> @@ -114,7 +117,7 @@ <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> <param name="interpret_features" value="yes"/> <param name="reference_genome_source" value="history"/> - <param name="reference_genome" value="mm9.fasta"/> + <param name="reference_genome" value="mm9.fasta" dbkey="mm9"/> <param name="output_format" value="fasta"/> <param name="fasta_header_type" value="bedtools_getfasta_default"/> <output name="output" file="extract_genomic_dna_out2.fasta" compare="contains" /> @@ -123,7 +126,7 @@ <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> <param name="interpret_features" value="no"/> <param name="reference_genome_source" value="history"/> - <param name="reference_genome" value="mm9.fasta"/> + <param name="reference_genome" value="mm9.fasta" dbkey="mm9"/> <param name="output_format" value="interval"/> <output name="output" file="extract_genomic_dna_out3.gff" compare="contains" /> </test> @@ -131,7 +134,7 @@ <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> <param name="interpret_features" value="yes"/> <param name="reference_genome_source" value="history"/> - <param name="reference_genome" value="mm9.fasta"/> + <param name="reference_genome" value="mm9.fasta" dbkey="mm9"/> <param name="output_format" value="interval"/> <output name="output" file="extract_genomic_dna_out4.gff" compare="contains" /> </test> |
b |
diff -r 5cc8e93ee98f -r 80414c33a59a extract_genomic_dna_utils.py --- a/extract_genomic_dna_utils.py Fri Dec 06 15:24:00 2019 -0500 +++ b/extract_genomic_dna_utils.py Thu Nov 21 07:20:29 2024 +0000 |
b |
@@ -178,7 +178,7 @@ try: interval = GenomicIntervalReader.next(self) raw_size += len(self.current_line) - except StopIteration as e: + except StopIteration: # No more intervals to read, but last feature needs to be # returned. interval = None |