Previous changeset 11:d51db443aaa4 (2018-05-30) |
Commit message:
"Update all the pico_galaxy tools on main Tool Shed" |
modified:
tools/get_orfs_or_cdss/get_orfs_or_cdss.py tools/get_orfs_or_cdss/get_orfs_or_cdss.xml tools/get_orfs_or_cdss/tool_dependencies.xml |
b |
diff -r d51db443aaa4 -r 71905a6d52a7 tools/get_orfs_or_cdss/get_orfs_or_cdss.py --- a/tools/get_orfs_or_cdss/get_orfs_or_cdss.py Wed May 30 08:33:20 2018 -0400 +++ b/tools/get_orfs_or_cdss/get_orfs_or_cdss.py Fri Apr 16 22:37:04 2021 +0000 |
[ |
b'@@ -10,7 +10,7 @@\n \n Cock et al 2009. Biopython: freely available Python tools for computational\n molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.\n-http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.\n+https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878.\n \n This script is copyright 2011-2013 by Peter Cock, The James Hutton Institute\n (formerly SCRI), Dundee, UK. All rights reserved.\n@@ -42,43 +42,100 @@\n \n \n parser = OptionParser(usage=usage)\n-parser.add_option(\'-i\', \'--input\', dest=\'input_file\',\n- default=None, help=\'Input fasta file\',\n- metavar=\'FILE\')\n-parser.add_option(\'-f\', \'--format\', dest=\'seq_format\',\n- default=\'fasta\', help=\'Sequence format (e.g. fasta, fastq, sff)\')\n-parser.add_option(\'--table\', dest=\'table\',\n- default=1, help=\'NCBI Translation table\', type=\'int\')\n-parser.add_option(\'-t\', \'--ftype\', dest=\'ftype\', type=\'choice\',\n- choices=[\'CDS\', \'ORF\'], default=\'ORF\',\n- help=\'Find ORF or CDSs\')\n-parser.add_option(\'-e\', \'--ends\', dest=\'ends\', type=\'choice\',\n- choices=[\'open\', \'closed\'], default=\'closed\',\n- help=\'Open or closed. Closed ensures start/stop codons are present\')\n-parser.add_option(\'-m\', \'--mode\', dest=\'mode\', type=\'choice\',\n- choices=[\'all\', \'top\', \'one\'], default=\'all\',\n- help=\'Output all ORFs/CDSs from sequence, all ORFs/CDSs \'\n- \'with max length, or first with maximum length\')\n-parser.add_option(\'--min_len\', dest=\'min_len\',\n- default=10, help=\'Minimum ORF/CDS length\', type=\'int\')\n-parser.add_option(\'-s\', \'--strand\', dest=\'strand\', type=\'choice\',\n- choices=[\'forward\', \'reverse\', \'both\'], default=\'both\',\n- help=\'Strand to search for features on\')\n-parser.add_option(\'--on\', dest=\'out_nuc_file\',\n- default=None, help=\'Output nucleotide sequences, or - for STDOUT\',\n- metavar=\'FILE\')\n-parser.add_option(\'--op\', dest=\'out_prot_file\',\n- default=None, help=\'Output protein sequences, or - for STDOUT\',\n- metavar=\'FILE\')\n-parser.add_option(\'--ob\', dest=\'out_bed_file\',\n- default=None, help=\'Output BED file, or - for STDOUT\',\n- metavar=\'FILE\')\n-parser.add_option(\'--og\', dest=\'out_gff3_file\',\n- default=None, help=\'Output GFF3 file, or - for STDOUT\',\n- metavar=\'FILE\')\n-parser.add_option(\'-v\', \'--version\', dest=\'version\',\n- default=False, action=\'store_true\',\n- help=\'Show version and quit\')\n+parser.add_option(\n+ "-i",\n+ "--input",\n+ dest="input_file",\n+ default=None,\n+ help="Input fasta file",\n+ metavar="FILE",\n+)\n+parser.add_option(\n+ "-f",\n+ "--format",\n+ dest="seq_format",\n+ default="fasta",\n+ help="Sequence format (e.g. fasta, fastq, sff)",\n+)\n+parser.add_option(\n+ "--table", dest="table", default=1, help="NCBI Translation table", type="int"\n+)\n+parser.add_option(\n+ "-t",\n+ "--ftype",\n+ dest="ftype",\n+ type="choice",\n+ choices=["CDS", "ORF"],\n+ default="ORF",\n+ help="Find ORF or CDSs",\n+)\n+parser.add_option(\n+ "-e",\n+ "--ends",\n+ dest="ends",\n+ type="choice",\n+ choices=["open", "closed"],\n+ default="closed",\n+ help="Open or closed. Closed ensures start/stop codons are present",\n+)\n+parser.add_option(\n+ "-m",\n+ "--mode",\n+ dest="mode",\n+ type="choice",\n+ choices=["all", "top", "one"],\n+ default="all",\n+ help="Output all ORFs/CDSs from sequence, all ORFs/CDSs "\n+ "with max length, or first with maximum length",\n+)\n+parser.add_option(\n+ "--min_len", dest="min_len", default=10, help="Minimum ORF/CDS length", type="int"\n+)\n+parser.add_option(\n+ "-s",\n+ "--strand",\n+ dest="strand",\n+ type="choice",\n+ choices=["forward", "reverse", "both"],\n+ default="both",\n+ hel'..b'bed_file, options.out_gff3_file)):\n+if not any(\n+ (\n+ options.out_nuc_file,\n+ options.out_prot_file,\n+ options.out_bed_file,\n+ options.out_gff3_file,\n+ )\n+):\n sys.exit("At least one output file is required")\n \n try:\n@@ -120,7 +184,7 @@\n \n \n def start_chop_and_trans(s, strict=True):\n- """Returns offset, trimmed nuc, protein."""\n+ """Return offset, trimmed nuc, protein."""\n if strict:\n assert s[-3:] in stops, s\n assert len(s) % 3 == 0\n@@ -140,7 +204,7 @@\n \n \n def break_up_frame(s):\n- """Returns offset, nuc, protein."""\n+ """Return offset, nuc, protein."""\n start = 0\n for match in re_stops.finditer(s):\n index = match.start() + 3\n@@ -175,7 +239,7 @@\n \n \n def get_all_peptides(nuc_seq):\n- """Returns start, end, strand, nucleotides, protein.\n+ """Return start, end, strand, nucleotides, protein.\n \n Co-ordinates are Python style zero-based.\n """\n@@ -199,7 +263,7 @@\n \n \n def get_top_peptides(nuc_seq):\n- """Returns all peptides of max length."""\n+ """Return all peptides of max length."""\n values = list(get_all_peptides(nuc_seq))\n if not values:\n raise StopIteration\n@@ -210,7 +274,7 @@\n \n \n def get_one_peptide(nuc_seq):\n- """Returns first (left most) peptide with max length."""\n+ """Return first (left most) peptide with max length."""\n values = list(get_top_peptides(nuc_seq))\n if not values:\n raise StopIteration\n@@ -255,17 +319,23 @@\n out_gff3 = None\n \n if out_gff3:\n- out_gff3.write(\'##gff-version 3\\n\')\n+ out_gff3.write("##gff-version 3\\n")\n \n for record in SeqIO.parse(options.input_file, seq_format):\n- for i, (f_start, f_end, f_strand, n, t) in enumerate(get_peptides(str(record.seq).upper())):\n+ for i, (f_start, f_end, f_strand, n, t) in enumerate(\n+ get_peptides(str(record.seq).upper())\n+ ):\n out_count += 1\n if f_strand == +1:\n loc = "%i..%i" % (f_start + 1, f_end)\n else:\n loc = "complement(%i..%i)" % (f_start + 1, f_end)\n- descr = "length %i aa, %i bp, from %s of %s" \\\n- % (len(t), len(n), loc, record.description)\n+ descr = "length %i aa, %i bp, from %s of %s" % (\n+ len(t),\n+ len(n),\n+ loc,\n+ record.description,\n+ )\n fid = record.id + "|%s%i" % (options.ftype, i + 1)\n r = SeqRecord(Seq(n), id=fid, name="", description=descr)\n t = SeqRecord(Seq(t), id=fid, name="", description=descr)\n@@ -273,12 +343,32 @@\n SeqIO.write(r, out_nuc, "fasta")\n if out_prot:\n SeqIO.write(t, out_prot, "fasta")\n- nice_strand = \'+\' if f_strand == +1 else \'-\'\n+ nice_strand = "+" if f_strand == +1 else "-"\n if out_bed:\n- out_bed.write(\'\\t\'.join(map(str, [record.id, f_start, f_end, fid, 0, nice_strand])) + \'\\n\')\n+ out_bed.write(\n+ "\\t".join(map(str, [record.id, f_start, f_end, fid, 0, nice_strand]))\n+ + "\\n"\n+ )\n if out_gff3:\n- out_gff3.write(\'\\t\'.join(map(str, [record.id, \'getOrfsOrCds\', \'CDS\', f_start + 1, f_end, \'.\',\n- nice_strand, 0, \'ID=%s%s\' % (options.ftype, i + 1)])) + \'\\n\')\n+ out_gff3.write(\n+ "\\t".join(\n+ map(\n+ str,\n+ [\n+ record.id,\n+ "getOrfsOrCds",\n+ "CDS",\n+ f_start + 1,\n+ f_end,\n+ ".",\n+ nice_strand,\n+ 0,\n+ "ID=%s%s" % (options.ftype, i + 1),\n+ ],\n+ )\n+ )\n+ + "\\n"\n+ )\n in_count += 1\n if out_nuc and out_nuc is not sys.stdout:\n out_nuc.close()\n' |
b |
diff -r d51db443aaa4 -r 71905a6d52a7 tools/get_orfs_or_cdss/get_orfs_or_cdss.xml --- a/tools/get_orfs_or_cdss/get_orfs_or_cdss.xml Wed May 30 08:33:20 2018 -0400 +++ b/tools/get_orfs_or_cdss/get_orfs_or_cdss.xml Fri Apr 16 22:37:04 2021 +0000 |
b |
@@ -184,14 +184,14 @@ Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). Galaxy tools and workflows for sequence analysis with applications in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 +https://doi.org/10.7717/peerj.167 This tool uses Biopython, so you may also wish to cite the Biopython application note (and Galaxy too of course): Cock et al (2009). Biopython: freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3. -http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878. +https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878. This tool is available to install into other Galaxy Instances via the Galaxy Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/get_orfs_or_cdss |
b |
diff -r d51db443aaa4 -r 71905a6d52a7 tools/get_orfs_or_cdss/tool_dependencies.xml --- a/tools/get_orfs_or_cdss/tool_dependencies.xml Wed May 30 08:33:20 2018 -0400 +++ b/tools/get_orfs_or_cdss/tool_dependencies.xml Fri Apr 16 22:37:04 2021 +0000 |
b |
@@ -1,6 +1,6 @@ -<?xml version="1.0"?> +<?xml version="1.0" ?> <tool_dependency> <package name="biopython" version="1.67"> - <repository changeset_revision="a12f73c3b116" name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="a12f73c3b116"/> </package> -</tool_dependency> +</tool_dependency> \ No newline at end of file |