Previous changeset 2:c4fd2ea4f988 (2014-11-13) |
Commit message:
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty" |
modified:
find_in_reference.py find_in_reference.xml |
b |
diff -r c4fd2ea4f988 -r 2429b413d90a find_in_reference.py --- a/find_in_reference.py Thu Nov 13 14:09:50 2014 -0600 +++ b/find_in_reference.py Thu May 12 19:30:54 2022 +0000 |
[ |
b'@@ -1,168 +1,177 @@\n-#!/usr/bin/env python\n+#!/usr/bin/env python3\n+\n+\n+import os.path\n+import sys\n+import optparse\n+\n+\n """\n #\n #------------------------------------------------------------------------------\n-# University of Minnesota\n-# Copyright 2013, Regents of the University of Minnesota\n+# University of Minnesota\n+# Copyright 2013, Regents of the University of Minnesota\n #------------------------------------------------------------------------------\n # Author:\n #\n-# James E Johnson\n+# James E Johnson\n #\n #------------------------------------------------------------------------------\n """\n \n """\n-Takes 2 tabular files as input: \n- 1. The file to be filtered \n- 2. The reference file \n+Takes 2 tabular files as input:\n+ 1. The file to be filtered\n+ 2. The reference file\n \n-The string value of selected column of the input file is searched for \n+The string value of selected column of the input file is searched for\n in the string values of the selected column of the reference file.\n \n-The intended purpose is to filter a peptide fasta file in tabular format \n+The intended purpose is to filter a peptide fasta file in tabular format\n by whether those peptide sequences are found in a reference fasta file.\n \n """\n-import sys,re,os.path\n-import tempfile\n-import optparse\n-from optparse import OptionParser\n-import logging\n \n \n def __main__():\n- #Parse Command Line\n- parser = optparse.OptionParser()\n- parser.add_option( \'-i\', \'--input\', dest=\'input\', help=\'The input file to filter. (Otherwise read from stdin)\' )\n- parser.add_option( \'-r\', \'--reference\', dest=\'reference\', help=\'The reference file to filter against\' )\n- parser.add_option( \'-o\', \'--output\', dest=\'output\', help=\'The output file for input lines filtered by reference\')\n- parser.add_option( \'-f\', \'--filtered\', dest=\'filtered\', help=\'The output file for input lines not in the output\')\n- parser.add_option(\'-c\',\'--input_column\', dest=\'input_column\', default=None, help=\'The column for the value in the input file. (first column = 1, default to last column)\')\n- parser.add_option(\'-C\',\'--reference_column\', dest=\'reference_column\', default=None, help=\'The column for the value in the reference file. (first column = 1, default to last column)\')\n- parser.add_option( \'-I\', \'--case_insensitive\', dest=\'ignore_case\', action="store_true", default=False, help=\'case insensitive\' )\n- parser.add_option( \'-R\', \'--reverse_find\', dest=\'reverse_find\', action="store_true", default=False, help=\'find the reference string in the input string\' )\n- parser.add_option( \'-B\', \'--test_reverse\', dest=\'test_reverse\', action="store_true", default=False, help=\'Also search for reversed input string in reference\' )\n- parser.add_option( \'-D\', \'--test_dna_reverse_complement\', dest=\'test_reverse_comp\', action="store_true", default=False, help=\'Also search for the DNA reverse complement of input string\' )\n- parser.add_option( \'-k\', \'--keep\', dest=\'keep\', action="store_true", default=False, help=\'\' )\n- parser.add_option( \'-a\', \'--annotation_columns\', dest=\'annotation_columns\', default=None, help=\'If string is found, add these columns from reference\' )\n- parser.add_option( \'-s\', \'--annotation_separator\', dest=\'annotation_separator\', default=\';\', help=\'separator character between annotations from different lines\' )\n- parser.add_option( \'-S\', \'--annotation_col_sep\', dest=\'annotation_col_sep\', default=\',\', help=\'separator character between annotation column from the same line\' )\n- parser.add_option( \'-d\', \'--debug\', dest=\'debug\', action=\'store_true\', default=False, help=\'Turn on wrapper debugging to stdout\' )\n- (options, args) = parser.parse_args()\n+ # Parse Command Line\n+ parser = optparse.OptionParser()\n+ parser.add_option(\'-i\', \'--input\', dest=\'input\', help=\'The input file to filter. (Otherwise read from stdin)\')\n+ parser.add_option(\'-r\', \'--reference\', dest=\'reference\', help=\'The re'..b'e:\n+ print("failed: %s" % e, file=sys.stderr)\n+ exit(2)\n+ else:\n+ inputFile = sys.stdin\n+ # Reference\n+ if options.reference is None:\n+ print("failed: reference file is required", file=sys.stderr)\n+ exit(2)\n+ # Output files\n+ outFile = None\n+ filteredFile = None\n+ if options.filtered is None and options.output is None:\n+ # write to stdout\n+ outFile = sys.stdout\n+ else:\n+ if options.output is not None:\n+ try:\n+ outPath = os.path.abspath(options.output)\n+ outFile = open(outPath, \'w\')\n+ except Exception as e:\n+ print("failed: %s" % e, file=sys.stderr)\n+ exit(3)\n+ if options.filtered is not None:\n+ try:\n+ filteredPath = os.path.abspath(options.filtered)\n+ filteredFile = open(filteredPath, \'w\')\n+ except Exception as e:\n+ print("failed: %s" % e, file=sys.stderr)\n+ exit(3)\n+ incol = -1\n+ if options.input_column and options.input_column > 0:\n+ incol = int(options.input_column)-1\n+ refcol = -1\n+ if options.reference_column and options.reference_column > 0:\n+ refcol = int(options.reference_column)-1\n+ if options.annotation_columns:\n+ annotate = True\n+ annotation_columns = [int(x) - 1 for x in options.annotation_columns.split(\', \')]\n+ else:\n+ annotate = False\n+ refFile = None\n+ num_found = 0\n+ num_novel = 0\n+ for ln, line in enumerate(inputFile):\n+ annotations = []\n+ try:\n+ found = False\n+ search_string = line.split(\'\\t\')[incol].rstrip(\'\\r\\n\')\n+ if options.ignore_case:\n+ search_string = search_string.upper()\n+ if options.debug:\n+ print("search: %s" % (search_string), file=sys.stderr)\n+ refFile = open(options.reference, \'r\')\n+ for tn, fline in enumerate(refFile):\n+ fields = fline.split(\'\\t\')\n+ target_string = fields[refcol].rstrip(\'\\r\\n\')\n+ if options.ignore_case:\n+ target_string = target_string.upper()\n+ search = search_string if not options.reverse_find else target_string\n+ target = target_string if not options.reverse_find else search_string\n+ if options.debug:\n+ print("in: %s %s %s" % (search, search in target, target), file=sys.stderr)\n+ if search in target or test_reverse(search, target) or test_rcomplement(search, target):\n+ found = True\n+ if annotate:\n+ annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns])\n+ annotations.append(annotation)\n+ else:\n+ break\n+ if found:\n+ num_found += 1\n+ if annotate:\n+ line = \'%s\\t%s\\n\' % (line.rstrip(\'\\r\\n\'), options.annotation_separator.join(annotations))\n+ if options.keep is True:\n+ if outFile:\n+ outFile.write(line)\n+ else:\n+ if filteredFile:\n+ filteredFile.write(line)\n+ else:\n+ num_novel += 1\n+ if options.keep is True:\n+ if filteredFile:\n+ filteredFile.write(line)\n+ else:\n+ if outFile:\n+ outFile.write(line)\n+ except Exception as e:\n+ print("failed: Error reading %s - %s" % (options.reference, e), file=sys.stderr)\n+ finally:\n+ if refFile:\n+ refFile.close()\n+ print("found: %d novel: %d" % (num_found, num_novel), file=sys.stdout)\n \n-if __name__ == "__main__" : __main__()\n \n+if __name__ == "__main__":\n+ __main__()\n' |
b |
diff -r c4fd2ea4f988 -r 2429b413d90a find_in_reference.xml --- a/find_in_reference.xml Thu Nov 13 14:09:50 2014 -0600 +++ b/find_in_reference.xml Thu May 12 19:30:54 2022 +0000 |
b |
@@ -1,5 +1,5 @@ <?xml version="1.0"?> -<tool id="find_in_reference" name="find in reference" version="0.0.3"> +<tool id="find_in_reference" name="find in reference" version="0.1.0"> <description>filter peptides that are present in proteins</description> <command interpreter="python">find_in_reference.py --input "$input" --reference "$reference" @@ -106,6 +106,11 @@ <test> <param name="input" value="human_proteins.tabular" ftype="tabular" dbkey="hg19"/> <param name="reference" value="human_peptides.tabular" ftype="tabular" dbkey="hg19"/> + <conditional name="column"> + <param name="set" value="yes"/> + <param name="input_column" value="2"/> + <param name="reference_column" value="2"/> + </conditional> <param name="reverse_find" value="True"/> <param name="outputs" value="found"/> <output name="found" file="found_proteins.tabular"/> |