Mercurial > repos > bgruening > replace_column_by_key_value_file
changeset 1:d533e4b75800 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/replaceColumn/tools/replaceColumn commit 0def21576e206a0732ce63bacd18533064ddf155
author | bgruening |
---|---|
date | Sun, 23 Sep 2018 04:03:34 -0400 |
parents | cc18bac5afdb |
children | |
files | replaceColumn.xml test-data/neg_test_commented.txt test-data/neg_test_map.txt test-data/original_file |
diffstat | 4 files changed, 107 insertions(+), 65 deletions(-) [+] |
line wrap: on
line diff
--- a/replaceColumn.xml Fri Feb 24 10:14:15 2017 -0500 +++ b/replaceColumn.xml Sun Sep 23 04:03:34 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="replace_column_with_key_value_file" name="Replace column" version="0.1"> +<tool id="replace_column_with_key_value_file" name="Replace column" version="0.2"> <description>by values which are defined in a convert file</description> <command> <![CDATA[ @@ -14,35 +14,39 @@ original_file = '$original_file' column = int("$column_replace") - 1 ignore_start_lines = int("$skip_lines") -delimiter_local = "\t" if str("$delimiter") == "" else str("$delimiter") +delimiter_local = "\t" if str("$delimiter") == "tab" else str("$delimiter") +comment_str = str("$pass_comments") +unk_strat = str("$unknowns_strategy") -## read conversion information to index +## read conversion information to index conversion = {} with open(replace_file, 'r') as conversion_file: for line in conversion_file: conv_key_value = line.strip().split() if len(conv_key_value) == 2: - conversion[conv_key_value[0]] = conv_key_value[1] + conversion[conv_key_value[0]] = conv_key_value[1] ## read file line by line, search for column entry if it can be replaced. Otherwise it will be skipped. with open("output_file", 'w') as output: with open(original_file) as original: for i, line in enumerate(original): - if i < ignore_start_lines: + if i < ignore_start_lines or (comment_str and line.startswith(comment_str)): output.write(line) continue - if str("$delimiter") == "": - line_content = line.split() - else: - line_content = line.split(str("$delimiter")) + line_content = line.rstrip().split(delimiter_local) out = list() for j, line_content_column in enumerate(line_content): if j == column: + if line_content_column in conversion: out.append(conversion[line_content_column]) + elif unk_strat == "print": + out.append(line_content_column) + elif unk_strat == "error": + raise Exception('ERROR: Encountered a value [%s] in the file that is not in the replacements file and is not commented with [%s]' % (line_content_column, comment_str)) else: out.append(line_content_column) @@ -63,14 +67,26 @@ label="Which column should be replaced?" /> <param name="skip_lines" type='integer' value='0' label="Skip this many starting lines" /> <param name="delimiter" type="select" label="Delimited by"> - <option value="" selected="True">Tab</option> - <option value=" ">Whitespace</option> + <option value="tab" selected="True">Tab</option> + <option value=" ">Space</option> <option value=".">Dot</option> <option value=",">Comma</option> <option value="-">Dash</option> <option value="_">Underscore</option> <option value="|">Pipe</option> </param> + <param name="unknowns_strategy" type="select" label="When an unknown value is encountered"> + <option value="skip" selected="True">Skip / Do not print</option> + <option value="print">Print without modification</option> + <option value="error">Exit with an error</option> + </param> + <param name="pass_comments" type="text" value="#" label="Do not perform replacement on lines starting with"> + <sanitizer> + <valid> + <add value="#" /> + </valid> + </sanitizer> + </param> </inputs> <outputs> <data name="outfile_replace" format="txt" from_work_dir="output_file"/> @@ -81,7 +97,9 @@ <param name="original_file" value="original_file" ftype="tabular" /> <param name="column_replace" value="1"/> <param name="skip_lines" value="1"/> - <param name="delimiter" value="" /> + <param name="delimiter" value="tab" /> + <param name="unknowns_strategy" value="skip"/> + <param name="pass_comments" value="#"/> <output name="outfile_replace" file="result_file"/> </test> <test> @@ -89,16 +107,37 @@ <param name="original_file" value="empty_mapping" ftype="tabular" /> <param name="column_replace" value="1"/> <param name="skip_lines" value="1"/> - <param name="delimiter" value="" /> + <param name="delimiter" value="tab" /> + <param name="unknowns_strategy" value="skip"/> + <param name="pass_comments" value="#"/> <output name="outfile_replace" file="result_file_empty_mapping"/> </test> + <test expect_failure="True"> + <param name="replace_information" value="neg_test_map.txt" ftype="tabular" /> + <param name="original_file" value="neg_test_commented.txt" ftype="tabular" /> + <param name="column_replace" value="1"/> + <param name="skip_lines" value="0"/> + <param name="delimiter" value="tab" /> + <param name="unknowns_strategy" value="error"/> + <param name="pass_comments" value="#"/> + </test> + <test> + <param name="replace_information" value="neg_test_map.txt" ftype="tabular" /> + <param name="original_file" value="neg_test_commented.txt" ftype="tabular" /> + <param name="column_replace" value="1"/> + <param name="skip_lines" value="0"/> + <param name="delimiter" value="tab" /> + <param name="unknowns_strategy" value="print"/> + <param name="pass_comments" value="#"/> + <output name="outfile_replace" file="neg_test_commented.txt"/> + </test> </tests> <help> <![CDATA[ **What it does** -This tool replaces the entries of a defined column with entries given by a replacement file. -For example the replacement file holds the information of the naming scheme of ensembl annotated chromosomes in the frist column and in the second the UCSC annotation. +This tool replaces the entries of a defined column with entries given by a replacement file. +For example the replacement file holds the information of the naming scheme of ensembl annotated chromosomes in the frist column and in the second the UCSC annotation. A file which is having information about chromosomes in ensembl notation in column x can now be converted to a file which holds the same information but in UCSC annotation. A useful repository for ensembl and UCSC chromosomes mapping is: https://github.com/dpryan79/ChromosomeMappings
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/neg_test_commented.txt Sun Sep 23 04:03:34 2018 -0400 @@ -0,0 +1,2 @@ +#test +NC_000964.33 should not match
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/neg_test_map.txt Sun Sep 23 04:03:34 2018 -0400 @@ -0,0 +1,1 @@ +NC_000964.3 Chromosome
--- a/test-data/original_file Fri Feb 24 10:14:15 2017 -0500 +++ b/test-data/original_file Sun Sep 23 04:03:34 2018 -0400 @@ -1,51 +1,51 @@ track type="bedGraph" description="BT089 CpG merged methylation fractions" -1 10468 10470 0.209302 -1 10470 10472 0.611111 -1 10483 10485 0.428571 -1 10488 10490 0.846154 -1 10492 10494 0.666667 -1 10496 10498 0.916667 -1 10524 10526 0.916667 -1 10541 10543 0.818182 -1 10562 10564 0.615385 -1 10570 10572 0.916667 -1 10576 10578 0.615385 -1 10578 10580 0.538462 -1 10588 10590 0.909091 -1 10608 10610 0.700000 -1 10616 10618 0.875000 -1 10619 10621 0.714286 -1 10630 10632 0.428571 -1 10632 10634 0.444444 -1 10635 10637 0.400000 -1 10637 10639 0.400000 -1 10640 10642 0.900000 -1 10643 10645 0.600000 -1 10649 10651 0.727273 -1 10659 10661 0.857143 -1 10661 10663 0.428571 -1 10664 10666 0.846154 -1 10666 10668 0.750000 -1 10669 10671 0.916667 -1 10672 10674 0.916667 -1 10678 10680 1.000000 -1 10688 10690 0.900000 -1 10690 10692 0.545455 -1 10693 10695 1.000000 -1 10695 10697 0.909091 -1 10698 10700 0.916667 -1 10701 10703 1.000000 -1 10707 10709 1.000000 -1 10717 10719 0.866667 -1 10719 10721 0.692308 -1 10722 10724 1.000000 -1 10724 10726 0.933333 -1 10727 10729 0.933333 -1 10730 10732 1.000000 -1 10736 10738 0.933333 -1 10746 10748 0.857143 -1 10748 10750 0.500000 -1 10751 10753 0.928571 -1 10753 10755 0.857143 -1 10756 10758 1.000000 -1 10759 10761 0.857143 +1 10468 10470 0.209302 +1 10470 10472 0.611111 +1 10483 10485 0.428571 +1 10488 10490 0.846154 +1 10492 10494 0.666667 +1 10496 10498 0.916667 +1 10524 10526 0.916667 +1 10541 10543 0.818182 +1 10562 10564 0.615385 +1 10570 10572 0.916667 +1 10576 10578 0.615385 +1 10578 10580 0.538462 +1 10588 10590 0.909091 +1 10608 10610 0.700000 +1 10616 10618 0.875000 +1 10619 10621 0.714286 +1 10630 10632 0.428571 +1 10632 10634 0.444444 +1 10635 10637 0.400000 +1 10637 10639 0.400000 +1 10640 10642 0.900000 +1 10643 10645 0.600000 +1 10649 10651 0.727273 +1 10659 10661 0.857143 +1 10661 10663 0.428571 +1 10664 10666 0.846154 +1 10666 10668 0.750000 +1 10669 10671 0.916667 +1 10672 10674 0.916667 +1 10678 10680 1.000000 +1 10688 10690 0.900000 +1 10690 10692 0.545455 +1 10693 10695 1.000000 +1 10695 10697 0.909091 +1 10698 10700 0.916667 +1 10701 10703 1.000000 +1 10707 10709 1.000000 +1 10717 10719 0.866667 +1 10719 10721 0.692308 +1 10722 10724 1.000000 +1 10724 10726 0.933333 +1 10727 10729 0.933333 +1 10730 10732 1.000000 +1 10736 10738 0.933333 +1 10746 10748 0.857143 +1 10748 10750 0.500000 +1 10751 10753 0.928571 +1 10753 10755 0.857143 +1 10756 10758 1.000000 +1 10759 10761 0.857143