Repository 'replace_column_by_key_value_file'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/replace_column_by_key_value_file

Changeset 1:d533e4b75800 (2018-09-23)
Previous changeset 0:cc18bac5afdb (2017-02-24)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/replaceColumn/tools/replaceColumn commit 0def21576e206a0732ce63bacd18533064ddf155
modified:
replaceColumn.xml
test-data/original_file
added:
test-data/neg_test_commented.txt
test-data/neg_test_map.txt
b
diff -r cc18bac5afdb -r d533e4b75800 replaceColumn.xml
--- a/replaceColumn.xml Fri Feb 24 10:14:15 2017 -0500
+++ b/replaceColumn.xml Sun Sep 23 04:03:34 2018 -0400
[
@@ -1,4 +1,4 @@
-<tool id="replace_column_with_key_value_file" name="Replace column" version="0.1">
+<tool id="replace_column_with_key_value_file" name="Replace column" version="0.2">
     <description>by values which are defined in a convert file</description>
     <command>
         <![CDATA[
@@ -14,35 +14,39 @@
 original_file = '$original_file'
 column = int("$column_replace") - 1
 ignore_start_lines = int("$skip_lines")
-delimiter_local = "\t" if str("$delimiter") == "" else str("$delimiter")
+delimiter_local = "\t" if str("$delimiter") == "tab" else str("$delimiter")
+comment_str = str("$pass_comments")
+unk_strat = str("$unknowns_strategy")
 
-## read conversion information to index 
+## read conversion information to index
 conversion = {}
 
 with open(replace_file, 'r') as conversion_file:
     for line in conversion_file:
         conv_key_value = line.strip().split()
         if len(conv_key_value) == 2:
-            conversion[conv_key_value[0]] = conv_key_value[1]                
+            conversion[conv_key_value[0]] = conv_key_value[1]
 
 ## read file line by line, search for column entry if it can be replaced. Otherwise it will be skipped.
 with open("output_file", 'w') as output:
     with open(original_file) as original:
         for i, line in enumerate(original):
-            if i < ignore_start_lines:
+            if i < ignore_start_lines or (comment_str and line.startswith(comment_str)):
                 output.write(line)
                 continue
 
-            if str("$delimiter") == "":
-                line_content = line.split()
-            else:
-                line_content = line.split(str("$delimiter"))
+            line_content = line.rstrip().split(delimiter_local)
 
             out = list()
             for j, line_content_column in enumerate(line_content):
                 if j == column:
+
                     if line_content_column in conversion:
                         out.append(conversion[line_content_column])
+                    elif unk_strat == "print":
+                        out.append(line_content_column)
+                    elif unk_strat == "error":
+                        raise Exception('ERROR: Encountered a value [%s] in the file that is not in the replacements file and is not commented with [%s]' % (line_content_column, comment_str))
                 else:
                     out.append(line_content_column)
 
@@ -63,14 +67,26 @@
                label="Which column should be replaced?" />
         <param name="skip_lines" type='integer' value='0' label="Skip this many starting lines" />
         <param name="delimiter" type="select" label="Delimited by">
-            <option value="" selected="True">Tab</option>
-            <option value=" ">Whitespace</option>
+            <option value="tab" selected="True">Tab</option>
+            <option value=" ">Space</option>
             <option value=".">Dot</option>
             <option value=",">Comma</option>
             <option value="-">Dash</option>
             <option value="_">Underscore</option>
             <option value="|">Pipe</option>
         </param>
+        <param name="unknowns_strategy" type="select" label="When an unknown value is encountered">
+            <option value="skip" selected="True">Skip / Do not print</option>
+            <option value="print">Print without modification</option>
+            <option value="error">Exit with an error</option>
+        </param>
+        <param name="pass_comments" type="text" value="#" label="Do not perform replacement on lines starting with">
+            <sanitizer>
+                <valid>
+                    <add value="#" />
+                </valid>
+            </sanitizer>
+        </param>
     </inputs>
     <outputs>
         <data  name="outfile_replace" format="txt" from_work_dir="output_file"/>
@@ -81,7 +97,9 @@
             <param name="original_file" value="original_file" ftype="tabular" />
             <param name="column_replace" value="1"/>
             <param name="skip_lines" value="1"/>
-            <param name="delimiter" value="" />
+            <param name="delimiter" value="tab" />
+            <param name="unknowns_strategy" value="skip"/>
+            <param name="pass_comments" value="#"/>
             <output name="outfile_replace" file="result_file"/>
         </test>
         <test>
@@ -89,16 +107,37 @@
             <param name="original_file" value="empty_mapping" ftype="tabular" />
             <param name="column_replace" value="1"/>
             <param name="skip_lines" value="1"/>
-            <param name="delimiter" value="" />
+            <param name="delimiter" value="tab" />
+            <param name="unknowns_strategy" value="skip"/>
+            <param name="pass_comments" value="#"/>
             <output name="outfile_replace" file="result_file_empty_mapping"/>
         </test>
+        <test expect_failure="True">
+            <param name="replace_information" value="neg_test_map.txt" ftype="tabular" />
+            <param name="original_file" value="neg_test_commented.txt" ftype="tabular" />
+            <param name="column_replace" value="1"/>
+            <param name="skip_lines" value="0"/>
+            <param name="delimiter" value="tab" />
+            <param name="unknowns_strategy" value="error"/>
+            <param name="pass_comments" value="#"/>
+        </test>
+        <test>
+            <param name="replace_information" value="neg_test_map.txt" ftype="tabular" />
+            <param name="original_file" value="neg_test_commented.txt" ftype="tabular" />
+            <param name="column_replace" value="1"/>
+            <param name="skip_lines" value="0"/>
+            <param name="delimiter" value="tab" />
+            <param name="unknowns_strategy" value="print"/>
+            <param name="pass_comments" value="#"/>
+            <output name="outfile_replace" file="neg_test_commented.txt"/>
+        </test>
     </tests>
     <help>
         <![CDATA[
 **What it does**
 
-This tool replaces the entries of a defined column with entries given by a replacement file. 
-For example the replacement file holds the information of the naming scheme of ensembl annotated chromosomes in the frist column and in the second the UCSC annotation. 
+This tool replaces the entries of a defined column with entries given by a replacement file.
+For example the replacement file holds the information of the naming scheme of ensembl annotated chromosomes in the first column and in the second the UCSC annotation.
 A file which is having information about chromosomes in ensembl notation in column x can now be converted to a file which holds the same information but in UCSC annotation.
 
 A useful repository for ensembl and UCSC chromosomes mapping is: https://github.com/dpryan79/ChromosomeMappings
b
diff -r cc18bac5afdb -r d533e4b75800 test-data/neg_test_commented.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/neg_test_commented.txt Sun Sep 23 04:03:34 2018 -0400
b
@@ -0,0 +1,2 @@
+#test
+NC_000964.33 should not match
b
diff -r cc18bac5afdb -r d533e4b75800 test-data/neg_test_map.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/neg_test_map.txt Sun Sep 23 04:03:34 2018 -0400
b
@@ -0,0 +1,1 @@
+NC_000964.3 Chromosome
b
diff -r cc18bac5afdb -r d533e4b75800 test-data/original_file
--- a/test-data/original_file Fri Feb 24 10:14:15 2017 -0500
+++ b/test-data/original_file Sun Sep 23 04:03:34 2018 -0400
b
@@ -1,51 +1,51 @@
 track type="bedGraph" description="BT089 CpG merged methylation fractions"
-1       10468   10470   0.209302
-1       10470   10472   0.611111
-1       10483   10485   0.428571
-1       10488   10490   0.846154
-1       10492   10494   0.666667
-1       10496   10498   0.916667
-1       10524   10526   0.916667
-1       10541   10543   0.818182
-1       10562   10564   0.615385
-1       10570   10572   0.916667
-1       10576   10578   0.615385
-1       10578   10580   0.538462
-1       10588   10590   0.909091
-1       10608   10610   0.700000
-1       10616   10618   0.875000
-1       10619   10621   0.714286
-1       10630   10632   0.428571
-1       10632   10634   0.444444
-1       10635   10637   0.400000
-1       10637   10639   0.400000
-1       10640   10642   0.900000
-1       10643   10645   0.600000
-1       10649   10651   0.727273
-1       10659   10661   0.857143
-1       10661   10663   0.428571
-1       10664   10666   0.846154
-1       10666   10668   0.750000
-1       10669   10671   0.916667
-1       10672   10674   0.916667
-1       10678   10680   1.000000
-1       10688   10690   0.900000
-1       10690   10692   0.545455
-1       10693   10695   1.000000
-1       10695   10697   0.909091
-1       10698   10700   0.916667
-1       10701   10703   1.000000
-1       10707   10709   1.000000
-1       10717   10719   0.866667
-1       10719   10721   0.692308
-1       10722   10724   1.000000
-1       10724   10726   0.933333
-1       10727   10729   0.933333
-1       10730   10732   1.000000
-1       10736   10738   0.933333
-1       10746   10748   0.857143
-1       10748   10750   0.500000
-1       10751   10753   0.928571
-1       10753   10755   0.857143
-1       10756   10758   1.000000
-1       10759   10761   0.857143
+1 10468 10470 0.209302
+1 10470 10472 0.611111
+1 10483 10485 0.428571
+1 10488 10490 0.846154
+1 10492 10494 0.666667
+1 10496 10498 0.916667
+1 10524 10526 0.916667
+1 10541 10543 0.818182
+1 10562 10564 0.615385
+1 10570 10572 0.916667
+1 10576 10578 0.615385
+1 10578 10580 0.538462
+1 10588 10590 0.909091
+1 10608 10610 0.700000
+1 10616 10618 0.875000
+1 10619 10621 0.714286
+1 10630 10632 0.428571
+1 10632 10634 0.444444
+1 10635 10637 0.400000
+1 10637 10639 0.400000
+1 10640 10642 0.900000
+1 10643 10645 0.600000
+1 10649 10651 0.727273
+1 10659 10661 0.857143
+1 10661 10663 0.428571
+1 10664 10666 0.846154
+1 10666 10668 0.750000
+1 10669 10671 0.916667
+1 10672 10674 0.916667
+1 10678 10680 1.000000
+1 10688 10690 0.900000
+1 10690 10692 0.545455
+1 10693 10695 1.000000
+1 10695 10697 0.909091
+1 10698 10700 0.916667
+1 10701 10703 1.000000
+1 10707 10709 1.000000
+1 10717 10719 0.866667
+1 10719 10721 0.692308
+1 10722 10724 1.000000
+1 10724 10726 0.933333
+1 10727 10729 0.933333
+1 10730 10732 1.000000
+1 10736 10738 0.933333
+1 10746 10748 0.857143
+1 10748 10750 0.500000
+1 10751 10753 0.928571
+1 10753 10755 0.857143
+1 10756 10758 1.000000
+1 10759 10761 0.857143