comparison replaceColumn.xml @ 1:d533e4b75800 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/replaceColumn/tools/replaceColumn commit 0def21576e206a0732ce63bacd18533064ddf155
author bgruening
date Sun, 23 Sep 2018 04:03:34 -0400
parents cc18bac5afdb
children
comparison
equal deleted inserted replaced
0:cc18bac5afdb 1:d533e4b75800
1 <tool id="replace_column_with_key_value_file" name="Replace column" version="0.1"> 1 <tool id="replace_column_with_key_value_file" name="Replace column" version="0.2">
2 <description>by values which are defined in a convert file</description> 2 <description>by values which are defined in a convert file</description>
3 <command> 3 <command>
4 <![CDATA[ 4 <![CDATA[
5 python '$replaceColumnScript' 5 python '$replaceColumnScript'
6 ]]> 6 ]]>
12 12
13 replace_file = '$replace_information' 13 replace_file = '$replace_information'
14 original_file = '$original_file' 14 original_file = '$original_file'
15 column = int("$column_replace") - 1 15 column = int("$column_replace") - 1
16 ignore_start_lines = int("$skip_lines") 16 ignore_start_lines = int("$skip_lines")
17 delimiter_local = "\t" if str("$delimiter") == "" else str("$delimiter") 17 delimiter_local = "\t" if str("$delimiter") == "tab" else str("$delimiter")
18 comment_str = str("$pass_comments")
19 unk_strat = str("$unknowns_strategy")
18 20
19 ## read conversion information to index 21 ## read conversion information to index
20 conversion = {} 22 conversion = {}
21 23
22 with open(replace_file, 'r') as conversion_file: 24 with open(replace_file, 'r') as conversion_file:
23 for line in conversion_file: 25 for line in conversion_file:
24 conv_key_value = line.strip().split() 26 conv_key_value = line.strip().split()
25 if len(conv_key_value) == 2: 27 if len(conv_key_value) == 2:
26 conversion[conv_key_value[0]] = conv_key_value[1] 28 conversion[conv_key_value[0]] = conv_key_value[1]
27 29
28 ## read file line by line, search for column entry if it can be replaced. Otherwise it will be skipped. 30 ## read file line by line, search for column entry if it can be replaced. Otherwise it will be skipped.
29 with open("output_file", 'w') as output: 31 with open("output_file", 'w') as output:
30 with open(original_file) as original: 32 with open(original_file) as original:
31 for i, line in enumerate(original): 33 for i, line in enumerate(original):
32 if i < ignore_start_lines: 34 if i < ignore_start_lines or (comment_str and line.startswith(comment_str)):
33 output.write(line) 35 output.write(line)
34 continue 36 continue
35 37
36 if str("$delimiter") == "": 38 line_content = line.rstrip().split(delimiter_local)
37 line_content = line.split()
38 else:
39 line_content = line.split(str("$delimiter"))
40 39
41 out = list() 40 out = list()
42 for j, line_content_column in enumerate(line_content): 41 for j, line_content_column in enumerate(line_content):
43 if j == column: 42 if j == column:
43
44 if line_content_column in conversion: 44 if line_content_column in conversion:
45 out.append(conversion[line_content_column]) 45 out.append(conversion[line_content_column])
46 elif unk_strat == "print":
47 out.append(line_content_column)
48 elif unk_strat == "error":
49 raise Exception('ERROR: Encountered a value [%s] in the file that is not in the replacements file and is not commented with [%s]' % (line_content_column, comment_str))
46 else: 50 else:
47 out.append(line_content_column) 51 out.append(line_content_column)
48 52
49 if len(out) == len(line_content): 53 if len(out) == len(line_content):
50 output.write('%s\n' % delimiter_local.join(out)) 54 output.write('%s\n' % delimiter_local.join(out))
61 help="This file contains in the first column the entries that should be replaced by the values of the second column." /> 65 help="This file contains in the first column the entries that should be replaced by the values of the second column." />
62 <param name="column_replace" type="data_column" data_ref="original_file" multiple="false" 66 <param name="column_replace" type="data_column" data_ref="original_file" multiple="false"
63 label="Which column should be replaced?" /> 67 label="Which column should be replaced?" />
64 <param name="skip_lines" type='integer' value='0' label="Skip this many starting lines" /> 68 <param name="skip_lines" type='integer' value='0' label="Skip this many starting lines" />
65 <param name="delimiter" type="select" label="Delimited by"> 69 <param name="delimiter" type="select" label="Delimited by">
66 <option value="" selected="True">Tab</option> 70 <option value="tab" selected="True">Tab</option>
67 <option value=" ">Whitespace</option> 71 <option value=" ">Space</option>
68 <option value=".">Dot</option> 72 <option value=".">Dot</option>
69 <option value=",">Comma</option> 73 <option value=",">Comma</option>
70 <option value="-">Dash</option> 74 <option value="-">Dash</option>
71 <option value="_">Underscore</option> 75 <option value="_">Underscore</option>
72 <option value="|">Pipe</option> 76 <option value="|">Pipe</option>
77 </param>
78 <param name="unknowns_strategy" type="select" label="When an unknown value is encountered">
79 <option value="skip" selected="True">Skip / Do not print</option>
80 <option value="print">Print without modification</option>
81 <option value="error">Exit with an error</option>
82 </param>
83 <param name="pass_comments" type="text" value="#" label="Do not perform replacement on lines starting with">
84 <sanitizer>
85 <valid>
86 <add value="#" />
87 </valid>
88 </sanitizer>
73 </param> 89 </param>
74 </inputs> 90 </inputs>
75 <outputs> 91 <outputs>
76 <data name="outfile_replace" format="txt" from_work_dir="output_file"/> 92 <data name="outfile_replace" format="txt" from_work_dir="output_file"/>
77 </outputs> 93 </outputs>
79 <test> 95 <test>
80 <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" /> 96 <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" />
81 <param name="original_file" value="original_file" ftype="tabular" /> 97 <param name="original_file" value="original_file" ftype="tabular" />
82 <param name="column_replace" value="1"/> 98 <param name="column_replace" value="1"/>
83 <param name="skip_lines" value="1"/> 99 <param name="skip_lines" value="1"/>
84 <param name="delimiter" value="" /> 100 <param name="delimiter" value="tab" />
101 <param name="unknowns_strategy" value="skip"/>
102 <param name="pass_comments" value="#"/>
85 <output name="outfile_replace" file="result_file"/> 103 <output name="outfile_replace" file="result_file"/>
86 </test> 104 </test>
87 <test> 105 <test>
88 <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" /> 106 <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" />
89 <param name="original_file" value="empty_mapping" ftype="tabular" /> 107 <param name="original_file" value="empty_mapping" ftype="tabular" />
90 <param name="column_replace" value="1"/> 108 <param name="column_replace" value="1"/>
91 <param name="skip_lines" value="1"/> 109 <param name="skip_lines" value="1"/>
92 <param name="delimiter" value="" /> 110 <param name="delimiter" value="tab" />
111 <param name="unknowns_strategy" value="skip"/>
112 <param name="pass_comments" value="#"/>
93 <output name="outfile_replace" file="result_file_empty_mapping"/> 113 <output name="outfile_replace" file="result_file_empty_mapping"/>
114 </test>
115 <test expect_failure="True">
116 <param name="replace_information" value="neg_test_map.txt" ftype="tabular" />
117 <param name="original_file" value="neg_test_commented.txt" ftype="tabular" />
118 <param name="column_replace" value="1"/>
119 <param name="skip_lines" value="0"/>
120 <param name="delimiter" value="tab" />
121 <param name="unknowns_strategy" value="error"/>
122 <param name="pass_comments" value="#"/>
123 </test>
124 <test>
125 <param name="replace_information" value="neg_test_map.txt" ftype="tabular" />
126 <param name="original_file" value="neg_test_commented.txt" ftype="tabular" />
127 <param name="column_replace" value="1"/>
128 <param name="skip_lines" value="0"/>
129 <param name="delimiter" value="tab" />
130 <param name="unknowns_strategy" value="print"/>
131 <param name="pass_comments" value="#"/>
132 <output name="outfile_replace" file="neg_test_commented.txt"/>
94 </test> 133 </test>
95 </tests> 134 </tests>
96 <help> 135 <help>
97 <![CDATA[ 136 <![CDATA[
98 **What it does** 137 **What it does**
99 138
100 This tool replaces the entries of a defined column with entries given by a replacement file. 139 This tool replaces the entries of a defined column with entries given by a replacement file.
101 For example, the replacement file holds the naming scheme of ensembl annotated chromosomes in the first column and the UCSC annotation in the second column. 140 For example, the replacement file holds the naming scheme of ensembl annotated chromosomes in the first column and the UCSC annotation in the second column.
102 A file that contains information about chromosomes in ensembl notation in column x can now be converted to a file that holds the same information in UCSC annotation. 141 A file that contains information about chromosomes in ensembl notation in column x can now be converted to a file that holds the same information in UCSC annotation.
103 142
104 A useful repository for ensembl and UCSC chromosomes mapping is: https://github.com/dpryan79/ChromosomeMappings 143 A useful repository for ensembl and UCSC chromosomes mapping is: https://github.com/dpryan79/ChromosomeMappings
105 ]]> 144 ]]>
106 </help> 145 </help>