comparison replaceColumn.xml @ 0:cc18bac5afdb draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/replaceColumn/tools/replaceColumn commit 045006e0b2fe5b4fe96583949b0c757eb6a734a4
author bgruening
date Fri, 24 Feb 2017 10:14:15 -0500
parents
children d533e4b75800
comparison
equal deleted inserted replaced
-1:000000000000 0:cc18bac5afdb
<!--
    Galaxy tool wrapper: replaces the values of one column of a delimited text
    file using a two-column key/value mapping file.

    The work is done by a small Python script that Galaxy renders from the
    Cheetah template in <configfiles> and then executes through <command>.
    Rows whose value in the selected column has no entry in the mapping file
    are dropped from the output; rows that do not have the selected column at
    all are passed through.
-->
<tool id="replace_column_with_key_value_file" name="Replace column" version="0.1">
    <description>by values which are defined in a convert file</description>
    <command>
<![CDATA[
        python '$replaceColumnScript'
]]>
    </command>
    <configfiles>
        <configfile name="replaceColumnScript">
<![CDATA[
## Cheetah substitutes the dataset paths and parameter values below before the script runs.
replace_file = '$replace_information'
original_file = '$original_file'
## The data_column parameter is 1-based, Python list indices are 0-based.
column = int("$column_replace") - 1
ignore_start_lines = int("$skip_lines")

## An empty value is the "Tab" option: fields are split on any run of whitespace
## and written back tab-separated. Every other option splits and joins on the
## chosen character.
## NOTE(review): splitting the "Tab" case on all whitespace collapses empty fields
## and breaks fields that contain spaces; kept as is because existing expected
## outputs may rely on it. Confirm before switching to a strict tab split.
delimiter = str("$delimiter")
split_on = delimiter if delimiter else None
join_with = delimiter if delimiter else "\t"

## read conversion information to index
conversion = {}

with open(replace_file, 'r') as conversion_file:
    for line in conversion_file:
        conv_key_value = line.strip().split()
        ## only well-formed "key value" lines are used, everything else is ignored
        if len(conv_key_value) == 2:
            conversion[conv_key_value[0]] = conv_key_value[1]

## read file line by line, search for column entry if it can be replaced. Otherwise it will be skipped.
with open("output_file", 'w') as output, open(original_file) as original:
    for i, line in enumerate(original):
        ## the first ignore_start_lines lines (e.g. a header) are copied verbatim
        if i < ignore_start_lines:
            output.write(line)
            continue

        ## Remove the line terminator before splitting. Otherwise, with an explicit
        ## delimiter, the last field keeps its newline: a key in the last column is
        ## never found in the mapping and every written row ends in a blank line.
        fields = line.rstrip("\r\n").split(split_on)

        ## rows without the selected column are written back unchanged
        if 0 <= column < len(fields):
            key = fields[column]
            if key not in conversion:
                ## no mapping for this value: the whole row is dropped
                continue
            fields[column] = conversion[key]

        output.write('%s\n' % join_with.join(fields))

]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="original_file" type="data" format="tabular"
               label="File in which you want to replace some values"
               help="The entries of a specific column are replaced by the information given by the next input file." />
        <param name="replace_information" type="data" format="tabular"
               label="Replace information file"
               help="This file contains in the first column the entries that should be replaced by the values of the second column." />
        <param name="column_replace" type="data_column" data_ref="original_file" multiple="false"
               label="Which column should be replaced?" />
        <param name="skip_lines" type="integer" value="0" label="Skip this many starting lines" />
        <!-- The empty value stands for "Tab"; the script treats it as a special case. -->
        <param name="delimiter" type="select" label="Delimited by">
            <option value="" selected="true">Tab</option>
            <option value=" ">Whitespace</option>
            <option value=".">Dot</option>
            <option value=",">Comma</option>
            <option value="-">Dash</option>
            <option value="_">Underscore</option>
            <option value="|">Pipe</option>
        </param>
    </inputs>
    <outputs>
        <!-- The script always writes to "output_file" in the job working directory. -->
        <data name="outfile_replace" format="txt" from_work_dir="output_file"/>
    </outputs>
    <tests>
        <test>
            <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" />
            <param name="original_file" value="original_file" ftype="tabular" />
            <param name="column_replace" value="1"/>
            <param name="skip_lines" value="1"/>
            <param name="delimiter" value="" />
            <output name="outfile_replace" file="result_file"/>
        </test>
        <test>
            <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" />
            <param name="original_file" value="empty_mapping" ftype="tabular" />
            <param name="column_replace" value="1"/>
            <param name="skip_lines" value="1"/>
            <param name="delimiter" value="" />
            <output name="outfile_replace" file="result_file_empty_mapping"/>
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool replaces the entries of a defined column with entries given by a replacement file.
For example the replacement file holds the information of the naming scheme of ensembl annotated chromosomes in the first column and in the second the UCSC annotation.
A file which has information about chromosomes in ensembl notation in column x can now be converted to a file which holds the same information but in UCSC annotation.

A useful repository for ensembl and UCSC chromosomes mapping is: https://github.com/dpryan79/ChromosomeMappings
]]>
    </help>
</tool>