Mercurial > repos > immport-devteam > check_headers
changeset 1:05440ef97f8b draft default tip
"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/check_headers commit 14d780f8710fb0962a85c262d0689a9551f4f8e1"
author | azomics |
---|---|
date | Tue, 14 Jul 2020 09:46:31 -0400 |
parents | e88c99a4fb36 |
children | |
files | check_headers/getHeaders.py check_headers/getHeaders.xml check_headers/test-data/input1.txt check_headers/test-data/input2.txt check_headers/test-data/input3.txt check_headers/test-data/output.tabular getHeaders.py getHeaders.xml test-data/input1.txt test-data/input2.txt test-data/input3.txt test-data/output.tabular |
diffstat | 12 files changed, 178 insertions(+), 150 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
######################################################################
#                  Copyright (c) 2016 Northrop Grumman.
#                          All rights reserved.
######################################################################
# File: check_headers/getHeaders.py
# (removed by this changeset: a/check_headers/getHeaders.py
#  Mon Feb 27 12:41:17 2017 -0500 -> /dev/null)
import sys

from argparse import ArgumentParser


def print_headers(files, filenames, outfile):
    """Write one ``name<TAB>header line`` row per input file to *outfile*.

    Args:
        files: Paths of the text files to inspect; only the first line of
            each file is read.
        filenames: Display names, positionally matched with *files*
            (``filenames[i]`` labels ``files[i]``).
        outfile: Path of the table to create (overwritten if it exists).

    Raises:
        IndexError: If *filenames* is shorter than *files*.
        OSError: If an input file cannot be read or *outfile* cannot be
            written.
    """
    with open(outfile, "w") as outf:
        for i, eachfile in enumerate(files):
            with open(eachfile, "r") as ef:
                # Drop the line terminator here and add exactly one below:
                # an empty file, or a file whose only line has no trailing
                # newline, would otherwise produce a row that runs into the
                # next file's row.
                headers = ef.readline().rstrip("\n")
            outf.write("\t".join([filenames[i], headers]) + "\n")


if __name__ == "__main__":
    parser = ArgumentParser(
        prog="GetHeaders",
        description="Gets the headers of all files in given set.")

    parser.add_argument(
        '-i',
        dest="input_files",
        required=True,
        action='append',
        help="File location for the text files.")

    parser.add_argument(
        '-n',
        dest="file_names",
        required=True,
        action='append',
        help="File names.")

    parser.add_argument(
        '-o',
        dest="output_file",
        required=True,
        help="Name of the output file.")

    args = parser.parse_args()
    print_headers(args.input_files, args.file_names, args.output_file)
    sys.exit(0)
--- a/check_headers/getHeaders.xml Mon Feb 27 12:41:17 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ -<tool id="get_headers" name="Check headers" version="1.1"> - <description>of any set of flowtext files.</description> - <stdio> - <exit_code range="1:" /> - </stdio> - <command><![CDATA[ - python $__tool_directory__/getHeaders.py -o "${output_file}" - #for $f in $input# - -i "${f}" - -n "${f.name}" - #end for# - ]]> - </command> - <inputs> - <param format="flowtext" name="input" type="data_collection" collection_type="list" label="Text files Collection"/> - </inputs> - <outputs> - <data format="tabular" name="output_file" label="Headers of files in ${input.name}"/> - </outputs> - <tests> - <test> - <param name="input"> - <collection type="list"> - <element name="input1.txt" value="input1.txt"/> - <element name="input2.txt" value="input2.txt"/> - <element name="input3.txt" value="input3.txt"/> - </collection> - </param> - <output name="output_file" file="output.tabular"/> - </test> - </tests> - <help><![CDATA[ - This tool returns a table of the headers of a set of text files. - ------ - -**Input files** - -This tool requires collections of txt, flowtext or tabular files as input. - -**Output file** - -The output file is a table listing the headers for each file. - ------ - -**Example** - -*File1*:: - - Marker1 Marker2 Marker3 - 34 45 12 - 33 65 10 - -*File2*:: - - Marker4 Marker5 Marker3 - 19 62 98 - 12 36 58 - -*Output*:: - - File1 Marker1 Marker2 Marker3 - File2 Marker4 Marker5 Marker3 - ]]> - </help> -</tool>
--- a/check_headers/test-data/input1.txt Mon Feb 27 12:41:17 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -Forward Scatter Side Scatter FITC CD4 PE CCR3 PP CD8 APC CCR4 -449 157 551 129 169 292 -894 1023 199 277 320 227 -262 73 437 69 0 146 -340 115 509 268 0 74 -316 76 50 0 60 129 -394 144 83 138 335 194 -383 139 499 0 0 224 -800 1023 239 284 288 280 -388 97 534 111 83 177
--- a/check_headers/test-data/input2.txt Mon Feb 27 12:41:17 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -Forward Scatter Side Scatter FITC CD4 PE CXCR3 PP CD8 APC CCR5 -363 76 550 200 0 127 -372 126 519 44 51 148 -1023 1023 289 401 362 254 -770 1023 175 361 225 237 -384 111 525 121 0 138 -602 578 385 286 222 131 -788 1023 216 310 270 294 -420 211 552 479 0 62 -668 1019 73 193 227 132
--- a/check_headers/test-data/input3.txt Mon Feb 27 12:41:17 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC CD45RA -289 56 438 0 626 480 -352 153 30 147 483 386 -383 190 156 228 734 408 -261 62 432 121 598 555 -451 120 537 338 568 201 -373 104 3 110 621 584 -418 105 561 0 610 562 -358 185 0 292 641 327 -733 970 139 227 293 259
--- a/check_headers/test-data/output.tabular Mon Feb 27 12:41:17 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -input1.txt Forward Scatter Side Scatter FITC CD4 PE CCR3 PP CD8 APC CCR4 -input2.txt Forward Scatter Side Scatter FITC CD4 PE CXCR3 PP CD8 APC CCR5 -input3.txt Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC CD45RA
#!/usr/bin/env python
######################################################################
#                  Copyright (c) 2016 Northrop Grumman.
#                          All rights reserved.
######################################################################
#
# Version 1.2 - May 2018
# added leeway for files with different nb of headers
#
# File: getHeaders.py
# (added by this changeset: /dev/null -> b/getHeaders.py
#  Tue Jul 14 09:46:31 2020 -0400)
#


import sys

from argparse import ArgumentParser


def print_headers(files, filenames, outfile):
    """Write a table of the header (first) line of every input file.

    The output starts with an ``Index`` row numbering the columns from 1 to
    the widest header found, followed by one row per input file: the display
    name, then the tab-separated header fields, padded with empty cells so
    every row has the same number of columns.

    Args:
        files: Paths of the text files to inspect; only the first line of
            each file is read.
        filenames: Display names, positionally matched with *files*
            (``filenames[i]`` labels ``files[i]``).
        outfile: Path of the table to create (overwritten if it exists).

    Raises:
        IndexError: If *filenames* is shorter than *files*.
        OSError: If an input file cannot be read or *outfile* cannot be
            written.
    """
    # Keep rows in a list rather than a dict keyed by display name: two
    # inputs may carry the same name, and a dict would silently merge them
    # into a single output row.
    rows = []
    for i, eachfile in enumerate(files):
        with open(eachfile, "r") as ef:
            first_line = ef.readline().strip()
        rows.append((filenames[i], first_line.split("\t")))

    # Width of the widest header; 0 when there are no input files.
    width = max((len(columns) for _, columns in rows), default=0)
    index = [str(n) for n in range(1, width + 1)]

    with open(outfile, "w") as outf:
        outf.write("Index\t" + "\t".join(index) + "\n")
        for name, columns in rows:
            # Pad shorter headers so every row lines up with the index row.
            padded = columns + [""] * (width - len(columns))
            outf.write(name + "\t" + "\t".join(padded) + "\n")


if __name__ == "__main__":
    parser = ArgumentParser(
        prog="GetHeaders",
        description="Gets the headers of all files in given set.")

    parser.add_argument(
        '-i',
        dest="input_files",
        required=True,
        action='append',
        help="File location for the text files.")

    parser.add_argument(
        '-n',
        dest="file_names",
        required=True,
        action='append',
        help="File names.")

    parser.add_argument(
        '-o',
        dest="output_file",
        required=True,
        help="Name of the output file.")

    args = parser.parse_args()
    # Names are matched to files by position, so the two lists must agree;
    # fail up front with a usage error instead of mislabelling rows.
    if len(args.input_files) != len(args.file_names):
        parser.error("each -i input file needs exactly one matching -n name")
    print_headers(args.input_files, args.file_names, args.output_file)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/getHeaders.xml Tue Jul 14 09:46:31 2020 -0400 @@ -0,0 +1,71 @@ +<tool id="get_headers" name="Check headers" version="1.1+galaxy0" profile="18.01"> + <description>of any set of flowtext files</description> + <requirements> + <requirement type="package" version="3.8.3">python</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command><![CDATA[ + python3 '$__tool_directory__/getHeaders.py' -o '${output_file}' + #for $f in $input + -i '${f}' + -n '${f.name}' + #end for + ]]> + </command> + <inputs> + <param format="flowtext,tabular,txt" name="input" type="data_collection" collection_type="list" label="Text files Collection"/> + </inputs> + <outputs> + <data format="tabular" name="output_file" label="Headers of files in ${input.name}"/> + </outputs> + <tests> + <test> + <param name="input"> + <collection type="list"> + <element name="input1.txt" value="input1.txt"/> + <element name="input2.txt" value="input2.txt"/> + <element name="input3.txt" value="input3.txt"/> + </collection> + </param> + <output name="output_file" file="output.tabular" lines_diff="4"/> + </test> + </tests> + <help><![CDATA[ + This tool returns a table of the headers of a set of text files. + +----- + +**Input files** + +This tool requires collections of txt, flowtext or tabular files as input. + +**Output file** + +The output file is a table listing the headers for each file. + +----- + +**Example** + +*File1*:: + + Marker1 Marker2 Marker3 + 34 45 12 + 33 65 10 + +*File2*:: + + Marker4 Marker5 Marker3 + 19 62 98 + 12 36 58 + +*Output*:: + + Index 1 2 3 + File1 Marker1 Marker2 Marker3 + File2 Marker4 Marker5 Marker3 + ]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input1.txt Tue Jul 14 09:46:31 2020 -0400 @@ -0,0 +1,10 @@ +Forward Scatter Side Scatter FITC CD4 PE CCR3 PP CD8 APC CCR4 +449 157 551 129 169 292 +894 1023 199 277 320 227 +262 73 437 69 0 146 +340 115 509 268 0 74 +316 76 50 0 60 129 +394 144 83 138 335 194 +383 139 499 0 0 224 +800 1023 239 284 288 280 +388 97 534 111 83 177
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input2.txt Tue Jul 14 09:46:31 2020 -0400 @@ -0,0 +1,10 @@ +Forward Scatter Side Scatter FITC CD4 PE CXCR3 PP CD8 APC CCR5 +363 76 550 200 0 127 +372 126 519 44 51 148 +1023 1023 289 401 362 254 +770 1023 175 361 225 237 +384 111 525 121 0 138 +602 578 385 286 222 131 +788 1023 216 310 270 294 +420 211 552 479 0 62 +668 1019 73 193 227 132
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input3.txt Tue Jul 14 09:46:31 2020 -0400 @@ -0,0 +1,10 @@ +Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC CD45RA +289 56 438 0 626 480 +352 153 30 147 483 386 +383 190 156 228 734 408 +261 62 432 121 598 555 +451 120 537 338 568 201 +373 104 3 110 621 584 +418 105 561 0 610 562 +358 185 0 292 641 327 +733 970 139 227 293 259
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.tabular Tue Jul 14 09:46:31 2020 -0400 @@ -0,0 +1,4 @@ +Index 1 2 3 4 5 6 +input3.txt Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC CD45RA +input2.txt Forward Scatter Side Scatter FITC CD4 PE CXCR3 PP CD8 APC CCR5 +input1.txt Forward Scatter Side Scatter FITC CD4 PE CCR3 PP CD8 APC CCR4