annotate multijoin @ 5:20344ce0c811 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
author bgruening
date Wed, 23 Nov 2016 15:56:41 -0500
parents 5314e5d6f040
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
1 #!/usr/bin/env perl
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
2 use strict;
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
3 use warnings;
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
4 use Getopt::Long qw(:config no_ignore_case);
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
5 use Data::Dumper;
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
6 use Carp;
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
7 use File::Basename;
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
8
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
## Program version (interpolated into the --help screen).
my $version = "0.1.1";

## Options with default values; may be overridden on the command line.
my $field_sep = "\t";   # -t SEP      : field separator, used for input and output
my $filler    = "0";    # -f/--filler : value printed when a key is missing from a file
my $debug     = 0;      # only referenced by the commented-out Dumper() calls

## Options without defaults (undefined until set by the command line).
my $key_column;         # -k/--key      : 1-based key column
my @values_columns;     # -v/--values   : 1-based value columns
my $input_headers;      # --in-header   : first line of each input is a header
my $output_headers;     # --out-header  : print a header line before the data
my $ignore_duplicates;  # --ignore-dups : duplicated keys are not an error
my $have_file_labels;   # --labels      : arguments are FILE LABEL pairs

## Values derived while parsing the command line.
my $max_value_column;   # largest entry of @values_columns
my $filler_string;      # $filler repeated once per value column, joined by $field_sep
my @input_files;        # input file names, in command-line order
my %file_labels;        # file name => label used in the output header

## Header names collected while reading input files:
## $input_headers{FILE}->{COLUMN} = header text of that (1-based) column.
my %input_headers;

## Forward declarations (the subs are defined after the main program).
sub parse_command_line_parameters();
sub show_help();
sub show_examples();
sub sanitize_filename($);
sub read_input_file($);
sub print_output_header();
sub print_combined_data();
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
32
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
##
## Program Start
##

## Fill the option variables, @input_files and %file_labels from @ARGV.
parse_command_line_parameters();

## Joined data, filled by read_input_file():
## $data{KEY}->{FILE} = value columns of KEY in FILE, joined by $field_sep.
my %data;
for my $input (@input_files) {
	read_input_file($input);
}
#print STDERR Dumper(\%input_headers),"\n";
#print STDERR Dumper(\%data) if $debug;

## The header line is optional; the data lines are always printed.
if ($output_headers) {
	print_output_header();
}
print_combined_data();


##
## Program End
##
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
##
## Print the header line of the joined output to STDOUT.
##
## The first field is the literal "key". It is followed by one field per
## (input file, value column) pair, in command-line order, formatted as
## "<file label>_<column name>". The column name is the text read from the
## file's header line when one was stored in %input_headers, and "V<column>"
## otherwise.
##
## Dies if the line cannot be written.
##
sub print_output_header()
{
	my @header_fields = ("key");

	foreach my $file ( @input_files ) {
		foreach my $column ( @values_columns ) {
			my $column_name;
			if ( exists $input_headers{$file}->{$column} ) {
				$column_name = $input_headers{$file}->{$column};
			}
			else {
				$column_name = "V$column";
			}

			push @header_fields, $file_labels{$file} . "_" . $column_name;
		}
	}

	print join($field_sep, @header_fields), "\n"
		or die "Output error: can't write output line: $!\n";
}
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
67
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
##
## Print the joined data to STDOUT, one line per key.
##
## Keys are printed in (string) sorted order. Each line holds the key followed
## by the stored value string of every input file, in command-line order;
## files that do not contain the key contribute $filler_string instead.
##
## Dies if a line cannot be written.
##
sub print_combined_data()
{
	foreach my $key ( sort keys %data ) {
		my $values_of_file = $data{$key};
		my @row = ($key);

		foreach my $file (@input_files) {
			if ( exists $values_of_file->{$file} ) {
				push @row, $values_of_file->{$file};
			}
			else {
				push @row, $filler_string;
			}
		}

		print join($field_sep, @row), "\n"
			or die "Output error: can't write output line: $!\n";
	}
}
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
84
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
##
## Turn a file name into a short identifier usable as an output column label.
##
## Parameter: the file name (may include a directory part).
## Returns:   the base name with its last ".ext" extension removed, with
##            leading/trailing characters other than word characters, '.' and
##            '-' stripped, and every remaining run of such characters
##            replaced by a single '_'.
## Croaks when the file name is undefined or empty.
##
sub sanitize_filename($)
{
	## NOTE: "my ($x) = shift or croak ..." never croaks - a list assignment
	## in scalar context evaluates to the number of right-hand elements (1) -
	## so the argument is validated explicitly. A file named "0" is valid.
	my $filename = shift;
	croak "missing file name"
		unless defined $filename && length $filename;

	my $file_ID = basename($filename);
	$file_ID =~ s/\.\w+$//; # remove extension
	$file_ID =~ s/^[^\w\.\-]+//; # trim bad characters at the start
	$file_ID =~ s/[^\w\.\-]+$//; # trim bad characters at the end
	$file_ID =~ s/[^\w\.\-]+/_/g; # sanitize bad characters
	return $file_ID;
}
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
95
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
##
## Read one input file and store its key/value columns in %data.
##
## Parameter: the input file name.
##
## When $input_headers is set, the first line is treated as a header: the
## names of the value columns are stored in $input_headers{FILE}->{COLUMN}
## and the line is not joined.
##
## For every other line, the key is taken from column $key_column and the
## value columns (@values_columns) are joined with $field_sep and stored in
## $data{KEY}->{FILE}.
##
## Dies when the file cannot be opened or closed, when a line has fewer
## columns than the key column or the largest value column, when the header
## line is missing, or when a key appears twice in the file (unless
## $ignore_duplicates is set, in which case the last occurrence wins).
## Croaks when the file name is undefined or empty.
##
sub read_input_file($)
{
	## "my ($x) = shift or croak ..." never croaks (a list assignment in
	## scalar context yields the element count), so check explicitly.
	my $filename = shift;
	croak "Missing input file name"
		unless defined $filename && length $filename;

	my @value_indexes = map { $_-1 } @values_columns; #zero-based indexes for value columns

	## Split on the literal separator: without quoting, separators such as
	## '|' or '.' would be interpreted as regular expressions.
	my $separator = quotemeta $field_sep;

	open my $fh, "<", $filename
		or die "Error: can't open file '$filename': $!\n";

	## Read file's header
	if ($input_headers) {
		my $line = <$fh>;
		die "Input error: file '$filename' is empty (expected a header line)\n"
			unless defined $line;
		chomp $line;
		## Limit -1 keeps trailing empty fields, so they count as columns.
		my @fields = split /$separator/, $line, -1;

		my $num_input_fields = scalar(@fields);
		die "Input error: file '$filename' line $. doesn't have enough columns (value column = $max_value_column, line has only $num_input_fields columns)\n" if $num_input_fields < $max_value_column ;

		foreach my $col (@values_columns) {
			$input_headers{$filename}->{$col} = $fields[$col-1] ;
		}
	}

	## Read file's data
	while ( my $line = <$fh> ) {
		chomp $line;
		my @fields = split /$separator/, $line, -1;

		my $num_input_fields = scalar(@fields);
		die "Input error: file '$filename' line $. doesn't have enough columns (key column = $key_column, line has only $num_input_fields columns)\n" if $num_input_fields < $key_column ;
		die "Input error: file '$filename' line $. doesn't have enough columns (value column = $max_value_column, line has only $num_input_fields columns)\n" if $num_input_fields < $max_value_column ;

		my $key = $fields[$key_column-1];
		my $value = join($field_sep, @fields[@value_indexes]);

		die "Input error: file '$filename' line $. have duplicated key '$key'.\n"
			if (!$ignore_duplicates && exists $data{$key}->{$filename}) ;
		$data{$key}->{$filename} = $value;
	}

	## The file was opened for reading only.
	close $fh
		or die "Error: can't close file '$filename': $!\n";
}
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
140
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
##
## Parse the command line (@ARGV) and fill the global option variables.
##
## Sets: $key_column, @values_columns, $max_value_column, $field_sep,
##       $input_headers, $output_headers, $ignore_duplicates, $filler,
##       $filler_string, $have_file_labels, @input_files and %file_labels.
##
## The remaining arguments are the input files. With --labels they are
## FILE LABEL pairs and the (sanitized) label is stored in %file_labels;
## without it the label is derived from the file name by sanitize_filename().
##
## --help and --examples print their text and exit.
## Dies with an error message on invalid or missing parameters.
##
sub parse_command_line_parameters()
{
	my $values_columns_string;

	my $rc = GetOptions(
		"help"         => \&show_help,
		"key|k=i"      => \$key_column,
		"values|v=s"   => \$values_columns_string,
		"t=s"          => \$field_sep,
		"in-header"    => \$input_headers,
		"out-header|h" => \$output_headers,
		## The help screen documents both -H and --headers, so accept both.
		"headers|H"    => sub { $input_headers = 1 ; $output_headers = 1 ; },
		"ignore-dups"  => \$ignore_duplicates,
		"filler|f=s"   => \$filler,
		"examples"     => \&show_examples,
		"labels"       => \$have_file_labels,
	);
	die "Error: invalid command-line parameters.\n" unless $rc;

	die "Error: missing key column. use --key N. see --help for more details.\n" unless defined $key_column;
	die "Error: Invalid key column ($key_column). Must be bigger than zero. see --help for more details.\n" if $key_column <= 0 ;

	die "Error: missing values column. use --values V1,V2,Vn. See --help for more details.\n" unless defined $values_columns_string;
	@values_columns = split(/\s*,\s*/, $values_columns_string);

	die "Error: missing values column. use --values N,N,N. see --help for more details.\n" unless scalar(@values_columns)>0;
	foreach my $v (@values_columns) {
		die "Error: invalid value column ($v), please use only numbers>=1. see --help for more details.\n"
			unless $v =~ /^\d+$/ && $v>=1;

		## Track the largest value column; used to validate input lines.
		$max_value_column = $v unless defined $max_value_column && $max_value_column>$v;
	}

	## One filler per value column, used for keys missing from a file.
	$filler_string = join($field_sep, map { $filler } @values_columns);

	if ($have_file_labels) {
		## have file labels - each pair of parameters is a file/label pair.
		die "Error: missing input files and labels\n" if scalar(@ARGV)==0;
		die "Error: when using --labels, a pair of file names + labels is required (got odd number of arguments)\n" unless scalar(@ARGV)%2==0;

		while (@ARGV) {
			my $filename = shift @ARGV;
			my $label = shift @ARGV;
			## Same clean-up rules as sanitize_filename(), but the
			## extension (if any) is kept.
			$label =~ s/^[^\.\w\-]+//;
			$label =~ s/[^\.\w\-]+$//g;
			$label =~ s/[^\.\w\-]+/_/g;

			$file_labels{$filename} = $label;
			push @input_files, $filename;
		}
	} else {
		## no file labels - the rest of the arguments are just file names;
		@input_files = @ARGV;
		die "Error: missing input files\n" if scalar(@input_files)==0;
		die "Error: need more than one input file to join.\n" if scalar(@input_files)==1;

		foreach my $file (@input_files) {
			$file_labels{$file} = sanitize_filename($file);
		}
	}
}
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
205
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
##
## Print the usage/help screen to STDOUT and exit with status 0.
##
## The text interpolates the program version ($version) and the current
## filler value ($filler). Used as the Getopt::Long handler for --help.
##
sub show_help()
{
	my $help_text = <<"EOF";
Multi-File join, version $version
Copyright (C) 2012 - A. Gordon (gordon at cshl dot edu)
License AGPLv3+: Affero GPL version 3 or later (http://www.gnu.org/licenses/agpl.html)

Usage:
multijoin [OPTIONS] -k N -v V1,V2,Vn,.. FILE1 FILE2 ... FILEn

Options:

--help This helpful help screen.

-k N
--key N Use column N as key column.

-v V1,V2,Vn
--values V1,V2,Vn
Use columns V1,V2,Vn as value columns - those will be joined
According to the Key column.
Multiple columns can be specified.

-t SEP Use SEP as field separator character (default: tab).

-h
--out-header Add a header line to the output file.

--in-header The input files have a header line.
The first line will not be joined.
if '--out-header' is also used, the output column headers will
be constructed based on the input header column names.

-H
--headers Same as '--in-header --out-header' combined.

--ignore-dups Ignore duplicated keys (within a file).
By default, duplicated keys cause an error.

-f X
--filler X Fill missing values with X.
(Default: '$filler').

--labels When printing output headers with '-h', instead of using the file name,
use specific labels.
Each file name must be followed by a name.

example (without labels):
\$ multijoin -h -k 1 -v 2 A.TXT B.TXT C.TXT

example (with labels):
\$ multijoin -h --labels -k 1 -v 2 A.TXT Sample1 B.TXT SampleB C.TXT SampleC

--examples Show detailed examples.

EOF
	print $help_text;
	exit(0);
}
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
264
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
##
## Print a worked example (three input files and the joined result) to
## STDOUT and exit with status 0.
##
## Used as the Getopt::Long handler for --examples.
##
sub show_examples()
{
	## The sample header uses "<label>_V<column>" (single underscore),
	## which is what print_output_header() emits for files without headers.
	print<<EOF;

To join three files, based on the 4th column, and keeping the 7th,8th,9th columns:

\$ head *.txt
==> AAA.txt <==
chr4 888449 890171 FBtr0308778 0 + 266 1527 1722
chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850
chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831
chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831
chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831
chr4 995793 996435 FBtr0111046 0 + 7 166 642
chr4 995793 997931 FBtr0111044 0 + 28 683 2138
chr4 995793 997931 FBtr0111045 0 + 28 683 2138
chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690

==> BBB.txt <==
chr4 90286 134453 FBtr0309803 0 + 657 29084 44167
chr4 251355 266499 FBtr0089116 0 + 56 1296 15144
chr4 252050 266506 FBtr0308086 0 + 56 1296 14456
chr4 252050 266506 FBtr0308087 0 + 56 1296 14456
chr4 252053 266528 FBtr0300796 0 + 56 1296 14475
chr4 252053 266528 FBtr0300800 0 + 56 1296 14475
chr4 252055 266528 FBtr0300798 0 + 56 1296 14473
chr4 252055 266528 FBtr0300799 0 + 56 1296 14473
chr4 252541 266528 FBtr0300797 0 + 56 1296 13987

==> CCC.txt <==
chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850
chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831
chr4 972186 979017 FBtr0089231 0 - 9927 6738 6831
chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831
chr4 995793 996435 FBtr0111046 0 + 5 304 642
chr4 995793 997931 FBtr0111044 0 + 17 714 2138
chr4 995793 997931 FBtr0111045 0 + 17 714 2138
chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690

\$ multijoin -h --key 4 --values 7,8,9 *.txt | head -n 10
key AAA_V7 AAA_V8 AAA_V9 BBB_V7 BBB_V8 BBB_V9 CCC_V7 CCC_V8 CCC_V9
FBtr0089116 0 0 0 56 1296 15144 0 0 0
FBtr0089223 5293 13394 13690 0 0 0 17646 13536 13690
FBtr0089229 3944 6428 6831 0 0 0 9927 6738 6831
FBtr0089231 3944 6428 6831 0 0 0 9927 6738 6831
FBtr0089233 3944 6428 6831 0 0 0 9927 6738 6831
FBtr0111044 28 683 2138 0 0 0 17 714 2138
FBtr0111045 28 683 2138 0 0 0 17 714 2138
FBtr0111046 7 166 642 0 0 0 5 304 642
FBtr0300796 0 0 0 56 1296 14475 0 0 0



EOF
	exit(0);
}