# HG changeset patch # User artbio # Date 1553080636 14400 # Node ID 3a4694d4354fd5f93bca6a8d61cc2b0f355ba6e1 # Parent 6f54dc6b37daadcb93b740444875e63a85c89e95 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 618a7892f6af26278364a75ab23b3c6d8cdc73db diff -r 6f54dc6b37da -r 3a4694d4354f catWrapper.xml --- a/catWrapper.xml Sun Mar 11 18:19:40 2018 -0400 +++ b/catWrapper.xml Wed Mar 20 07:17:16 2019 -0400 @@ -1,43 +1,246 @@ - - tail-to-head + + tail-to-head by specifying how "$out_file1" + #if $headers == 0: + #set $concat_command = "cat" #else: - #for $file in $input - printf "# ${file.element_identifier}\n" >> "$out_file1" && - cat "$file" >> "$out_file1" && - #end for - sleep 1 + #set $concat_command = 'tail -q -n +'+ str(int($headers)+1) + #end if + #if $global_condition.input_type == "singles": + #if $dataset_names == "No": + $concat_command + #for $file in $global_condition.inputs + '$file' + #end for + > '$out_file1' + #else: + #for $file in $global_condition.inputs + #if $file.ext[-2:] == "gz": + printf "# ${file.element_identifier}\n" | gzip -c >> '$out_file1' && + gzip -dc "$file" | $concat_command |gzip -c >> '$out_file1' && + #else: + printf "# ${file.element_identifier}\n" >> '$out_file1' && + $concat_command "$file" >> '$out_file1' && + #end if + #end for + sleep 1 + #end if + #else if $global_condition.input_type == "paired_collection": + #if $global_condition.paired_cat_type == "by_strand": + #if $dataset_names == "No": + #for $file in $global_condition.inputs + $concat_command + $file['forward'] + >> '$forward' && + $concat_command + $file['reverse'] + >> '$reverse' && + #end for + sleep 1 + #else: + #for $file in $global_condition.inputs.keys() + printf "# ${file}_forward\n" >> '$forward' && + $concat_command + $global_condition.inputs[$file]['forward'] + >> '$forward' && + printf "# ${file}_reverse\n" >> '$reverse' && + $concat_command + $global_condition.inputs[$file]['reverse'] + 
>> '$reverse' && + #end for + sleep 1 + #end if + #else if $global_condition.paired_cat_type == "by_pair": + mkdir concatenated && + #if $dataset_names == "No": + #for $file in $global_condition.inputs.keys() + $concat_command + $global_condition.inputs[$file]['forward'] + > concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #end for + sleep 1 + #else: + #for $file in $global_condition.inputs.keys() + printf "# ${file}_forward\n" > concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command + $global_condition.inputs[$file]['forward'] + >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && + printf "# ${file}_reverse\n" >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #end for + sleep 1 + #end if + #else if $global_condition.paired_cat_type == "all": + #if $dataset_names == "No": + #for $file in $global_condition.inputs.keys() + $concat_command + $global_condition.inputs[$file]['forward'] + >> '$out_file1' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> '$out_file1' && + #end for + sleep 1 + #else: + #for $file in $global_condition.inputs.keys() + printf "# ${file}_forward\n" >> '$out_file1' && + $concat_command + $global_condition.inputs[$file]['forward'] + >> '$out_file1' && + printf "# ${file}_reverse\n" >> '$out_file1' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> '$out_file1' && + #end for + sleep 1 + #end if + #end if #end if ]]> - - - - - + + + + + + + + + + + + + + + + + + + - + + global_condition['input_type'] == 'singles' or (global_condition['input_type'] == 'paired_collection' and 
global_condition['paired_cat_type'] == 'all') + + + + + global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_strand' + + + + global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair' + - - - + + + + + + - - - + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -45,15 +248,23 @@ **WARNING:** This tool does not check if the datasets being concatenated are in the same format. +**WARNING:** The paired collection operations do not handle gziped files. + ----- **What it does** -Concatenates datasets +Concatenates datasets and paired collections with multiple options: + + - It's possible select either a concatenation by strand, by pair or a whole collection concatenation, when the input is a paired collection. + + - Skipping lines before concatenation to avoid headers + + - Add the name of the concatenated files as separator ----- -**Example** +**Single datasets concatenation example** Concatenating Dataset:: @@ -83,6 +294,98 @@ ----- +**Paired collection concatenation example** + +1rst pair:: + + forward - reverse + +2nd pair:: + + forward - reverse + +Concatenation by strand:: + + concatenates: + + 1rst forward + 2nd forward + 1rst reverse + 2nd reverse + + outputs: + + 1 pair + +Concatenation by pair:: + + concatenates: + + 1rst forward + 1rst reverse + 2nd forward + 2nd reverse + + outputs: + + 2 datasets + +Concatenate all:: + + concatenates: + + 1rst forward + 1rst reverse + 2nd forward + 2nd reverse + + outputs: + + 1 dataset + +----- + +**When selecting "Include dataset names" when concatenating files**: + +1rst file name="first_tabular":: + + chrX 151087187 151087355 A 0 - + chrX 151572400 151572481 B 0 + + +2nd file name="second_tabular":: + + chr1 151242630 151242955 X 0 + + chr1 151271715 151271999 Y 0 + + chr1 151278832 151279227 
Z 0 - + +output:: + + # first_tabular + chrX 151087187 151087355 A 0 - + chrX 151572400 151572481 B 0 + + # second_tabular + chr1 151242630 151242955 X 0 + + chr1 151271715 151271999 Y 0 + + chr1 151278832 151279227 Z 0 - + +----- + +**Skiping lines** + +1rst file:: + + chrX 151087187 151087355 A 0 - + chrX 151572400 151572481 B 0 + + +2nd file:: + + chr1 151242630 151242955 X 0 + + chr1 151271715 151271999 Y 0 + + chr1 151278832 151279227 Z 0 - + +skipping 1 line + +output:: + + chrX 151572400 151572481 B 0 + + chr1 151271715 151271999 Y 0 + + chr1 151278832 151279227 Z 0 - + +----- + Adapted from galaxy's catWrapper.xml to allow multiple input files. diff -r 6f54dc6b37da -r 3a4694d4354f test-data/1.fastq.gz Binary file test-data/1.fastq.gz has changed diff -r 6f54dc6b37da -r 3a4694d4354f test-data/1_f.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1_f.fastq Wed Mar 20 07:17:16 2019 -0400 @@ -0,0 +1,12 @@ +@NS500801:189:HWGFJBGX5:1:11101:18757:1118 1:N:0:GCCAAT +CACCGTTACATCACACCAACCAAGATTACATCACACCACTCAGGATGACAGACAGTAATCCTGTTTGTTATATANG ++ +AAAAAEEEEEEEEEEEEAEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEE