Previous changeset 21:f4ed6a65a2ff (2023-07-27) Next changeset 23:36c418bca8b2 (2023-08-02) |
Commit message:
planemo upload commit 20bdf879b52796d3fb251a20807191ff02084d3c-dirty |
modified:
ChipSeqRatioAnalysis.py ChipSeqRatioDef.xml RM_custom_search.py RM_custom_search.xml RM_html_report.R annot2krona.py cluster_table2krona_format.py cluster_table2krona_format.xml deinterlacer.py fasta_affixer.py fasta_affixer.xml fasta_interlacer.py fasta_interlacer.xml fastq_name_affixer.py fastq_name_affixer.xml name_affixer.py pairScan.py pairScan.xml paired_fastq_filtering.R paired_fastq_filtering.xml paired_fastq_filtering_wrapper.sh parallel.py plot_comparative_clustering_summary.xml renameSequences.xml renameSequences2.py rmsk_summary_table_multiple.r sampleFasta.xml single_fastq_filtering.R single_fastq_filtering.xml single_fastq_filtering_wrapper.sh summarize_cluster_table.R summarize_cluster_table.xml test_run1.sh test_run2.sh tool_data/organele_ref_and_phi-X174.fasta |
b |
diff -r f4ed6a65a2ff -r 58807b35777a ChipSeqRatioDef.xml --- a/ChipSeqRatioDef.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/ChipSeqRatioDef.xml Wed Aug 02 11:31:12 2023 +0000 |
b |
@@ -1,4 +1,4 @@ -<tool id="chip_seq_ratio_1" name="ChIP-Seq Mapper" version="0.1.1"> +<tool id="chip_seq_ratio_1" name="ChIP-Seq Mapper" version="0.1.1.3"> <stdio> <exit_code range="1:" level="fatal" description="Error"/> </stdio> @@ -9,8 +9,12 @@ <requirement type="package">blast</requirement> <!-- <requirement type="package">chip_seq_ration</requirement> --> </requirements> - <command interpreter="python3"> - ChipSeqRatioAnalysis.py + <required_files> + <include type="literal" path="ChipSeqRatioAnalysis.py"/> + <include type="literal" path="ChipSeqRatioAnalysis.R"/> + </required_files> + <command> + python '$__tool_directory__'/ChipSeqRatioAnalysis.py --ChipSeq=${ChipFile} --InputSeq=${InputFile} --Contigs=${ContigFile} |
b |
diff -r f4ed6a65a2ff -r 58807b35777a RM_custom_search.xml --- a/RM_custom_search.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/RM_custom_search.xml Wed Aug 02 11:31:12 2023 +0000 |
b |
@@ -1,33 +1,46 @@ -<tool id="RMsearch2" name="RepeatMasker custom search2" version="1.0.3"> +<tool id="RMsearch2" name="RepeatMasker custom search2" version="1.0.3.3"> - <description>Scan clustering results using RepeatMasker against custom database of repeats</description> - <requirements> - <requirement type="package" version="4.0.7">repeatmasker</requirement> - <requirement type="package" version="3.6">python</requirement> - <requirement type="package" version="2.3.2">r-r2html</requirement> - <requirement type="package" version="2.54.0">bioconductor-biostrings</requirement> - </requirements> + <description>Scan clustering results using RepeatMasker against custom database of + repeats + </description> + <requirements> + <requirement type="package" version="4.0.7">repeatmasker</requirement> + <requirement type="package" version="3.6">python</requirement> + <requirement type="package" version="2.3.2">r-r2html</requirement> + <requirement type="package" version="2.54.0">bioconductor-biostrings</requirement> + </requirements> + <required_files> + <include type="literal" path="RM_custom_search.py"/> + <include type="literal" path="parallel.py"/> + </required_files> - <command> + + <command> - python3 ${__tool_directory__}/RM_custom_search.py -i $input_zip -d $RMdatabase -g $__root_dir__ -r $output_html; - - </command> + python3 ${__tool_directory__}/RM_custom_search.py -i $input_zip -d $RMdatabase -g + $__root_dir__ -r $output_html; - <inputs> - <param format="zip" type="data" name="input_zip" label="RepeatExplorer output data archive" help="Zip archive obtained from previouse Graph-based sequence clustering"/> - <param name="RMdatabase" format="fasta" type="data" label="Library of repeats" help="Library of repeats as DNA sequences in fasta format. The recommended format for IDs in a custom library is : '>reapeatname#class/subclass'"/> - </inputs> - - <outputs> - <data format="html" name="output_html" label="HTML summary of custom database ${RMdatabase.hid} search on dataset ${input_zip.hid} " /> - </outputs> + </command> + + <inputs> + <param format="zip" type="data" name="input_zip" + label="RepeatExplorer output data archive" + help="Zip archive obtained from previouse Graph-based sequence clustering"/> + <param name="RMdatabase" format="fasta" type="data" label="Library of repeats" + help="Library of repeats as DNA sequences in fasta format. The recommended format for IDs in a custom library is : '>reapeatname#class/subclass'"/> + </inputs> - <help> - **What it does** + <outputs> + <data format="html" name="output_html" + label="HTML summary of custom database ${RMdatabase.hid} search on dataset ${input_zip.hid} "/> + </outputs> - Use this tool if you want to scan previous clustering result with custom database of repeats using repeatmasker. - - </help> + <help> + **What it does** + + Use this tool if you want to scan previous clustering result with custom database + of repeats using repeatmasker. + + </help> </tool> |
b |
diff -r f4ed6a65a2ff -r 58807b35777a cluster_table2krona_format.xml --- a/cluster_table2krona_format.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/cluster_table2krona_format.xml Wed Aug 02 11:31:12 2023 +0000 |
[ |
@@ -1,6 +1,9 @@ -<tool id="cluster_table2krona_format" name="Convert RepeatExplorer2 CLUSTER_table.csv to Krona formatted input " version="1.0.0" python_template_version="3.5"> +<tool id="cluster_table2krona_format" name="Convert RepeatExplorer2 CLUSTER_table.csv to Krona formatted input " version="1.0.0.3" python_template_version="3.5"> + <required_files> + <include type="literal" path="cluster_table2krona_format.py"/> + </required_files> <command detect_errors="exit_code"><![CDATA[ - $__tool_directory__/cluster_table2krona_format.py --input ${input} --output ${output} + python '$__tool_directory__'/cluster_table2krona_format.py --input ${input} --output ${output} #if $column == "Final_annotation" -m #end if |
b |
diff -r f4ed6a65a2ff -r 58807b35777a fasta_affixer.xml --- a/fasta_affixer.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/fasta_affixer.xml Wed Aug 02 11:31:12 2023 +0000 |
b |
@@ -1,81 +1,93 @@ -<tool id="fasta_affixer" name="FASTA read name affixer" version="1.0.0"> -<description> Appending suffix and prefix to the read names </description> -<command interpreter="python3"> -fasta_affixer.py -f $input -p "$prefix" -s "$suffix" -n $nspace -o $output -</command> +<tool id="fasta_affixer" name="FASTA read name affixer" version="1.0.0.3"> + <description>Appending suffix and prefix to the read names</description> + <required_files> + <include type="literal" path="fasta_affixer.py"/> + </required_files> + + <command> + python '$__tool_directory__'/fasta_affixer.py -f $input -p "$prefix" -s "$suffix" + -n $nspace -o $output + </command> + + <inputs> + <param format="fasta" type="data" name="input" label="Choose your FASTA file"/> + <param name="prefix" type="text" size="10" value="" label="Prefix" + help="Enter prefix which will be added to all read names"/> + <param name="suffix" type="text" size="10" value="" label="Suffix" + help="Enter suffix which will be added to all read names"/> + <param name="nspace" type="integer" size="10" value="0" min="0" max="1000" + label="Number of spaces in the name to ignore" + help="By default, a string before the first space is considered to be the read name, and all characters following the space are discarded. If you want to keep the information following the space(s) in the name, enter positive integer."/> + </inputs> + + <outputs> + <data format="fasta" name="output" + label="FASTA dataset ${input.hid} with modified sequence names"/> + </outputs> - <inputs> - <param format="fasta" type="data" name="input" label="Choose your FASTA file" /> - <param name="prefix" type="text" size="10" value="" label="Prefix" help="Enter prefix which will be added to all read names" /> - <param name="suffix" type="text" size="10" value="" label="Suffix" help="Enter suffix which will be added to all read names"/> - <param name="nspace" type="integer" size="10" value="0" min="0" max="1000" label="Number of spaces in the name to ignore" help="By default, a string before the first space is considered to be the read name, and all characters following the space are discarded. If you want to keep the information following the space(s) in the name, enter positive integer."/> - </inputs> + <tests> + <test> + <param name="input" value="single_output.fasta"/> + <param name="prefix" value="TEST"/> + <param name="suffux" value="OK"/> + <param name="nspace" value="0"/> + <output name="output" value="prefix_suffix.fasta"/> + </test> + </tests> + <help> + **What is does** + + Tool for appending prefix and suffix to sequences names in fasta formated + sequences. This tool is useful + if you want to do comparative analysis with RepeatExplorer and need to + append sample codes to sequence identifiers + + **Example** + The following fasta file: + + :: + + >123454 + acgtactgactagccatgacg + >234235 + acgtactgactagccatgacg + + is renamed to: + + :: + + >prefix123454suffix + acgtactgactagccatgacg + >prefix234235suffix + acgtactgactagccatgacg - <outputs> - <data format="fasta" name="output" label="FASTA dataset ${input.hid} with modified sequence names" /> - </outputs> + By default, anything after spaces is + excluded from sequences name. In example sequence: + + :: - <tests> - <test> - <param name="input" value="single_output.fasta" /> - <param name="prefix" value="TEST" /> - <param name="suffux" value="OK"/> - <param name="nspace" value="0" /> - <output name="output" value="prefix_suffix.fasta" /> - </test> - </tests> - <help> -**What is does** - -Tool for appending prefix and suffix to sequences names in fasta formated sequences. This tool is useful -if you want to do comparative analysis with RepeatExplorer and need to -append sample codes to sequence identifiers + >SRR352150.23846180 HWUSI-EAS1786:7:119:15910:19280/1 + CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC + IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIHIIIIIFIIIIIIHDHBBIHFIHIIBHHDDHIFHIHIIIHIHGGDFDEI@EGEGFGFEFB@ECG -**Example** -The following fasta file: - -:: + when **Number of spaces in name to ignore** is set to 0 (default) the output will + be: - >123454 - acgtactgactagccatgacg - >234235 - acgtactgactagccatgacg - -is renamed to: + :: -:: - - >prefix123454suffix - acgtactgactagccatgacg - >prefix234235suffix - acgtactgactagccatgacg + >prefixSRR352150.23846180suffix + CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC -By default, anything after spaces is -excluded from sequences name. In example sequence: - -:: - - >SRR352150.23846180 HWUSI-EAS1786:7:119:15910:19280/1 - CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC - IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIHIIIIIFIIIIIIHDHBBIHFIHIIBHHDDHIFHIHIIIHIHGGDFDEI@EGEGFGFEFB@ECG + If you want to keep spaces the setting **Number of spaces in name to ignore** to 1 + will yield -when **Number of spaces in name to ignore** is set to 0 (default) the output will be: - -:: - - >prefixSRR352150.23846180suffix - CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC + :: - -If you want to keep spaces the setting **Number of spaces in name to ignore** to 1 will yield - -:: - - >prefixSRR352150.23846180 HWUSI-EAS1786:7:119:15910:19280/1suffix - CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC + >prefixSRR352150.23846180 HWUSI-EAS1786:7:119:15910:19280/1suffix + CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC -</help> + </help> </tool> |
b |
diff -r f4ed6a65a2ff -r 58807b35777a fasta_interlacer.xml --- a/fasta_interlacer.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/fasta_interlacer.xml Wed Aug 02 11:31:12 2023 +0000 |
b |
@@ -1,33 +1,39 @@ +<tool id="fasta_interlacer" name="FASTA interlacer" version="1.0.0.3"> + <description>Join pared reads into single file</description> + <required_files> + <include type="literal" path="fasta_interlacer.py"/> + </required_files> + <command> + python '$__tool_directory__'/fasta_interlacer.py -a $A -b $B -p $paired -x $single + </command> -<tool id="fasta_interlacer" name="FASTA interlacer" version="1.0.0"> -<description> Join pared reads into single file </description> -<command interpreter="python"> -fasta_interlacer.py -a $A -b $B -p $paired -x $single -</command> - - <inputs> - <param format="fasta" type="data" name="A" label="Left-hand mates" /> - <param format="fasta" type="data" name="B" label="Right-hand mates" /> - </inputs> + <inputs> + <param format="fasta" type="data" name="A" label="Left-hand mates"/> + <param format="fasta" type="data" name="B" label="Right-hand mates"/> + </inputs> - <outputs> - <data format="fasta" name="paired" label="Interlaced paired reads from datasets ${A.hid} and ${B.hid} "/> - <data format="fasta" name="single" label="Reads without corresponding mate from datasets ${A.hid} and ${B.hid}"/> - </outputs> + <outputs> + <data format="fasta" name="paired" + label="Interlaced paired reads from datasets ${A.hid} and ${B.hid} "/> + <data format="fasta" name="single" + label="Reads without corresponding mate from datasets ${A.hid} and ${B.hid}"/> + </outputs> - <help> -**What it does** - This tools joins paired end FASTA reads from separate files, one with the left mates and one with the right mates, into a single files. - Last character in identifiers is used to distinguish pairs. - -**Note !!!** - This tools is to be used as more efficient replacement of FASTQ interlacer. Galaxy built-in FASTQ interlacer allows different ordering - of sequences in both files but this flexibility comes with high memory requirements when large files are used. FASTA interlacer is simple but order of magnitude - faster tools which can be used on files where reads are in the same order. - - -</help> + <help> + **What it does** + This tools joins paired end FASTA reads from separate files, one with the left + mates and one with the right mates, into a single files. + Last character in identifiers is used to distinguish pairs. - + **Note !!!** + This tools is to be used as more efficient replacement of FASTQ interlacer. Galaxy + built-in FASTQ interlacer allows different ordering + of sequences in both files but this flexibility comes with high memory + requirements when large files are used. FASTA interlacer is simple but order of + magnitude + faster tools which can be used on files where reads are in the same order. + + + </help> </tool> |
b |
diff -r f4ed6a65a2ff -r 58807b35777a fastq_name_affixer.xml --- a/fastq_name_affixer.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/fastq_name_affixer.xml Wed Aug 02 11:31:12 2023 +0000 |
b |
@@ -1,95 +1,110 @@ <tool id="names_affixer" name="FASTQ Read name affixer" version="1.0.0"> -<description> Tool appending suffix and prefix to sequences names </description> -<command interpreter="python"> -${__tool_directory__}/name_affixer.py -f $input -p "$prefix" -s "$suffix" -n $nspace > $output -</command> + <description>Tool appending suffix and prefix to sequences names</description> + <required_files> + <include type="literal" path="name_affixer.py"/> + </required_files> + <command> + ${__tool_directory__}/name_affixer.py -f $input -p "$prefix" -s "$suffix" -n + $nspace > $output + </command> - <inputs> - <param format="fastq" type="data" name="input" label="Choose your FASTQ file" /> - <param name="prefix" type="text" size="10" value="" label="Prefix" help="Enter prefix which will be added to all sequences names" /> - <param name="suffix" type="text" size="10" value="" label="Suffix" help="Enter suffix which will be added to all sequences names"/> - <param name="nspace" type="integer" size="10" value="0" min="0" max="1000" label="Number of spaces in sequence name to ignore" help="Sequence name is a string before the first space. If you want name to include spaces in name, enter positive integer. All other characters beyond ignored spaces are omitted"/> - </inputs> + <inputs> + <param format="fastq" type="data" name="input" label="Choose your FASTQ file"/> + <param name="prefix" type="text" size="10" value="" label="Prefix" + help="Enter prefix which will be added to all sequences names"/> + <param name="suffix" type="text" size="10" value="" label="Suffix" + help="Enter suffix which will be added to all sequences names"/> + <param name="nspace" type="integer" size="10" value="0" min="0" max="1000" + label="Number of spaces in sequence name to ignore" + help="Sequence name is a string before the first space. If you want name to include spaces in name, enter positive integer. All other characters beyond ignored spaces are omitted"/> + </inputs> - <outputs> - <data format="fastq" name="output" label="FASTQ dataset ${input.hid} with modified sequence names" /> - </outputs> + <outputs> + <data format="fastq" name="output" + label="FASTQ dataset ${input.hid} with modified sequence names"/> + </outputs> + + <help> + **What is does** - <help> -**What is does** - -Tool for appending prefix and suffix to sequences names in fastq formated sequences. + Tool for appending prefix and suffix to sequences names in fastq formated + sequences. -**Example** + **Example** + + The following Solexa-FASTQ file: + + :: -The following Solexa-FASTQ file: - -:: - - @CSHL_4_FC042GAMMII_2_1_517_596 - GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT - +CSHL_4_FC042GAMMII_2_1_517_596 - 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 - -is renamed to: + @CSHL_4_FC042GAMMII_2_1_517_596 + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +CSHL_4_FC042GAMMII_2_1_517_596 + 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 + 24 9 24 9 40 10 10 15 40 + + is renamed to: + + :: -:: - - @prefixCSHL_4_FC042GAMMII_2_1_517_596suffix - GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT - +prefixCSHL_4_FC042GAMMII_2_1_517_596suffix - 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 + @prefixCSHL_4_FC042GAMMII_2_1_517_596suffix + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +prefixCSHL_4_FC042GAMMII_2_1_517_596suffix + 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 + 24 9 24 9 40 10 10 15 40 + + different format: -different format: - + + :: -:: - - @HISEQ1:92:c0190acxx:8:1101:1252:2230 2:N:0:CGATGT - AGAGGAAAAAACATAGTTCTTGTCTAAAAAAATCCCTTGAAAAAGGGCAGATGTATAGAAATAGAAAATTTCAAAGAAAAACTCTCTACAAATGGAAGAGA - + - CCCFFFFFHHHHHJJJJIJJJJJJJJJJJJJJJIJJJJJIIJJJJJJGIJIJIHHHHHHHHFFFFFFDEEEEEDCDDDDDDDCCDDDEDDDDD>CCCCB@9 - -is renamed to: + @HISEQ1:92:c0190acxx:8:1101:1252:2230 2:N:0:CGATGT + AGAGGAAAAAACATAGTTCTTGTCTAAAAAAATCCCTTGAAAAAGGGCAGATGTATAGAAATAGAAAATTTCAAAGAAAAACTCTCTACAAATGGAAGAGA + + + CCCFFFFFHHHHHJJJJIJJJJJJJJJJJJJJJIJJJJJIIJJJJJJGIJIJIHHHHHHHHFFFFFFDEEEEEDCDDDDDDDCCDDDEDDDDD>CCCCB@9 -:: - - @prefixHISEQ1:92:c0190acxx:8:1101:1252:2230suffix - AGAGGAAAAAACATAGTTCTTGTCTAAAAAAATCCCTTGAAAAAGGGCAGATGTATAGAAATAGAAAATTTCAAAGAAAAACTCTCTACAAATGGAAGAGA - + - CCCFFFFFHHHHHJJJJIJJJJJJJJJJJJJJJIJJJJJIIJJJJJJGIJIJIHHHHHHHHFFFFFFDEEEEEDCDDDDDDDCCDDDEDDDDD>CCCCB@9 - -note that string after first space is omitted! + is renamed to: + + :: + + @prefixHISEQ1:92:c0190acxx:8:1101:1252:2230suffix + AGAGGAAAAAACATAGTTCTTGTCTAAAAAAATCCCTTGAAAAAGGGCAGATGTATAGAAATAGAAAATTTCAAAGAAAAACTCTCTACAAATGGAAGAGA + + + CCCFFFFFHHHHHJJJJIJJJJJJJJJJJJJJJIJJJJJIIJJJJJJGIJIJIHHHHHHHHFFFFFFDEEEEEDCDDDDDDDCCDDDEDDDDD>CCCCB@9 + + note that string after first space is omitted! -Because sequence names sometimes containg spaces which delimit the actual name. By default, anything after spaces is -excluded from sequences name. In example sequence: - -:: - - @SRR352150.23846180 HWUSI-EAS1786:7:119:15910:19280/1 - CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC - + - IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIHIIIIIFIIIIIIHDHBBIHFIHIIBHHDDHIFHIHIIIHIHGGDFDEI@EGEGFGFEFB@ECG + Because sequence names sometimes containg spaces which delimit the actual name. By + default, anything after spaces is + excluded from sequences name. In example sequence: + + :: + + @SRR352150.23846180 HWUSI-EAS1786:7:119:15910:19280/1 + CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC + + + IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIHIIIIIFIIIIIIHDHBBIHFIHIIBHHDDHIFHIHIIIHIHGGDFDEI@EGEGFGFEFB@ECG -when **Number of spaces in name to ignore** is set to 0 (default) the output will be: - -:: - - @prefixSRR352150.23846180suffix - CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC - + - IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIHIIIIIFIIIIIIHDHBBIHFIHIIBHHDDHIFHIHIIIHIHGGDFDEI@EGEGFGFEFB@ECG - -If you want to keep spaces the setting **Number of spaces in name to ignore** to 1 will yield - -:: - - @prefixSRR352150.23846180 HWUSI-EAS1786:7:119:15910:19280/1suffix - CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC - + - IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIHIIIIIFIIIIIIHDHBBIHFIHIIBHHDDHIFHIHIIIHIHGGDFDEI@EGEGFGFEFB@ECG - - -</help> + when **Number of spaces in name to ignore** is set to 0 (default) the output will + be: + + :: + + @prefixSRR352150.23846180suffix + CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC + + + IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIHIIIIIFIIIIIIHDHBBIHFIHIIBHHDDHIFHIHIIIHIHGGDFDEI@EGEGFGFEFB@ECG + + If you want to keep spaces the setting **Number of spaces in name to ignore** to 1 + will yield + + :: + + @prefixSRR352150.23846180 HWUSI-EAS1786:7:119:15910:19280/1suffix + CTGGATTCTATACCTTTGGCAACTACTTCTTGGTTGATCAGGAAATTAACACTAGTAGTTTAGGCAATTTGGAATGGTGCCAAAGATGTATAGAACTTTC + + + IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIHIIIIIFIIIIIIHDHBBIHFIHIIBHHDDHIFHIHIIIHIHGGDFDEI@EGEGFGFEFB@ECG + + + </help> </tool> |
b |
diff -r f4ed6a65a2ff -r 58807b35777a pairScan.xml --- a/pairScan.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/pairScan.xml Wed Aug 02 11:31:12 2023 +0000 |
b |
@@ -1,5 +1,5 @@ -<tool id="pairScan" name="Scan paired-end reads for overlap" version="1.0.0.1"> +<tool id="pairScan" name="Scan paired-end reads for overlap" version="1.0.0.3"> <description> Scan paired-end reads for overlap </description> <requirements> <requirement type="package">python-levenshtein</requirement> @@ -8,8 +8,8 @@ <include type="literal" path="pairScan.py"/> </required_files> - <command interpreter="python"> - pairScan.py -f $fasta_input -o $min_overlap -m $max_mismatch -p $pass -b $bad -s $offset + <command> + python '$__tool_directory__'/pairScan.py -f $fasta_input -o $min_overlap -m $max_mismatch -p $pass -b $bad -s $offset </command> <inputs> |
b |
diff -r f4ed6a65a2ff -r 58807b35777a paired_fastq_filtering.xml --- a/paired_fastq_filtering.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/paired_fastq_filtering.xml Wed Aug 02 11:31:12 2023 +0000 |
[ |
b'@@ -1,184 +1,212 @@\n <tool id="paired_fastq_filtering" name="Preprocessing of FASTQ paired-end reads">\n- <stdio>\n- <exit_code range="1:" level="fatal" description="Error" />\n- </stdio>\n- <description>\n- Preprocessing of paired-end reads in FASTQ format\n- including trimming, quality filtering, cutadapt filtering and interlacing. Broken\n- pairs are discarded.\n- </description>\n- <requirements>\n- <requirement type="package">blast</requirement>\n- <requirement type="package">cutadapt</requirement>\n- <requirement type="package">bioconductor-shortread</requirement>\n- <requirement type="package">r-optparse</requirement>\n- </requirements>\n- <command interpreter="bash">\n- paired_fastq_filtering_wrapper.sh -a ${A} -b ${B} -o ${paired} -c ${cut_off} -p ${percent_above} -N ${max_n} $rename -G ${png_output}\n+ <stdio>\n+ <exit_code range="1:" level="fatal" description="Error" version="1.0.0.3"/>\n+ </stdio>\n+ <description>\n+ Preprocessing of paired-end reads in FASTQ format\n+ including trimming, quality filtering, cutadapt filtering and interlacing. Broken\n+ pairs are discarded.\n+ </description>\n+ <requirements>\n+ <requirement type="package">blast</requirement>\n+ <requirement type="package">cutadapt</requirement>\n+ <requirement type="package">bioconductor-shortread</requirement>\n+ <requirement type="package">r-optparse</requirement>\n+ </requirements>\n+ <required_files>\n+ <include type="literal" path="paired_fastq_filtering_wrapper.sh"/>\n+ <include type="literal" path="paired_fastq_filtering.R"/>\n+ <include type="literal" path="fasta_interlacer.py"/>\n+ </required_files>\n+ <command>\n+ bash \'$__tool_directory__\'/paired_fastq_filtering_wrapper.sh -a ${A} -b ${B} -o\n+ ${paired} -c ${cut_off} -p ${percent_above} -N ${max_n} $rename -G ${png_output}\n \n- #if $sampling.sequence_sampling :\n- -n $sampling.sample_size\n- #end if\n+ #if $sampling.sequence_sampling :\n+ -n $sampling.sample_size\n+ #end if\n \n- #if $trimming.sequence_trimming :\n- -e $trimming.trim_end -s $trimming.trim_start\n- #end if\n+ #if $trimming.sequence_trimming :\n+ -e $trimming.trim_end -s $trimming.trim_start\n+ #end if\n \n- #if $cutadapt.use_custom :\n- -C "${cutadapt.custom_options}"\n- #end if\n+ #if $cutadapt.use_custom :\n+ -C "${cutadapt.custom_options}"\n+ #end if\n \n- #if $similarity_filtering.include :\n- -F "${similarity_filtering.filter_database}"\n- #end if\n+ #if $similarity_filtering.include :\n+ -F "${similarity_filtering.filter_database}"\n+ #end if\n \n- </command>\n+ </command>\n \n- <inputs>\n- <param format="fastq,fastq.gz" type="data" name="A" label="Left-hand reads" />\n+ <inputs>\n+ <param format="fastq,fastq.gz" type="data" name="A" label="Left-hand reads"/>\n \n- <param format="fastq,fastq.gz" type="data" name="B" label="Right-hand reads" />\n+ <param format="fastq,fastq.gz" type="data" name="B" label="Right-hand reads"/>\n \n- <conditional name="sampling">\n- <param name="sequence_sampling" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Read sampling"/>\n-\t <when value="false">\n- <!-- do nothing here -->\n- </when>\n- <when value="true">\n- \t\t <param name="sample_size" type="integer" label="Sample size (number of pairs)" help="How many read pairs should be sampled" value="500000" min="0"/>\n- </when>\n- </conditional>\n+ <conditional name="sampling">\n+ <param name="sequence_sampling" type="boolean" truevalue="true"\n+ falsevalue="false" checked="False" label="Read sampling"/>\n+ <when value="false">\n+ <!-- do nothing here -->\n+ </when>\n+ <when value="true">\n+ <param name="sample_size" type="integer"\n+ label="Sample siz'..b'. Trimming (optional)\n+ #. Filter by quality\n+ #. Discard single reads, keep complete pairs\n+ #. Cutadapt filtering\n+ #. Discard single reads, keep complete pairs\n+ #. Sampling (optional)\n+ #. Interlacing two fasta files\n \n-**Quality setting cutoff**\n+ **Quality setting cutoff**\n \n-To correctly set quality cutoff, you need to know how the quality is encoded in your fastq file, default\n-filtering which is suitable for Sanger and Illumina 1.8 encoding is shown below::\n+ To correctly set quality cutoff, you need to know how the quality is encoded in\n+ your fastq file, default\n+ filtering which is suitable for Sanger and Illumina 1.8 encoding is shown below::\n \n \n- Default filtering cutoff\n- | \n- |\n- V\n- SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS.....................................................\n- ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX......................\n- ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII......................\n- .................................JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ......................\n- LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL....................................................\n- !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n- | | | | | |\n- 33 59 64 73 104 126\n- 0........................26...31.......40 \n- -5....0........9.............................40 \n- 0........9.............................40 \n- 3.....9.............................40 \n- 0.2......................26...31........41 \n- \n- S - Sanger Phred+33, raw reads typically (0, 40)\n- X - Solexa Solexa+64, raw reads typically (-5, 40)\n- I - Illumina 1.3+ Phred+64, raw reads typically (0, 40)\n- J - Illumina 1.5+ Phred+64, raw reads typically (3, 40)\n- with 0=unused, 1=unused, 2=Read Segment Quality Control Indicator (bold) \n- (Note: See discussion above).\n- L - Illumina 1.8+ Phred+33, raw reads typically (0, 41)\n- \n- </help> \n+ Default filtering cutoff\n+ |\n+ |\n+ V\n+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS.....................................................\n+ ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX......................\n+ ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII......................\n+ .................................JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ......................\n+ LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL....................................................\n+ !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n+ | | | | | |\n+ 33 59 64 73 104 126\n+ 0........................26...31.......40\n+ -5....0........9.............................40\n+ 0........9.............................40\n+ 3.....9.............................40\n+ 0.2......................26...31........41\n+\n+ S - Sanger Phred+33, raw reads typically (0, 40)\n+ X - Solexa Solexa+64, raw reads typically (-5, 40)\n+ I - Illumina 1.3+ Phred+64, raw reads typically (0, 40)\n+ J - Illumina 1.5+ Phred+64, raw reads typically (3, 40)\n+ with 0=unused, 1=unused, 2=Read Segment Quality Control Indicator (bold)\n+ (Note: See discussion above).\n+ L - Illumina 1.8+ Phred+33, raw reads typically (0, 41)\n+\n+ </help>\n </tool>\n \n' |
b |
diff -r f4ed6a65a2ff -r 58807b35777a plot_comparative_clustering_summary.xml --- a/plot_comparative_clustering_summary.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/plot_comparative_clustering_summary.xml Wed Aug 02 11:31:12 2023 +0000 |
b |
@@ -1,70 +1,86 @@ -<tool id="plot_comparative" name="Visualization of comparative clustering" version="1.0.0"> - <description> Simple utility to create visualization of RepeatExplorer conmparative analysis</description> - <requirements> - <requirement type="package">r-optparse</requirement> - </requirements> - - <command interpreter="Rscript" detect_errors="exit_code" > - $__tool_directory__/plot_comparative_clustering_summary.R - --cluster_table=$cluster_table - --comparative_counts=$counts - --number_of_colors=$number_of_colors - --output=$outpdf - $nuclear_only - - #if $normalization.use_genome_size: - --genome_size $normalization.genome_size_table - #end if +<tool id="plot_comparative" name="Visualization of comparative clustering" + version="1.0.0.3"> + <description>Simple utility to create visualization of RepeatExplorer conmparative + analysis + </description> + <requirements> + <requirement type="package">r-optparse</requirement> + </requirements> + <required_files> + <include type="literal" path="plot_comparative_clustering_summary.R"/> + </required_files> + <command detect_errors="exit_code"> + Rscript '$__tool_directory__'/plot_comparative_clustering_summary.R + --cluster_table=$cluster_table + --comparative_counts=$counts + --number_of_colors=$number_of_colors + --output=$outpdf + $nuclear_only + #if $normalization.use_genome_size: + --genome_size $normalization.genome_size_table + #end if </command> <inputs> - <param format="txt" type="data" name="cluster_table" label="file from RepeatExplorer2 clustering - CLUSTER_TABLE.csv"/> - <param format="txt" type="data" name="counts" label="file from RepeatExplorer2 output - COMPARATIVE_ANALYSIS_COUNTS.csv"/> - <param value="10" min="2" max="20" type="integer" name="number_of_colors" label="Maximum number of color used for plottting"/> - <param value="false" type="boolean" truevalue="--nuclear_only" falsevalue="" name="nuclear_only" label="Remove all non-nuclear sequences (organel and contamination)"/> - <conditional name="normalization"> - <param name="use_genome_size" type="boolean" checked="False" label="Normalize to genome size" help="Note that if this option is used, non-nuclear sequences are always removed."/> - <when value="false"> - <!-- pass --> - </when> - <when value="true"> - <param name="genome_size_table" type="data" format="txt" label="table with genome sizes"/> + <param format="txt" type="data" name="cluster_table" + label="file from RepeatExplorer2 clustering - CLUSTER_TABLE.csv"/> + <param format="txt" type="data" name="counts" + label="file from RepeatExplorer2 output - COMPARATIVE_ANALYSIS_COUNTS.csv"/> + <param value="10" min="2" max="20" type="integer" name="number_of_colors" + label="Maximum number of color used for plottting"/> + <param value="false" type="boolean" truevalue="--nuclear_only" falsevalue="" + name="nuclear_only" + label="Remove all non-nuclear sequences (organel and contamination)"/> + <conditional name="normalization"> + <param name="use_genome_size" type="boolean" checked="False" + label="Normalize to genome size" + help="Note that if this option is used, non-nuclear sequences are always removed."/> + <when value="false"> + <!-- pass --> + </when> + <when value="true"> + <param name="genome_size_table" type="data" format="txt" + label="table with genome sizes"/> - </when> + </when> - </conditional> + </conditional> </inputs> <outputs> - <data format="pdf" name="outpdf" label="Comparative analysis summary"/> + <data format="pdf" name="outpdf" label="Comparative analysis summary"/> </outputs> <help> - **Visualization of comparative clustering** - Visualization can be created two output files from RepeatExplorer pipeline. - - Input file CLUSTER_TABLE.csv contains automatic annotation, information about cluster sizes and the total number of reads used for analysis - Example of CLUSTER_TABLE.csv: :: + **Visualization of comparative clustering** + Visualization can be created two output files from RepeatExplorer pipeline. + + Input file CLUSTER_TABLE.csv contains automatic annotation, information about + cluster sizes and the total number of reads used for analysis + Example of CLUSTER_TABLE.csv: :: - "Number_of_reads_in_clusters" 3002 - "Number_of_clusters" 895 - "Number_of_superclusters" 895 - "Number_of_singlets" 6998 + "Number_of_reads_in_clusters" 3002 + "Number_of_clusters" 895 + "Number_of_superclusters" 895 + "Number_of_singlets" 6998 + + "Number_of_analyzed_reads" 10000 - "Number_of_analyzed_reads" 10000 - - "Cluster" "Supercluster" "Size" "Size_adjusted" "Automatic_annotation" "TAREAN_classification" "Final_annotation" - 1 1 61 61 "All" "Other" - 2 2 59 59 "All/repeat/satellite" "Putative satellites (high confidence)" - 3 3 45 45 "All/repeat/satellite" "Putative satellites (low confidence)" - 4 4 38 38 "All" "Other" - 5 5 32 32 "All" "Other" - 6 6 28 28 "All" "Other" - 7 7 25 25 "All" "Other" - 8 8 24 24 "All" "Other" - 9 9 23 23 "All" "Other" - 10 10 22 22 "All/repeat/mobile_element/Class_I/LTR/Ty3_gypsy/non-chromovirus/OTA/Tat/Ogre" "Other" - 11 11 20 20 "All" "Other" - 12 12 20 20 "All" "Other" + "Cluster" "Supercluster" "Size" "Size_adjusted" "Automatic_annotation" + "TAREAN_classification" "Final_annotation" + 1 1 61 61 "All" "Other" + 2 2 59 59 "All/repeat/satellite" "Putative satellites (high confidence)" + 3 3 45 45 "All/repeat/satellite" "Putative satellites (low confidence)" + 4 4 38 38 "All" "Other" + 5 5 32 32 "All" "Other" + 6 6 28 28 "All" "Other" + 7 7 25 25 "All" "Other" + 8 8 24 24 "All" "Other" + 9 9 23 23 "All" "Other" + 10 10 22 22 + "All/repeat/mobile_element/Class_I/LTR/Ty3_gypsy/non-chromovirus/OTA/Tat/Ogre" + "Other" + 11 11 20 20 "All" "Other" + 12 12 20 20 "All" "Other" </help> |
b |
diff -r f4ed6a65a2ff -r 58807b35777a renameSequences.xml --- a/renameSequences.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/renameSequences.xml Wed Aug 02 11:31:12 2023 +0000 |
b |
@@ -1,25 +1,38 @@ -<tool id="rename_sequences" name="Rename sequences" version="1.0.0"> -<description> Rename sequences using numerical counter, keep required prefix and pair information </description> -<command interpreter="python"> -renameSequences2.py $input $paired index.tmp $prefix_length > $output -</command> +<tool id="rename_sequences" name="Rename sequences" version="1.0.0.3"> + <description>Rename sequences using numerical counter, keep required prefix and pair + information + </description> + <required_files> + <include type="literal" path="renameSequences2.py"/> + </required_files> + <command> + python '$__tool_directory__'/renameSequences2.py $input $paired index.tmp + $prefix_length > $output + </command> - <inputs> - <param format="fasta" type="data" name="input" label="Choose your FASTA file" /> - <param name="prefix_length" type="integer" size="10" value="0" label="Prefix length" help="Enter the length of prefix to keep in sequences names" /> - <param name="paired" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Sequences are paired-end reads in interlaced format" help=""/> - </inputs> + <inputs> + <param format="fasta" type="data" name="input" label="Choose your FASTA file"/> + <param name="prefix_length" type="integer" size="10" value="0" + label="Prefix length" + help="Enter the length of prefix to keep in sequences names"/> + <param name="paired" type="boolean" truevalue="true" falsevalue="false" + checked="False" label="Sequences are paired-end reads in interlaced format" + help=""/> + </inputs> - <outputs> - <data format="fasta" name="output" label="Renamed sequences from dataset ${input.hid}" /> - </outputs> + <outputs> + <data format="fasta" name="output" + label="Renamed sequences from dataset ${input.hid}"/> + </outputs> + + <help> + **What is does** - <help> -**What is does** - -Use this tool to rename your sequences with numerical counter while keeping sequence name prefex as part of the name. -If paired-end reads are used, the last character in sequence name is used to distinguish pairs. - -</help> + Use this tool to rename your sequences with numerical counter while keeping + sequence name prefex as part of the name. + If paired-end reads are used, the last character in sequence name is used to + distinguish pairs. + + </help> </tool> |
b |
diff -r f4ed6a65a2ff -r 58807b35777a sampleFasta.xml --- a/sampleFasta.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/sampleFasta.xml Wed Aug 02 11:31:12 2023 +0000 |
[ |
@@ -1,4 +1,4 @@ -<tool id="sampler" name="Read sampling" version="1.0.1"> +<tool id="sampler" name="Read sampling" version="1.0.1.3"> <description> Tool for randomly sampling subsets of reads from large datasets</description> <requirements> <requirement type="package">seqkit</requirement> @@ -6,10 +6,14 @@ <stdio> <exit_code range="1:" level="fatal" description="Error" /> </stdio> + <required_files> + <include type="literal" path="deinterlacer.py" /> + <include type="literal" path="fasta_interlacer.py" /> + </required_files> <command> <![CDATA[ #if str($paired)=="true" - ${__tool_directory__}/deinterlacer.py $input Afile Bfile + python ${__tool_directory__}/deinterlacer.py $input Afile Bfile && NUMBER=\$(($(number) / 2)) && @@ -17,7 +21,7 @@ && seqkit sample -2 --number \$NUMBER --rand-seed $seed -o Bsample -w 0 Bfile < /dev/null && - ${__tool_directory__}/fasta_interlacer.py -a Asample -b Bsample -p $output -x tmpfile + python ${__tool_directory__}/fasta_interlacer.py -a Asample -b Bsample -p $output -x tmpfile #else seqkit sample -2 --number $number --rand-seed $seed -o $output -w 0 $input < /dev/null #end if |
b |
diff -r f4ed6a65a2ff -r 58807b35777a single_fastq_filtering.xml --- a/single_fastq_filtering.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/single_fastq_filtering.xml Wed Aug 02 11:31:12 2023 +0000 |
[ |
b'@@ -1,173 +1,197 @@\n-<tool id="single_fastq_filtering" name="Preprocessing of FASTQ reads">\n- <stdio>\n- <exit_code range="1:" level="fatal" description="Error" />\n- </stdio>\n- <description>\n- Preprocessing of FASTQ read files\n- including trimming, quality filtering, cutadapt filtering and sampling\n- </description>\n- <requirements>\n- <requirement type="package">blast</requirement>\n- <requirement type="package">cutadapt</requirement>\n- <requirement type="package">bioconductor-shortread</requirement>\n- <requirement type="package">r-optparse</requirement>\n- </requirements>\n- <command interpreter="bash">\n- single_fastq_filtering_wrapper.sh -a ${A} -o ${output} -c ${cut_off} -p ${percent_above} -N ${max_n} -G ${png_output}\n+<tool id="single_fastq_filtering" name="Preprocessing of FASTQ reads" version="1.0.0.3">\n+ <stdio>\n+ <exit_code range="1:" level="fatal" description="Error"/>\n+ </stdio>\n+ <description>\n+ Preprocessing of FASTQ read files\n+ including trimming, quality filtering, cutadapt filtering and sampling\n+ </description>\n+ <requirements>\n+ <requirement type="package">blast</requirement>\n+ <requirement type="package">cutadapt</requirement>\n+ <requirement type="package">bioconductor-shortread</requirement>\n+ <requirement type="package">r-optparse</requirement>\n+ </requirements>\n+ <required_files>\n+ <include type="literal" path="single_fastq_filtering_wrapper.sh"/>\n+ <include type="literal" path="single_fastq_filtering.R"/>\n+ </required_files>\n+ <command interpreter="bash">\n+ bash \'$__tool_directory__\'/single_fastq_filtering_wrapper.sh -a ${A} -o ${output}\n+ -c ${cut_off} -p ${percent_above} -N ${max_n} -G ${png_output}\n \n- #if $sampling.sequence_sampling :\n- -n $sampling.sample_size\n- #end if\n+ #if $sampling.sequence_sampling :\n+ -n $sampling.sample_size\n+ #end if\n \n- #if $trimming.sequence_trimming :\n- -e $trimming.trim_end -s $trimming.trim_start\n- #end if\n+ #if $trimming.sequence_trimming :\n+ -e $trimming.trim_end -s $trimming.trim_start\n+ #end if\n \n- #if $cutadapt.use_custom :\n- -C "${cutadapt.custom_options}"\n- #end if\n+ #if $cutadapt.use_custom :\n+ -C "${cutadapt.custom_options}"\n+ #end if\n \n- #if $similarity_filtering.include :\n- -F "${similarity_filtering.filter_database}"\n- #end if\n+ #if $similarity_filtering.include :\n+ -F "${similarity_filtering.filter_database}"\n+ #end if\n \n \n- </command>\n+ </command>\n \n- <inputs>\n- <param format="fastq,fastq.gz" type="data" name="A" label="Reads in FASTQ format" />\n- <conditional name="sampling">\n- <param name="sequence_sampling" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Read sampling"/>\n-\t <when value="false">\n- <!-- do nothing here -->\n- </when>\n- <when value="true">\n- \t\t <param name="sample_size" type="integer" label="Sample size (number of reads)" help="How many reads should be sampled" value="500000" min="0"/>\n- </when>\n- </conditional>\n+ <inputs>\n+ <param format="fastq,fastq.gz" type="data" name="A"\n+ label="Reads in FASTQ format"/>\n+ <conditional name="sampling">\n+ <param name="sequence_sampling" type="boolean" truevalue="true"\n+ falsevalue="false" checked="False" label="Read sampling"/>\n+ <when value="false">\n+ <!-- do nothing here -->\n+ </when>\n+ <when value="true">\n+ <param name="sample_size" type="integer"\n+ label="Sample size (number of reads)"\n+ help="How many reads should be sampled" value="500000" min="0"/>\n+ </when>\n+ </conditional>\n \n- <param type="integer" name="cut_off" label="Quality cutoff" value="10" min="0" help="See below how to correctly set '..b'ty \n-#. Cutadapt filtering \n-#. Sampling (optional) \n-#. Interlacing two fasta files\n+ 1. Trimming (optional)\n+ #. Filter by quality\n+ #. Cutadapt filtering\n+ #. Sampling (optional)\n+ #. Interlacing two fasta files\n \n-**Quality setting cutoff**\n+ **Quality setting cutoff**\n \n-To correctly set quality cutoff, you need to know how the quality is encoded in your fastq file, default\n-filtering which is suitable for Sanger and Illumina 1.8 encoding is shown below::\n+ To correctly set quality cutoff, you need to know how the quality is encoded in\n+ your fastq file, default\n+ filtering which is suitable for Sanger and Illumina 1.8 encoding is shown below::\n \n \n- Default filtering cutoff\n- | \n- |\n- V\n- SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS.....................................................\n- ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX......................\n- ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII......................\n- .................................JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ......................\n- LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL....................................................\n- !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n- | | | | | |\n- 33 59 64 73 104 126\n- 0........................26...31.......40 \n- -5....0........9.............................40 \n- 0........9.............................40 \n- 3.....9.............................40 \n- 0.2......................26...31........41 \n- \n- S - Sanger Phred+33, raw reads typically (0, 40)\n- X - Solexa Solexa+64, raw reads typically (-5, 40)\n- I - Illumina 1.3+ Phred+64, raw reads typically (0, 40)\n- J - Illumina 1.5+ Phred+64, raw reads typically (3, 40)\n- with 0=unused, 1=unused, 2=Read Segment Quality Control Indicator (bold) \n- (Note: See discussion above).\n- L - Illumina 1.8+ Phred+33, raw reads typically (0, 41)\n- \n- </help> \n+ Default filtering cutoff\n+ |\n+ |\n+ V\n+ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS.....................................................\n+ ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX......................\n+ ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII......................\n+ .................................JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ......................\n+ LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL....................................................\n+ !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n+ | | | | | |\n+ 33 59 64 73 104 126\n+ 0........................26...31.......40\n+ -5....0........9.............................40\n+ 0........9.............................40\n+ 3.....9.............................40\n+ 0.2......................26...31........41\n+\n+ S - Sanger Phred+33, raw reads typically (0, 40)\n+ X - Solexa Solexa+64, raw reads typically (-5, 40)\n+ I - Illumina 1.3+ Phred+64, raw reads typically (0, 40)\n+ J - Illumina 1.5+ Phred+64, raw reads typically (3, 40)\n+ with 0=unused, 1=unused, 2=Read Segment Quality Control Indicator (bold)\n+ (Note: See discussion above).\n+ L - Illumina 1.8+ Phred+33, raw reads typically (0, 41)\n+\n+ </help>\n </tool>\n \n' |
b |
diff -r f4ed6a65a2ff -r 58807b35777a summarize_cluster_table.xml --- a/summarize_cluster_table.xml Thu Jul 27 09:46:13 2023 +0000 +++ b/summarize_cluster_table.xml Wed Aug 02 11:31:12 2023 +0000 |
b |
@@ -1,42 +1,58 @@ -<tool id="summarize_annotation" name="Repeat proportions from CLUSTER_TABLE" version="1.0.0"> - <description> Simple utility to summarize final annotations from RepeatExplorer CLUSTER_TABLE</description> - <requirements> - <requirement type="package">r-optparse</requirement> - </requirements> - - <command interpreter="Rscript" detect_errors="exit_code" > - $__tool_directory__/summarize_cluster_table.R - --cluster_table=$cluster_table - --output=$output +<tool id="summarize_annotation" name="Repeat proportions from CLUSTER_TABLE" + version="1.0.0.3"> + <description>Simple utility to summarize final annotations from RepeatExplorer + CLUSTER_TABLE + </description> + <requirements> + <requirement type="package">r-optparse</requirement> + </requirements> + <required_files> + <include type="literal" path="summarize_cluster_table.R"/> + </required_files> + <command interpreter="Rscript" detect_errors="exit_code"> + Rscript $__tool_directory__/summarize_cluster_table.R + --cluster_table=$cluster_table + --output=$output </command> <inputs> - <param format="txt" type="data" name="cluster_table" label="file from RepeatExplorer2 clustering - CLUSTER_TABLE.csv" help="CLUSTER_TABLE.csv must contains completed Final_annotation column" /> + <param format="txt" type="data" name="cluster_table" + label="file from RepeatExplorer2 clustering - CLUSTER_TABLE.csv" + help="CLUSTER_TABLE.csv must contains completed Final_annotation column"/> </inputs> <outputs> - <data format="tabular" name="output" label="Summary of repeat proportions from ${cluster_table.hid}"/> + <data format="tabular" name="output" + label="Summary of repeat proportions from ${cluster_table.hid}"/> </outputs> <help> - **The tool calculates genome proportions of identified repeats based on cluster annotations in CLUSTER_TABLE.csv** - - The column "Final_annotation" must be filled in the input file CLUSTER_TABLE.csv. Contamination and organelle clusters are discarded from quantification. Table header with information about numbers of analyzed reads should remain unchanged - see example below. + **The tool calculates genome proportions of identified repeats based on cluster + annotations in CLUSTER_TABLE.csv** - Example of CLUSTER_TABLE.csv: :: + The column "Final_annotation" must be filled in the input file CLUSTER_TABLE.csv. + Contamination and organelle clusters are discarded from quantification. Table + header with information about numbers of analyzed reads should remain unchanged - + see example below. + + Example of CLUSTER_TABLE.csv: :: - Number_of_reads_in_clusters 1185180 - Number_of_clusters 62148 - Number_of_superclusters 62031 - Number_of_singlets 314820 - Number_of_analyzed_reads 1500000 - Cluster Supercluster Size Size_adjusted Automatic_annotation TAREAN_annotation Final_annotation - 10 4 11967 11967 All/repeat/mobile_element/Class_I/LTR/Ty1_copia/SIRE Other All/repeat/mobile_element/Class_I/LTR/Ty1_copia/SIRE - 137 5 2094 2094 All/repeat Other All/repeat - 112 9 3117 3117 All/repeat/rDNA/45S_rDNA Other All/repeat/rDNA/45S_rDNA - 16 11 10078 10078 All/repeat/satellite Putative satellites (high confidence) All/repeat/satellite - 125 22 2630 2630 All/organelle/plastid Other All/organelle/plastid - 124 40 2645 2645 All/repeat/mobile_element/Class_I/LTR/Ty1_copia/Ivana Putative LTR elements All/repeat/mobile_element/Class_I/LTR/Ty1_copia/Ivana + Number_of_reads_in_clusters 1185180 + Number_of_clusters 62148 + Number_of_superclusters 62031 + Number_of_singlets 314820 + Number_of_analyzed_reads 1500000 + Cluster Supercluster Size Size_adjusted Automatic_annotation TAREAN_annotation + Final_annotation + 10 4 11967 11967 All/repeat/mobile_element/Class_I/LTR/Ty1_copia/SIRE Other + All/repeat/mobile_element/Class_I/LTR/Ty1_copia/SIRE + 137 5 2094 2094 All/repeat Other All/repeat + 112 9 3117 3117 All/repeat/rDNA/45S_rDNA Other All/repeat/rDNA/45S_rDNA + 16 11 10078 10078 All/repeat/satellite Putative satellites (high confidence) + All/repeat/satellite + 125 22 2630 2630 All/organelle/plastid Other All/organelle/plastid + 124 40 2645 2645 All/repeat/mobile_element/Class_I/LTR/Ty1_copia/Ivana Putative + LTR elements All/repeat/mobile_element/Class_I/LTR/Ty1_copia/Ivana </help> </tool> |
b |
diff -r f4ed6a65a2ff -r 58807b35777a tool_data/organele_ref_and_phi-X174.fasta --- a/tool_data/organele_ref_and_phi-X174.fasta Thu Jul 27 09:46:13 2023 +0000 +++ b/tool_data/organele_ref_and_phi-X174.fasta Wed Aug 02 11:31:12 2023 +0000 |
b |
b'@@ -15349,1393883 +15349,561421 @@\n GCTTGCCCCACCCATGAGTAAATATTTCATAGTAGCCTCATTAGACCGTAGATCTCTCTTGGTATATCCAGACAATAGGT\n AGGAACATAAACTGAAACATTCTGGAGCTACAAAGATAGTTATTAAATCGTTAGCACCACATAAAAACATTCCCCCTAGA\n GTAGCTGTTAATACGAATAACAGAAACTCTGTTATAGCCATTTCTGTACATTCAATGTACTCTACGGATAGAGGAATACA\n-TAAAGTTGAACATAATAAAATGAGAAATTGAAAGATTTCGTTGAAATTGTTCGTTTGGAAATTTCCCGAAAAGCTAATTA\n-TAGGTTCTTCTCTCCATCGGAACAATAGGGCCGTTATGCTTATTACTAAACTTGTTGAAGAGATGAAATAGAACCAAGGT\n-CTATCTTTTTGATCAGAGGTTGAATCGATCATCAGAAGAAGAATTAGGCCAAAAATTAGGATACATTCTGGGAAAATGAA\n-ACTTCCATTGAAGAGAAGCAAATGAAACGCTTTCATAAAAATTCTCGTAGAATCGAGAATGAAGTTTTCATTCTGTACAT\n-GCCAGATCATGAATTAGTAACTGCATCCAATCTCCGAAAAGTCCCGATTGTTTCGATTTTTGAAATGGGATATTTACGGA\n-ATCCCCATGAATAGGATCAAACCTTATTCCATGCTATTTCCATAAGATTCTTCTTTCTTATTCTTAAGCAAGCCCTCGAG\n-AGGGCTTAGTTGATCATGATTTCTGTTTTCTCTTTCTTTTCCTTTTTGTTTGTTTCGAGAAAGATATCGTCCGATTCTCC\n-TTCTATTGATTCTTTTCCGATCGAGATGTACGGATCCATGTGTCTACATACATAGATTCTGTTCATGGATTAACGAAAAT\n-GTGCAAGAGCTCTATTTGCCTCTGCCATTCTATGAGTCGCTTCCTTTTTGCGTATGGCACCCCCACTCCCTTTGGCAGCA\n-TCTACTAATTCGGAACTTAATTTGAAAGCCATATTTCGACCCGGACGCTTTTGGGATGCTTCTAATAACCAACGAATGGC\n-AAGTGCTCTTCCTTGTTTAGATCCTATTTCAATCGGAACTTTCCGCGTCGATCCTTTTTTATTACGTCTTGTTTTTACTC\n-CTATATTGGGAGTTACTCTACGTATTGCTTGACGTAAAACCAATAGTGGATTTGTTTCTGTCTTTTGTTGAATCTTTTTC\n-ACGGCTCGATAGAGAATTTGATAAGCCAATGATTTTTTTCCGTCTTTCAGAATACGGTTAACCACCATGTTAACTAATCG\n-ATTACGAAAAATTGGATCAGATTTTTCAGTTCTTTTTTCTGCAGTACCTCGACGTGACATGAGCGTGAAAGAGGTTCAAG\n-AATCCGTTTTCTTTTTCTAAGGGCTAAAATCACTTATTTTTTTTGCTTTTTGACCCCATATTGTAGGGTGGATCTCGAAA\n-GAGAGGAAAGATCTCCCTCCAAGCCGTACATACGACTTTCATCGAATACGGCTTTCCACAGAATTCTATAGGGATCTATG\n-AGATCGAGTATGGAATTCTGTTTACTCACTTTAAATTGAGTATCCGTTTCCCTCCTTTTCCCGCTAGGATCGGAAATCCT\n-GTATTTTCCATATCCATACGATCGAGTCCTTAGGTTTCCGAAATAGTGTAATGGAAAAAGAAGTGCTTCGAATCATTGCT\n-ATTTGACTCGGACCTGTTCTGAAAAAGTCGAGGTATTTCGAATTGTTTGTTGACACGGACAAAGTAAGGGAAAACCTCTG\n-AAAGAATTTCCATATTGACCTTGGACATATAAGAGTTCCGAATCGAATCTCTTTAGAAAGAAGATCTTTTGTCTCATGGT\n-AGTCTGCTCTAGTCCCTTTACGAAACTTTCGTTATTGGGTCCTTCTCCTGTTGCTTGATTCACATGGCATCATCAAATGA\n-TACAAGTCTTGGATAAGAATCTACAACGCACTAGAACGCCCTTGTTGATGATTCTTTACTGCGACTGCATCCAGCTCGAA\n-TAATGCGATATCTCACACCGAGTTCAAAGATACTTAACCCTTCCTCCTCTTACTAATACTACAGAATGTTCTTGTATGCC\n-CAATACCTGGTATAAGCAGTGATTTCAAATCCAGAAGTGAATCGTACTAATTTCCTTTACGTAAGGCAGAGTTGGGTTTT\n-TGGGGTTGATAGTGGAAAAGTCGACAGAGAAGGATCAGCATATCCTTGTTTAAATAGGTCCACTAGACCTTAGCTACTTA\n-ATTTCATGGTATTTAGCACGGACGATAGAAGATTGGAATGATGGTATCAATGTGCCGCATTGAAATGAGAGTGGCTATAT\n-CGCTATCAATCAAGGTGGGACAGATCAGCCATCTCCACAGATTGCTAGTTTGCTGAAAACAATCCAGCTTTTGCCAAAAG\n-AAAGCCAAACTCTCATCCAGATGCATCTCGATTCACATTTCTATATGATAATATGCACGACCGATTCTCGTATCTGCTGA\n-TACAAGAGATTCTTTCCAAAGATAACCCTAACCCCTGGGTGGGCGAAAGATGGAGACAATACTGCCGTTGAAGCGCAAGA\n-GTGAGCAGCAGTCCGGTCAGCCCTTGCTTTCTCAACTTGAGCAGGAAACTTCCAACCAAATAGGAGGGAGGGGCACTCTC\n-AAATAGGAATGCAAACATAAAATGCGTGCTCTTGAGTGTCAATTCCTCTTCTGACCGAAGTGAATAACAAACATCCCTTC\n-CATCTGGACACTGAATAATGAGAGAATCGTAGAAGTTATCGGAATCCGGGGCTAGAGTCTTTGGCTGAACATCCACAGAT\n-GATCGATTCCCTGCGCTAGCAAGTATTGGATTGGAGTGGGGGTTAGTTGGCTCACCATATGGTATAGATCGATAGATATA\n-TTTGATACGTCCTATAACATGGAGACGGAGATAGCCAAGCCCCTTTCTCGAAAGGCAAACCCTCTTTATCATCCAACACA\n-GGATCTCCCTATGGAATACCACCATTGTCTTTCTCCATCTCACTCAATAGTATATATCGGTATAAATAGCTCAGCTCACG\n-AAATAGAGCGAGCAAGAGCTCGAACTTGGTAGGAAAGTTCGGATAAGTCGACTCGACTCCTTAGTGCAGCGTACACTTGC\n-TGAAGCAAAACGAGGCTCATCCATTTGTTTTGATCCGAATCACACTCTAGACCAGCTCTTCTTATAGAGAAGGAACTCCA\n-CTTTGACAGTCATACGAGAGGGAAGAGACTCAAAGTTGATGAGAATAGTCGATTTGACCGGTATCACGAATAGCAGTTTT\n-AGTAGAAAAGGGCGTTGATGATTGAAAGGATTTCTCCTCGAACCACTAGTAACTATGTCAGCTTTTGTCATTCTGATGAT\n-ACTGAATCGGATCAATATTTGGAATAACAATATCTGATCTATCAAATCGATTCATCATCGAGAATGGAATAGTATAACAT\n-AGGAATTTGATCCAAACAAACTCCGAATTGGGATTTCTTATTGGATCAGGAATCCCATTGCATTTTGCATCCTTTTCCAT\n-TTTGCTTCTTTCAGGCAGTTTTGGCTCGCAATAAAGCTAGGGTCCTGATCGAGCAAGACTACGTCCTATCTATCTACCTC\n-TCCAAACACAATATCTTGAGTACCTATGATGGTGACTACATCTGCTGGCATGTGATGTTTGGACATAGAATCGAGTCCTT\n-GTGAATGGGCAAAGCCAGGTGCTCTTATTTTACAACGGTAGGGACGATTACTCCCATTACTGACAAGAAAGACACCAAAT\n-TCACCTTTAGGTGCTTCAACTGCGGTATAGGTAGAAGGAGCTGGTACGGAAAAACCTTCTGTATAAAGTTCGAAATGGTG\n-AATTGAGGTAGAGTAGACCGATGATCC'..b'AACACTTCCAACTTCCCAGGGTTTTGCCCTTATTTTGTTTTTCTTTGTC\n+CTTGTGTTTAATCTTTTCTAATTCTACTAGCAATCCTATAAGAATTTCTTGCAATATAGAAGAATTTCTTCTAATTAATT\n+CTATTTCTTGAATTCTTTCATCAATGGTATTGGGCAAAATCCCTTTTTTGACTCTGTGCCGTCGATTCTATTATTATTAG\n+TATTAGTGAAGAATTATTATTAGTATTAGTGAAGAATAATGGAAAATTTATTTCATATTCATATAGATAGGAGACATAAT\n+TCACATGGATATAGTAAGTCTCGCTTGGGCTGCTTTAATGGTAGTCTTTACATTTTCTCTTTCACTAGTAGTATGGGGAA\n+GAAGTGGACTCTAAGGATACTATTAATTGAGTTCAGAAATCAAACTGTACCGATTCTTTTAGAGATCGTTCTGCAAAGAC\n+TTTTTTAATTTAACCATTGAAATTGAATTCAAATTCAATTGAATTAAAAGGATCTTCATTATATTCGATTATATTCGGGT\n+GGAGTAATGTATTCTATGAATAATATATTAATAATATATGAATAATACCCCTTTAATCTAAGATATCTTATCTTCTTCGA\n+CAAGTCACATATTGCGCACTTAGTGCTTAGTTTAGTATTTGTTTTATTATTTTAAATTTTATTTTAGAAATTGGATTTAT\n+GCTATATTTTATTGACTTGACTCATTTCATATCATGATTCAAATCTTTAAAAGATTAAAAAATCTGTGAGGTTTACTTTA\n+CTAATCTTTTTCCTCAACACCGGAAAAGGTTTTTATAGAATTCTTTTCCTTGGATGATCTGTTAACTGCTTAATCAATTA\n+CTTCTGCCTTCTTCTTCAAAATGGTAAAAAAAGATTTCTTGATTTTCTTTTTTTAATATATATGTTCTTTTATTTATATG\n+TTTATATGCCCAGACTCTTTTTTTTTCCTTTTCATTTATTTTATTCTTTCGTTAAATGCGATTCCGTAATTTCTATTGAA\n+AATAATCAGAAATCTAGGAATTCGAATTGTAAGAGTAAGAGTTGCTTTTCGACTATTTCAGATTAATTGGAATCAACACA\n+AATGAAGAAAAAAGAAATAGAATTGGGGCTTTATGTACATTACATAGAGATAATGGATTTCTAGATATATGAATATGGAT\n+AGAAAGATGATATTCTAGATTGATCTACATTAAGTATATTTTTATTTAAGTATATATTTGTATATAGTACAACTGTATAC\n+ACTAGAATAGCAAAAATAGATAGTATGGTAGAAAGAAAACTTTTCTTTCTACCATACTATCTGATCTTACCATACTATCT\n+GATCTTATAGAATACTGCTGATTCTAGTCCGCTCATTTCATCTAAAACGGCGAAATTTGAATCCTTTTCATTTTCTAATC\n+GCCGATATTAATAAGAACTAAGATGATATTAATAAGAACTAAGAAGTCAAGTTTCATTCAAATTAATCACTTTGACTGAC\n+AGTTTTTACGTATATTATAAGTAAAAAGGCAGTAGGAACAAGAATGAACAGCGCAGTAGCAATAAATGCGAGAATATTTA\n+CTTCCATAGTCTCACTCTTTCGTTTTTCTTTACTTTGCAATAACTCGGGATTTAATCCCATAGAGATGATAAATCTTTCG\n+CCTCTAAATTCAATGATATGAATTCCATCTCGATGATATCGAATCGAATCAATATCATGAATAACAATATCGGAGCTATC\n+AAATCGATTTATCGTTGAGAATTGAATAGTATAACATAGAAAGATCGTTTATCCATACCGAATCCAAAAATGGATTCCTG\n+GTATAATCGAGAATTTTTTTTTTTTACTTTATTTTTCATTCTTTTCCATTCTTTTTTTTCTATAAAATTCTCGCCTTCCT\n+TAGTACAATCCATTTGTCGAAGTCTCATCTAACCGTGTTTTTTTATTTTGAGACTTAGACTCGTTATAAACAAACCCAAA\n+CAAAGAAACAATAGAAGCAAAATGGAGAAAGGGGAATTCAGTTCTAAACTCTTTGTTAATGATCTAATCTTATTGTAAGT\n+AAAACAAAAAAAAAAAGTGTGATAAAGACAAGGGCAAGTACGGTATAAAAAAGATCGAATATTCTACCAAATTCAATTTT\n+ATTTGTATCGTATAAATACAAATTCGATTTGTGTATGGACTGCCACGAAAGATATGGTACTCGATTCGATTTCATTACAC\n+GAATCGGTAGAAATCAAAAAAGTCAAACTCAGTATGGTATCTCTTGGAATCCTGAATATAATGTTATCTATGACTGCACT\n+AATCCATATATGTCTTTATCAATCGGTACTGGTTGAAGAACTGAAAATTCCCATATTTCTATTTGCTTTGATGAGAACTG\n+AAAAACGAAAATAAATATGAAAATAAATAGAAAAAAAGAAATAGAAATAAGAATAGAAATAATAAAAAATAAGAAAAAAG\n+AAAAAGAAAGAAATGGAAATAAGGTTCCCTTTTGAAATGAAATTATACTATTTGTCCTCGTTTGACAGAAAATGGAGAGA\n+GAATTTGATATATATATATATTTTAGTAAATTATTGAATTTTGATCTGTCGGGACTGACGGGGCTCGAACCCGCAGCTTC\n+CGCCTTGACAGGGCGGTGCTCTGACCAATTGAACTACAATCCCAGAGAAATGAAATAAAGTATAGAGCATACATATTCTT\n+ATGATTTCATTCAAACCCTTTCTAGTTAGATTTTAGATTGGAATTGCCACGTTGTAACAGAGACGTGAGTTTTCTATTGC\n+TAAACACATGGATATAAACTTTAGCGATAACGACACGGATTACTACTCATTTTTTCATTATGTCAAATCCAAATCGATTG\n+ATAATCAATCTTTCAATGAAAAAAGAGTCTTTTTTTCTTTACATTTCTCTTTTTCTTAGACTTTATACTTACAAATCCTG\n+ATATGAATCTGGATCAAGAGCAAGATCCGAATTTGTGGAAAAAAAAAAAATAGAGCAAATCAAATAAATAATAAATAACA\n+GGTAAAAATATATCAGAAATTTTGACTTTTGTCCGCTTTTGTACGTATCAAGCATTTCAAGAGAACAAAGGGGTTATACC\n+ATTTCGTGGTGGATCAGTGAATTATTGGGCCGAGCTGGATTTGAACCAGCGTAGACATATTGCCAACGAATTTACAGTCC\n+GTCCCCATTAACCGCTCGGGCATCGACCCAGGAAGAATCAACTCCAGACTTATTATATTAACTTAATATATTTTAATATA\n+TTTTATTTATATTAACTTAATATATTTTATATTAAAAATCCATGATCAACTTCCTTTCGTAATACCCTACCCCCAGGGGA\n+AGTCGAATCCCCGCTGCCTCCTTGAAAGAGAGATGTCCTGAACCACTAGACGATGGGGGCACAGTTGCCCGACCGTCAGC\n+ATATTATGCTCATAGTATGAACAGTTTTTTGAAATTGTCAATATAACGAAATAGTCTAATTAGATTTGAAAGATCTTTCC\n+GTCTTTCATGATTATTTTTGATTCGTCATTTATATCCATGAATTATTCATTCTAATATCAATTGGAATTTTTATTATTTT\n+TTTTTTTTATTAATTATTTTTTTTTTTACTAATTATTTTGTTTTTATTTTTATTTGAATTTAAAAAAGATTTTTAAATAT\n+TTTAATTTAAATATTTTAAATAATATAAATATAATAAAATATAATAAAAAAAAAAGAATAAAATAGATAAGAATAAAATA\n+GATAAAATAATAGAAATCGAAGAAATAGAATAGAAGAATAGAAATAATACGAAAGATTCTTTCCTTTCAAGGAATGGAAT\n+GATTGATTTCCCAGCAAGCCGTAAAGGAGGGTTAAACCCCACTTCTTCCGCTTTCATTCATTGAT\n\\ No newline at end of file\n' |