Mercurial > repos > iuc > hicstuff_pipeline
changeset 0:1efd17d2bfdb draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicstuff commit 021a99a3416955cd6906e95245da604fda92b255
author | iuc |
---|---|
date | Fri, 25 Nov 2022 11:32:55 +0000 |
parents | |
children | 6956f0783d77 |
files | hicstuff_pipeline.xml macros.xml test-data/info_contigs.txt test-data/sample.reads_for.fastq.gz test-data/sample.reads_rev.fastq.gz test-data/seq.fa.gz |
diffstat | 6 files changed, 144 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hicstuff_pipeline.xml Fri Nov 25 11:32:55 2022 +0000
@@ -0,0 +1,118 @@
+<tool id="hicstuff_pipeline" name="hicstuff full pipeline" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <!-- Galaxy wrapper around `hicstuff pipeline`: takes a genome and paired Hi-C reads, runs the
+         pipeline into ./results and collects the matrix, fragment list and contig table from there. -->
+    <description>generates a Hi-C contact matrix</description>
+    <macros>
+        <import>macros.xml</import>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+hicstuff pipeline
+    --genome '$genome'
+    --outdir results
+    --aligner $aligner
+    $circular
+    $duplicates
+    --enzyme '$enzyme'
+    $filter
+    --mapping $mapping
+    --matfmt $matfmt
+    --quality-min $quality_min
+    --size $size
+    --threads \${GALAXY_SLOTS:-1}
+    ## Both read inputs are declared inside the paired_cond conditional, so each branch uses the qualified path.
+    #if $paired_cond.paired_select == "paired"
+        '$paired_cond.reads.forward'
+        '$paired_cond.reads.reverse'
+    #else
+        '$paired_cond.forward_reads'
+        '$paired_cond.reverse_reads'
+    #end if
+    ]]></command>
+    <inputs>
+        <param type="data" name="genome" format="fasta,fasta.gz" label="Genome fasta file"/>
+        <conditional name="paired_cond">
+            <param name="paired_select" type="select" label="Paired reads">
+                <option value="paired">In a dataset pair</option>
+                <option value="separate">In two separate datasets</option>
+            </param>
+            <when value="paired">
+                <param name="reads" type="data_collection" collection_type="paired" format="fastqsanger,fastqsanger.gz" label="Paired reads"/>
+            </when>
+            <when value="separate">
+                <param name="forward_reads" type="data" format="fastqsanger,fastqsanger.gz" label="Forward reads"/>
+                <param name="reverse_reads" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse reads"/>
+            </when>
+        </conditional>
+        <param argument="--aligner" type="select" label="Alignment software to use" help="Minimap2 should only be used for reads > 100 bp">
+            <option value="bowtie2" selected="true">bowtie2</option>
+            <option value="minimap2">minimap2</option>
+            <option value="bwa">bwa</option>
+        </param>
+        <param argument="--circular" type="boolean" truevalue="--circular" falsevalue="" label="Circular genome"/>
+        <param argument="--duplicates" type="boolean" truevalue="--duplicates" falsevalue="" label="Removes PCR duplicates" help="PCR duplicates are defined as sets of pairs having identical mapping positions for both reads."/>
+        <param argument="--enzyme" type="text" value="5000" label="Bin size or enzyme" help="Restriction enzyme or 'mnase' if a string, or chunk size (i.e. resolution) if a number. Can also be multiple comma-separated enzymes."/>
+        <param argument="--filter" type="boolean" truevalue="--filter" falsevalue="" label="Filters out spurious 3C events, such as self religations or undigested fragments" help="This is only really useful at very fine resolutions (1-2kb) and not needed most of the time. This option is only meaningful when --enzyme is given a restriction enzyme and not a bin size."/>
+        <param argument="--mapping" type="select" label="Parameter of mapping" help="'normal': Directly map reads without any process. 'iterative': Map reads iteratively using iteralign, by truncating reads to 20bp and then repeatedly extending to align them. 'cutsite': Cut reads at the religation sites of the given enzyme using cutsite, create new pairs of reads and then align them ; enzyme is required">
+            <option value="normal" selected="true">normal</option>
+            <option value="iterative">iterative</option>
+            <option value="cutsite">cutsite</option>
+        </param>
+        <param argument="--matfmt" type="select" label="Format of the output sparse matrix" help="Available formats are bg2 (bedgraph2d), graal (graal-compatible plain text COO format) and cool, a binary format that is probably the most appropriate for large genomes.">
+            <option value="bg2">bg2</option>
+            <option value="cool">cool</option>
+            <option value="graal" selected="true">graal</option>
+        </param>
+        <param argument="--quality-min" type="integer" value="30" min="0" label="Minimum mapping quality for selecting contacts"/>
+        <param argument="--size" type="integer" value="0" min="0" label="Minimum size threshold to consider contigs. Keep all contigs by default."/>
+    </inputs>
+    <outputs>
+        <!-- Per the hicstuff documentation the default (graal) matrix is written as abs_fragments_contacts_weighted.txt;
+             the former "weighter" spelling matched no file in ./results, so the matrix could not be collected. -->
+        <data name="abs_fragments_contacts_weighted" from_work_dir="./results/abs_fragments_contacts_weighted.txt" format="tabular"/>
+        <data name="fragments_list" from_work_dir="./results/fragments_list.txt" format="tabular"/>
+        <data name="info_contigs" from_work_dir="./results/info_contigs.txt" format="tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="genome" value="seq.fa.gz" />
+            <param name="paired_cond|paired_select" value="separate"/>
+            <param name="paired_cond|forward_reads" value="sample.reads_for.fastq.gz" />
+            <param name="paired_cond|reverse_reads" value="sample.reads_rev.fastq.gz" />
+            <output name="info_contigs" file="info_contigs.txt"/>
+            <!-- Regression guard: the matrix output must actually be collected and be a 3-column table. -->
+            <output name="abs_fragments_contacts_weighted">
+                <assert_contents>
+                    <has_n_columns n="3"/>
+                </assert_contents>
+            </output>
+            <assert_stderr>
+                <has_text text="Contact map generated" />
+            </assert_stderr>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+hicstuff is a toolkit to generate and manipulate Hi-C matrices.
+
+The "hicstuff full pipeline" tool generates a Hi-C contact matrix.
+Output files can be used with instaGRAAL downstream.
+
+-----------
+Input files
+-----------
+* the fasta genome file
+* forward reads
+* reverse reads
+
+------------
+Output files
+------------
+* abs_fragments_contacts_weighted.txt: Sparse matrix file with 3 columns: the row, column and value of each nonzero pixel. The first row contains the shape and total number of nonzero pixels in the matrix.
+* fragments_list.txt: Contains genomic coordinates of the matrix bins (row/columns).
+* info_contigs.txt: Contains chromosome names, their lengths and number of bins.
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Nov 25 11:32:55 2022 +0000
@@ -0,0 +1,36 @@
+<macros>
+    <!-- Shared tokens and macros (requirements, citations) imported by the hicstuff tool wrapper. -->
+    <token name="@TOOL_VERSION@">3.1.5</token>
+    <token name="@PROFILE@">21.09</token>
+    <xml name="requirements">
+        <requirements>
+            <!-- Pin the package through the token so the tool version and its dependency cannot drift apart. -->
+            <requirement type="package" version="@TOOL_VERSION@">hicstuff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.5281/zenodo.4066363</citation>
+            <citation type="bibtex">
+@software{cyril_matthey_doret_2020_4066351,
+  author       = {Cyril Matthey-Doret and
+                  Lyam Baudry and
+                  Amaury Bignaud and
+                  Axel Cournac and
+                  Remi-Montagne and
+                  Nadège Guiglielmoni and
+                  Théo Foutel-Rodier and
+                  Vittore F. Scolari},
+  title        = {hicstuff: Simple library/pipeline to generate and handle Hi-C data },
+  month        = oct,
+  year         = 2020,
+  publisher    = {Zenodo},
+  version      = {v2.3.1},
+  doi          = {10.5281/zenodo.4066351},
+  url          = {http://doi.org/10.5281/zenodo.4066363}
+}</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>