view multiqc.xml @ 16:5e4080791d90 draft default tip

Uploaded
author engineson
date Mon, 24 Jul 2017 08:43:45 -0400
parents d6579f50e622
children
line wrap: on
line source

<tool id="multiqc" name="multiqc" version="@WRAPPER_VERSION@.0">
    <description>aggregate results from bioinformatics analyses into a single report</description>
    <macros>
        <!-- Single source of truth for the version: expanded into the tool's
             version attribute and into the package requirement below. -->
        <token name="@WRAPPER_VERSION@">1.0.0</token>
    </macros>
    <requirements>
        <requirement type="package" version="@WRAPPER_VERSION@">multiqc</requirement>
    </requirements>
    <!-- version_command is executed as a shell command and its output is recorded,
         so it must invoke the wrapped program rather than contain a bare version
         string (which would be run as a non-existent command). -->
    <version_command>multiqc --version</version_command>
    <command detect_errors="aggressive">
<![CDATA[
## Stage every selected dataset below multiqc_WDir, one sub-directory per repeat
## block, under a file name (or with content) that the matching MultiQC module
## searches for, then run MultiQC on the staging directory.
mkdir multiqc_WDir &&

#for $i, $repeat in enumerate( $results )
    #set $software = str($repeat.software)
    #set $software_dir = "multiqc_WDir/%s_%s" % ($software, $i)
    mkdir '${software_dir}' &&

    #for $k, $file in enumerate($repeat.input_file)
        ## Dataset names are user supplied and end up inside single-quoted shell
        ## paths: neutralise the two characters that would break the quoting or
        ## the directory layout. All other names are used unchanged.
        #set $identifier = str($file.element_identifier).replace("'", "_").replace("/", "_")
        #if $software == "fastqc"
            ## Searches for files named "fastqc_data.txt"; one directory per file
            ## because every dataset gets the same file name.
            mkdir '${software_dir}/file_${k}' &&
            ln -s '${file}' '${software_dir}/file_${k}/fastqc_data.txt' &&
        #else if $software == "tophat"
            ## Searches for files ending in "align_summary.txt"
            ln -s '${file}' '${software_dir}/${identifier}align_summary.txt' &&
        #else if $software == "bowtie2"
            ## Searches for files containing 'reads; of these;'
            ln -s '${file}' '${software_dir}/${identifier}.txt' &&
        #else if $software == "cutadapt"
            ## Searches for files containing 'This is cutadapt'.
            ## Old cutadapt releases print 'You are running' instead, so the header
            ## is rewritten while copying. Streaming through sed (instead of an
            ## in-place edit with a backup suffix) works with both GNU and BSD sed
            ## and leaves no backup copy in the directory scanned by MultiQC.
            sed 's/You are running/This is/' '${file}' > '${software_dir}/${identifier}.txt' &&
        #else if $software == "featurecounts"
            ## Checks for files ending in '.summary'
            #if $file.metadata.column_names and $file.metadata.column_names.find(',') != -1
                ## The header is only available as metadata: write it as a
                ## tab-separated first line, then append the dataset content.
                echo '$file.metadata.column_names.replace(',','\t').replace('__ob__u','').replace('u__sq__','').replace('__sq__','').replace('__cb__','')' >> '${software_dir}/${identifier}.summary' &&
                cat '$file' >> '${software_dir}/${identifier}.summary' &&
            #else
                ln -s '$file' '${software_dir}/${identifier}.summary' &&
            #end if
        #else if $software == "bismark"
            ## Checks for files ending in _SE_report.txt
            ln -s '${file}' '${software_dir}/${identifier}_SE_report.txt' &&
        #else if $software == "samtools"
            ## Checks for files containing 'This file was produced by samtools stats'
            ln -s '${file}' '${software_dir}/${identifier}' &&
        #else if $software == "picard"
            ln -s '${file}' '${software_dir}/${identifier}' &&
        #else if $software == "samtools_idxstats"
            ## Checks for files containing "idxstats" in the name
            ln -s '${file}' '${software_dir}/${identifier}_idxstats.txt' &&
        #else if $software == "htseq"
            ## Checks for files containing "__too_low_aQual"
            ln -s '${file}' '${software_dir}/${identifier}' &&
        #else if $software == "rnastar_log"
            ## Checks for files named Log.final.out
            mkdir '${software_dir}/${identifier}' &&
            ln -s '${file}' '${software_dir}/${identifier}/Log.final.out' &&
        #else if $software == "rnastar_counts"
            ## Checks for files named ReadsPerGene.out.tab
            mkdir '${software_dir}/${identifier}' &&
            ln -s '${file}' '${software_dir}/${identifier}/ReadsPerGene.out.tab' &&
        #end if
    #end for
#end for

multiqc multiqc_WDir
    ]]></command>
    <inputs>
        <!-- One repeat block per analysed tool. The selected "software" value decides
             how the command section names the staged copies of "input_file". -->
        <repeat name="results" title="Results" min="1">
            <param name="software" type="select" label="Software name" help="Which tool was used to generate the logs?">
                <option value="fastqc">FastQC</option>
                <option value="cutadapt">Cutadapt/Trim Galore!</option>
                <option value="tophat">Tophat2</option>
                <option value="featurecounts">FeatureCounts (Summary file)</option>
                <option value="samtools">Samtools (Stats, Flagstat)</option>
                <option value="samtools_idxstats">Samtools (Idxstats)</option>
                <option value="picard">Picard</option>
                <option value="bismark">Bismark</option>
                <option value="bowtie2">Bowtie2</option>
                <option value="htseq">HTSeq-Count</option>
                <option value="rnastar_log">RNA STAR (log)</option>
                <option value="rnastar_counts">RNA STAR (reads per gene)</option>
            </param>
            <param name="input_file" type="data" format="txt,tabular" multiple="true" label="Result file" help="Select input datasets"/>
        </repeat>
        <!-- Referenced by the filter on the "text_file" output. -->
        <param name="saveLog" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Save log file" help="Save the multiQC log file to the history. This is mostly useful for debugging purposes."/>
    </inputs>
    <outputs>
        <!-- HTML report, collected from the job working directory. -->
        <data name="html_file"
              format="html"
              from_work_dir="multiqc_report.html"
              label="${tool.name} on ${on_string}: Webpage"/>
        <!-- Log file, collected from the job working directory; only kept when
             the saveLog parameter is enabled. -->
        <data name="text_file"
              format="txt"
              from_work_dir="multiqc_data/multiqc.log"
              label="${tool.name} on ${on_string}: Log">
            <filter>saveLog</filter>
        </data>
    </outputs>
    <!-- Functional tests: one per supported software value where a fixture exists,
         plus a combined run. Every expected output is checked with
         compare="sim_size" and a per-test delta rather than an exact match. -->
    <tests>
        <!-- FastQC, single dataset; the only test that enables saveLog and checks the log output. -->
        <test>
            <repeat name="results">
                <param name="software" value="fastqc" />
                <param name="input_file" value="fastqc_data.txt" />
            </repeat>
            <param name="saveLog" value="True"/>
            <output name="html_file" file="report_fastqc.html" compare="sim_size" delta="1000"/>
            <output name="text_file" file="log_fastqc.txt" compare="sim_size" delta="1000"/>
        </test>
        <!-- FastQC, two datasets in one repeat block. -->
        <test>
            <repeat name="results">
                <param name="software" value="fastqc" />
                <param name="input_file" value="fastqc_data.txt,fastqc_data_2.txt" />
            </repeat>
            <output name="html_file" file="report_fastqc_2.html" compare="sim_size" delta="1000"/>
        </test>
        <!-- Cutadapt. -->
        <test>
            <repeat name="results">
                <param name="software" value="cutadapt" />
                <param name="input_file" value="cutadapt.txt" />
            </repeat>
            <output name="html_file" file="report_cutadapt.html" compare="sim_size" delta="1000"/>
        </test>
        <!-- Tophat. -->
        <test>
            <repeat name="results">
                <param name="software" value="tophat" />
                <param name="input_file" value="tophat_data.txt" />
            </repeat>
            <output name="html_file" file="report_tophat.html" compare="sim_size" delta="1000"/>
        </test>
        <!-- Bowtie2, two datasets. -->
        <test>
            <repeat name="results">
                <param name="software" value="bowtie2" />
                <param name="input_file" value="bowtie2SE1.txt,bowtie2SE2.txt" />
            </repeat>
            <output name="html_file" file="report_bowtie2SE.html" compare="sim_size" delta="1000"/>
        </test>
        <!-- FeatureCounts summary. -->
        <test>
            <repeat name="results">
                <param name="software" value="featurecounts" />
                <param name="input_file" value="featurecounts_data.txt" />
            </repeat>
            <output name="html_file" file="report_featurecounts.html" compare="sim_size" delta="1000"/>
        </test>
        <!-- Picard, six metrics files; uses a larger delta (10000) than the other tests. -->
        <test>
            <repeat name="results">
                <param name="software" value="picard" />
                <param name="input_file" value="collectGcBias_data.txt,CollectInsertSizeMetrics.txt,MarkDuplicates_data.txt,picard_CollectBaseDistributionByCycle.txt,picard_CollectRnaSeqMetrics.txt,picard_CollectAlignmentSummaryMetrics.txt" />
            </repeat>
            <output name="html_file" file="report_picard.html" compare="sim_size" delta="10000"/>
        </test>
        <!-- HTSeq-count. -->
        <test>
            <repeat name="results">
                <param name="software" value="htseq" />
                <param name="input_file" value="htseq_data.txt" />
            </repeat>
            <output name="html_file" file="report_htseq.html" compare="sim_size" delta="1000"/>
        </test>
        <!-- RNA STAR log. -->
        <test>
            <repeat name="results">
                <param name="software" value="rnastar_log" />
                <param name="input_file" value="rnastar_log.txt" />
            </repeat>
            <output name="html_file" file="report_rnastar_log.html" compare="sim_size" delta="1000"/>
        </test>
        <!-- RNA STAR reads per gene. -->
        <test>
            <repeat name="results">
                <param name="software" value="rnastar_counts" />
                <param name="input_file" value="rnastar_counts.txt" />
            </repeat>
            <output name="html_file" file="report_rnastar_counts.html" compare="sim_size" delta="1000"/>
        </test>
        <!-- Bismark. -->
        <test>
            <repeat name="results">
                <param name="software" value="bismark" />
                <param name="input_file" value="bismark_data.txt" />
            </repeat>
            <output name="html_file" file="report_bismark.html" compare="sim_size" delta="1000"/>
        </test>
        <!-- Samtools stats and flagstat in one repeat block. -->
        <test>
            <repeat name="results">
                <param name="software" value="samtools" />
                <param name="input_file" value="samtools_stats.txt,samtools_flagstat.txt" />
            </repeat>
            <output name="html_file" file="report_samtools.html" compare="sim_size" delta="1000"/>
        </test>
        <!-- Samtools idxstats. -->
        <test>
            <repeat name="results">
                <param name="software" value="samtools_idxstats" />
                <param name="input_file" value="samtools_idxstats.txt" />
            </repeat>
            <output name="html_file" file="report_samtools_idxstats.html" compare="sim_size" delta="1000"/>
        </test>
        <!-- Combined run: seven repeat blocks (fastqc, cutadapt, tophat, featurecounts,
             picard, bismark, samtools) feeding a single report. -->
        <test>
            <repeat name="results">
                <param name="software" value="fastqc" />
                <param name="input_file" value="fastqc_data.txt,fastqc_data_2.txt" />
            </repeat>
            <repeat name="results">
                <param name="software" value="cutadapt" />
                <param name="input_file" value="cutadapt.txt" />
            </repeat>
            <repeat name="results">
                <param name="software" value="tophat" />
                <param name="input_file" value="tophat_data.txt" />
            </repeat>
            <repeat name="results">
                <param name="software" value="featurecounts" />
                <param name="input_file" value="featurecounts_data.txt" />
            </repeat>
            <repeat name="results">
                <param name="software" value="picard" />
                <param name="input_file" value="collectGcBias_data.txt,CollectInsertSizeMetrics.txt,MarkDuplicates_data.txt" />
            </repeat>
            <repeat name="results">
                <param name="software" value="bismark" />
                <param name="input_file" value="bismark_data.txt" />
            </repeat>
            <repeat name="results">
                <param name="software" value="samtools" />
                <param name="input_file" value="samtools_stats.txt,samtools_flagstat.txt" />
            </repeat>
            <output name="html_file" file="report_all.html" compare="sim_size" delta="5000"/>
        </test>
    </tests>

    <help><![CDATA[
**What it does**

MultiQC aggregates results from bioinformatics analyses across many samples into a single report. It takes results of multiple analyses and creates a report that can be viewed as a single beautiful web-page. It's a general use tool, perfect for summarizing the output from numerous bioinformatics tools.

**Inputs**

MultiQC takes software output summaries/logs and creates a single report from them. You need to tell the tool which software was used to generate the report. This is done using the **Software name** dropdown. At present the following Galaxy tools produce logs that can be used with MultiQC:

- Fastqc
- Cutadapt / Trim Galore!
- Tophat2
- FeatureCounts (summary file with the column header in the first line or as metadata)
- Samtools (stats, flagstat, idxstats)
- Picard (MarkDuplicatesMetrics, CollectGCBiasMetrics, CollectInsertSizeMetrics, CollectAlignmentSummaryMetrics, CollectRnaSeqMetrics)
- Bismark (Alignment report file)
- Bowtie2 (Metrics file)
- HTSeq-count ("no feature" file; although the "Assigned" metric is always 0)
- RNA STAR (Alignment from Log.final.out, Gene counts from ReadsPerGene.out.tab)

----

**Integrated by**

Cyril Monjeaud and Yvan Le Bras

`EnginesOn <http://engineson.fr/>`_ and Rennes GenOuest Bio-informatics Core Facility

    ]]></help>
    <!-- Reference users are asked to cite when they use this tool, given as a DOI. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btw354</citation>
    </citations>
</tool>