Mercurial > repos > iuc > featurecounts
diff featurecounts.xml @ 10:46cccc52be5f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/featurecounts commit cf1ae941d02bff8848f05c4e4039457656e3a4e8
author | iuc |
---|---|
date | Sun, 14 Jan 2018 09:23:49 -0500 |
parents | e6a2a912677a |
children | e803ca6407c0 |
line wrap: on
line diff
--- a/featurecounts.xml Fri Nov 17 06:02:56 2017 -0500 +++ b/featurecounts.xml Sun Jan 14 09:23:49 2018 -0500 @@ -1,17 +1,27 @@ -<tool id="featurecounts" name="featureCounts" version="1.6.0.1" profile="16.04"> +<tool id="featurecounts" name="featureCounts" version="1.6.0.2" profile="16.04"> <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description> <requirements> <requirement type="package" version="1.6.0">subread</requirement> </requirements> <version_command>featureCounts -v 2>&1 | grep .</version_command> - <command><![CDATA[ + <command detect_errors="exit_code"><![CDATA[ + ## Export fc path for its built-in annotation + export FC_PATH=\$(command -v featureCounts | sed 's@/bin/featureCounts$@@') && + ## Check whether all alignments are from the same type (bam || sam) featureCounts - #if $gtf_source.ref_source=="history": - -a '$gtf_source.reference_gene_sets' - #else: - -a '$gtf_source.reference_gene_sets_builtin.fields.path' + + #if $anno.anno_select=="gtf": + #if $anno.gtf_source.ref_source=="history": + -a '$anno.gtf_source.reference_gene_sets' + #else: + -a '$anno.gtf_source.reference_gene_sets_builtin.fields.path' + #end if + -F "GTF" + #elif $anno.anno_select=="builtin": + -a \${FC_PATH}/annotation/${anno.genome}_RefSeq_exon.txt + -F "SAF" #end if -o "output" @@ -24,13 +34,13 @@ -s $extended_parameters.strand_specificity $extended_parameters.multimapping_enabled.multimapping_counts - #if str($extended_parameters.multimapping_enabled.multimapping_counts) == " -M" + #if str($extended_parameters.multimapping_enabled.multimapping_counts) == " -M": $extended_parameters.multimapping_enabled.fraction #end if $extended_parameters.exon_exon_junction_read_counting_enabled.count_exon_exon_junction_reads - #if str($extended_parameters.exon_exon_junction_read_counting_enabled.count_exon_exon_junction_reads) == "-J" - #if $extended_parameters.exon_exon_junction_read_counting_enabled.genome + #if str($extended_parameters.exon_exon_junction_read_counting_enabled.count_exon_exon_junction_reads) == "-J": + #if $extended_parameters.exon_exon_junction_read_counting_enabled.genome: -G '$extended_parameters.exon_exon_junction_read_counting_enabled.genome' #end if #end if @@ -48,18 +58,18 @@ $extended_parameters.primary $extended_parameters.ignore_dup - #if str($extended_parameters.read_extension_5p) != "0" + #if str($extended_parameters.read_extension_5p) != "0": --readExtension5 $extended_parameters.read_extension_5p #end if - #if str($extended_parameters.read_extension_3p) != "0" + #if str($extended_parameters.read_extension_3p) != "0": --readExtension3 $extended_parameters.read_extension_3p #end if $pe_parameters.fragment_counting_enabled.fragment_counting - #if str($pe_parameters.fragment_counting_enabled.fragment_counting) == " -p" + #if str($pe_parameters.fragment_counting_enabled.fragment_counting) == " -p": $pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance - #if str($pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance) == " -P" + #if str($pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance) == " -P": -d $pe_parameters.fragment_counting_enabled.check_distance_enabled.minimum_fragment_length -D $pe_parameters.fragment_counting_enabled.check_distance_enabled.maximum_fragment_length #end if @@ -70,11 +80,19 @@ '${alignment}' - ## Removal of comment and column-header line - && grep -v "^#" "output" | tail -n+2 > body.txt + ## Removal of comment + && grep -v "^#" "output" + #if $format.value != "tabdel_short": + ## and remove column-header line + | tail -n+2 + #else + ## update header + | sed --expression='s|${alignment}|${alignment.element_identifier}|g' + #end if + > body.txt ## Set the right columns for the tabular formats - #if $format.value == "tabdel_medium" + #if $format.value == "tabdel_medium": && cut -f 1,7 body.txt > expression_matrix.txt ## Paste doesn't allow a non ordered list of columns: -f 1,7,8,6 will only return columns 1,7 and 8 @@ -82,23 +100,30 @@ && cut -f 6 body.txt > gene_lengths.txt && paste expression_matrix.txt gene_lengths.txt > expression_matrix.txt.bak && mv -f expression_matrix.txt.bak '${output_medium}' - #elif $format.value == "tabdel_short" + #elif $format.value == "tabdel_short" or $format.value == "tabdel_short_noheader": && cut -f 1,7 body.txt > '${output_short}' - #else + #else: && cp body.txt '${output_full}' #end if - - #if str($include_feature_length_file) == "true" + #if str($include_feature_length_file) == "true": && cut -f 1,6 body.txt > '${output_feature_lengths}' #end if - #if str($extended_parameters.exon_exon_junction_read_counting_enabled.count_exon_exon_junction_reads) == "-J" - && tail -n+2 'output.jcounts' > '${output_jcounts}' + #if str($extended_parameters.exon_exon_junction_read_counting_enabled.count_exon_exon_junction_reads) == "-J": + #if $format.value != "tabdel_short": + && tail -n+2 'output.jcounts' > '${output_jcounts}' + #else: + + && sed --expression='s|${alignment}|${alignment.element_identifier}|g' 'output.jcounts' > '${output_jcounts}' + #end if #end if - && tail -n+2 'output.summary' > '${output_summary}' - + #if $format.value != "tabdel_short": + && tail -n+2 'output.summary' > '${output_summary}' + #else: + && sed --expression='s|${alignment}|${alignment.element_identifier}|g' 'output.summary' > '${output_summary}' + #end if ]]></command> <inputs> <param name="alignment" @@ -107,26 +132,41 @@ format="bam,sam" label="Alignment file" help="The input alignment file(s) where the gene expression has to be counted. The file can have a SAM or BAM format; but ALL files must be in the same format" /> - - <conditional name="gtf_source"> - <param name="ref_source" type="select" label="Gene annotation file"> - <option value="cached">locally cached</option> - <option value="history">in your history</option> + <conditional name="anno"> + <param name="anno_select" type="select" label="Gene annotation file"> + <option value="builtin">featureCounts built-in</option> + <option value="gtf">GTF file</option> </param> - <when value="cached"> - <param name="reference_gene_sets_builtin" type="select" label="Using locally cached annotation" help="If the annotation file you require is not listed here, please contact the Galaxy administrator"> - <options from_data_table="gene_sets"> - <filter type="sort_by" column="1" /> - <validator type="no_options" message="No annotations are available." /> - </options> + <when value="builtin"> + <param name="genome" type="select" label="Select built-in genome" help="Built-in gene annotations for genomes hg38, hg19, mm10 and mm9 are included in featureCounts"> + <option value="hg38">hg38</option> + <option value="hg19">hg19</option> + <option value="mm10">mm10</option> + <option value="mm9">mm9</option> </param> </when> - <when value="history"> - <param name="reference_gene_sets" - format="gff,gtf,gff3" - type="data" - label="Gene annotation file" - help="The program assumes that the provided annotation file is in GTF format. Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment" /> + <when value="gtf"> + <conditional name="gtf_source"> + <param name="ref_source" type="select" label="Gene annotation file"> + <option value="cached">locally cached</option> + <option value="history">in your history</option> + </param> + <when value="cached"> + <param name="reference_gene_sets_builtin" type="select" label="Using locally cached annotation" help="If the annotation file you require is not listed here, please contact the Galaxy administrator"> + <options from_data_table="gene_sets"> + <filter type="sort_by" column="1" /> + <validator type="no_options" message="No annotations are available." /> + </options> + </param> + </when> + <when value="history"> + <param name="reference_gene_sets" + format="gff,gtf,gff3" + type="data" + label="Gene annotation file" + help="The program assumes that the provided annotation file is in GTF format. Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment" /> + </when> + </conditional> </when> </conditional> @@ -134,7 +174,8 @@ type="select" label="Output format" help="The output format will be tabular, select the preferred columns here"> - <option value="tabdel_short" selected="true">Gene-ID "\t" read-count (DESeq2 IUC wrapper compatible)</option> + <option value="tabdel_short_noheader" selected="true">Gene-ID "\t" read-count (DESeq2 IUC wrapper compatible)</option> + <option value="tabdel_short">Gene-ID "\t" read-count (MultiQC/edgeR/limma-voom compatible, includes header in output)</option> <option value="tabdel_medium">Gene-ID "\t" read-count "\t" gene-length</option> <option value="tabdel_full">featureCounts 1.4.0+ default (includes regions provided by the GTF file)</option> </param> @@ -291,7 +332,7 @@ label="Long reads" help="If specified, long reads such as Nanopore and PacBio reads will be counted. Long read counting can only run in one thread and only reads (not read-pairs) can be counted." /> - <param name="by_read_group" argument="--byReadGroup" type="boolean" truevalue="--byReadGroup" falsevalue="" + <param name="by_read_group" argument="--byReadGroup" type="boolean" truevalue="--byReadGroup" falsevalue="" label="Count reads by read group" help="If specified, reads are counted for each read group separately. The 'RG' tag must be present in the input BAM/SAM alignment files." /> @@ -311,7 +352,7 @@ label="Minimum bases of overlap" help="Specify the minimum required number of overlapping bases between a read (or a fragment) and a feature. 1 by default. If a negative value is provided, the read will be extended from both ends." /> - <param name="frac_overlap" + <param name="frac_overlap" type="integer" value="0" min="0" @@ -320,7 +361,7 @@ label="Minimum fraction (of read) overlapping a feature" help="Specify the minimum required fraction of overlapping bases between a read (or a fragment) and a feature. Value should be within range [0,1]. 0 by default. Number of overlapping bases is counted from both reads if paired end. Both this option and '--minOverlap' need to be satisfied for read assignment." /> - <param name="frac_overlap_feature" + <param name="frac_overlap_feature" type="integer" value="0" min="0" @@ -391,7 +432,7 @@ <data format="tabular" name="output_short" label="${tool.name} on ${on_string}"> - <filter>format == "tabdel_short"</filter> + <filter>format == "tabdel_short_noheader" or format == "tabdel_short"</filter> <actions> <action name="column_names" type="metadata" default="Geneid,${alignment.element_identifier}" /> </actions> @@ -408,7 +449,6 @@ <data format="tabular" name="output_summary" - hidden="true" label="${tool.name} on ${on_string}: summary"> <actions> <action name="column_names" type="metadata" default="Status,${alignment.element_identifier}" /> @@ -428,15 +468,17 @@ label="${tool.name} on ${on_string}: junction counts"> <filter>extended_parameters['exon_exon_junction_read_counting_enabled']['count_exon_exon_junction_reads']</filter> <actions> - <action name="column_names" type="metadata" default="PrimaryGene,SecondaryGene,Site1_chr,Site1_location,Site1_strand,Site2_chr,Site2_location,Site2_strand,${alignment.element_identifier}" /> + <action name="column_names" type="metadata" + default="PrimaryGene,SecondaryGene,Site1_chr,Site1_location,Site1_strand,Site2_chr,Site2_location,Site2_strand,${alignment.element_identifier}" /> </actions> </data> </outputs> <tests> <test expect_num_outputs="4"> <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> + <param name="anno_select" value="gtf"/> <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> - <param name="format" value="tabdel_short" /> + <param name="format" value="tabdel_short_noheader" /> <param name="include_feature_length_file" value="true"/> <param name="ref_source" value="history" /> <param name="count_exon_exon_junction_reads" value="-J"/> @@ -452,6 +494,7 @@ </test> <test expect_num_outputs="3"> <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> + <param name="anno_select" value="gtf"/> <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> <param name="format" value="tabdel_medium" /> <param name="include_feature_length_file" value="true"/> @@ -465,6 +508,7 @@ </test> <test expect_num_outputs="3"> <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> + <param name="anno_select" value="gtf"/> <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> <param name="format" value="tabdel_full" /> <param name="include_feature_length_file" value="true"/> @@ -479,7 +523,35 @@ <metadata name="column_names" value="Feature,Length"/> </output> </test> - + <test expect_num_outputs="4"> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> + <param name="anno_select" value="gtf"/> + <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> + <param name="format" value="tabdel_short" /> + <param name="include_feature_length_file" value="true"/> + <param name="ref_source" value="history" /> + <param name="count_exon_exon_junction_reads" value="-J"/> + <output name="output_short" file="output_1_short_with_header.tab"> + <metadata name="column_names" value="Geneid,featureCounts_input1.bam"/> + </output> + <output name="output_summary" file="output_1_summary_with_header.tab"> + <metadata name="column_names" value="Status,featureCounts_input1.bam"/> + </output> + <output name="output_jcounts" file="output_1_jcounts_with_header.tab"> + <metadata name="column_names" value="PrimaryGene,SecondaryGene,Site1_chr,Site1_location,Site1_strand,Site2_chr,Site2_location,Site2_strand,featureCounts_input1.bam"/> + </output> + </test> + <!-- Ensure built-in annotation works --> + <test expect_num_outputs="2"> + <param name="alignment" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" ftype="bam" /> + <param name="anno_select" value="builtin"/> + <param name="format" value="tabdel_short" /> + <param name="genome" value="hg19" /> + <output name="output_short" file="output_builtin_hg19.tab"> + <metadata name="column_names" value="Geneid,pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> + </output> + <output name="output_summary" file="output_summary_builtin_hg19.tab"/> + </test> </tests> <help><![CDATA[ @@ -488,7 +560,7 @@ Overview -------- -FeatureCounts is a light-weight read counting program written entirely in the C programming language. It can be used to count both gDNA-seq and RNA-seq reads for genomic features in in SAM/BAM files. +FeatureCounts is a light-weight read counting program written entirely in the C programming language. It can be used to count both gDNA-seq and RNA-seq reads for genomic features in in SAM/BAM files. FeatureCounts is part of the Subread_ package. Input formats ------------- @@ -497,14 +569,18 @@ - SAM format, http://samtools.sourceforge.net/samtools.shtml#5 - BAM format -Gene regions should be provided in the GFF/GTF format: +Annotations for gene regions should be provided in the GFF/GTF format: - http://genome.ucsc.edu/FAQ/FAQformat.html#format3 - http://www.ensembl.org/info/website/upload/gff.html +Alternatively, the featureCounts built-in annotations for genomes hg38, hg19, mm10 and mm9 can be used through selecting the built-in option above. These annotations were downloaded from NCBI RefSeq database and then adapted by merging overlapping exons from the same gene to form a set of disjoint exons for each gene. Genes with the same Entrez gene identifiers were also merged into one gene. See the Subread_ User's Guide for more information. + Output format ------------- FeatureCounts produces a table containing counted reads, per gene, per row. Optionally the last column can be set to be the effective gene-length. These tables are compatible with the DESeq2 Galaxy wrapper by IUC. Column names are added as metadata object. + +.. _Subread: http://bioinf.wehi.edu.au/subread-package/SubreadUsersGuide.pdf ]]></help> <citations> <citation type="doi">10.1093/bioinformatics/btt656</citation>