Mercurial > repos > iuc > featurecounts
diff featurecounts.xml @ 0:9e7a369eec58 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/featurecounts commit 03f64004f90ac0a7be67ecfc355a7b361f3c3314
author | iuc |
---|---|
date | Wed, 21 Sep 2016 07:24:39 -0400 |
parents | |
children | c7bd0cc53524 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/featurecounts.xml Wed Sep 21 07:24:39 2016 -0400 @@ -0,0 +1,442 @@ +<tool id="featurecounts" name="featureCounts" version="1.4.6.p5" profile="16.04"> + <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description> + <requirements> + <requirement type="package" version="1.4.6p5">subread</requirement> + </requirements> + + <version_command>featureCounts -v 2>&1 | grep .</version_command> + <command><![CDATA[ + ## Check whether all alignments are from the same type (bam || sam) + featureCounts + -a "$reference_gene_sets" + -o "output" + -T \${GALAXY_SLOTS:-2} + + -t "$extended_parameters.gff_feature_type" + -g "$extended_parameters.gff_feature_attribute" + $extended_parameters.summarization_level + $extended_parameters.contribute_to_multiple_features + -s $extended_parameters.strand_specificity + $extended_parameters.multimapping_enabled.multimapping_counts + + #if str($extended_parameters.multimapping_enabled.multimapping_counts) == " -M" + $extended_parameters.multimapping_enabled.fraction + #end if + + -Q $extended_parameters.mapping_quality + $extended_parameters.largest_overlap + --minOverlap $extended_parameters.min_overlap + $extended_parameters.read_reduction + $extended_parameters.primary + $extended_parameters.ignore_dup + + #if str($extended_parameters.read_extension_5p) != "0" + --readExtension5 $extended_parameters.read_extension_5p + #end if + + #if str($extended_parameters.read_extension_3p) != "0" + --readExtension3 $extended_parameters.read_extension_3p + #end if + + $pe_parameters.fragment_counting_enabled.fragment_counting + #if str($pe_parameters.fragment_counting_enabled.fragment_counting) == " -p" + $pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance + #if str($pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance) == " -P" + -d $pe_parameters.fragment_counting_enabled.check_distance_enabled.minimum_fragment_length + -D $pe_parameters.fragment_counting_enabled.check_distance_enabled.maximum_fragment_length + #end if + #end if + + $pe_parameters.only_both_ends + -S $pe_parameters.orientation + $pe_parameters.exclude_chimerics + + "${alignment}" + + ## Removal of comment and column-header line + && grep -v "^#" "output" | tail -n+2 > body.txt + + ## Set the right columns for the tabular formats + #if $format.value == "tabdel_medium" + && cut -f 1,7 body.txt > expression_matrix.txt + + ## Paste doesn't allow a non ordered list of columns: -f 1,7,8,6 will only return columns 1,7 and 8 + ## Thus the gene length column (last column) has to be added separately + && cut -f 6 body.txt > gene_lengths.txt + && paste expression_matrix.txt gene_lengths.txt > expression_matrix.txt.bak + && mv -f expression_matrix.txt.bak "${output_medium}" + #elif $format.value == "tabdel_short" + && cut -f 1,7 body.txt > "${output_short}" + #else + && cp body.txt "${output_full}" + #end if + + + #if str($include_feature_length_file) == "true" + && cut -f 1,6 body.txt > "${output_feature_lengths}" + #end if + + && tail -n+2 "output.summary" > "${output_summary}" + + ]]></command> + <inputs> + <param name="alignment" + type="data" + multiple="false" + format="bam,sam" + label="Alignment file" + help="The input alignment file(s) where the gene expression has to be counted. The file can have a SAM or BAM format; but ALL files must be in the same format" /> + + <param name="reference_gene_sets" + format="gff,gtf,gff3" + type="data" + label="Gene annotation file" + help="The program assumes that the provided annotation file is in GTF format. Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment" /> + + <param name="format" + type="select" + label="Output format" + help="The output format will be tabular, select the preferred columns here"> + <option value="tabdel_short" selected="true">Gene-ID "\t" read-count (DESeq2 IUC wrapper compatible)</option> + <option value="tabdel_medium">Gene-ID "\t" read-count "\t" gene-length</option> + <option value="tabdel_full">featureCounts 1.4.0+ default (includes regions provided by the GTF file)</option> + </param> + + <param name="include_feature_length_file" + type="boolean" + truevalue="true" + falsevalue="false" + selected="false" + label="Create gene-length file" + help="Creates a tabular file that contains the effective (nucleotides used for counting reads) length of the feature; might be useful for estimating FPKM/RPKM" /> + + + <section name="pe_parameters" title="Options for paired-end reads"> + <conditional name="fragment_counting_enabled"> + + <param name="fragment_counting" + type="select" + argument="-p" + checked="true" + label="Count fragments instead of reads" + help="If specified, fragments (or templates) will be counted instead of reads."> + <option value="" selected="true">Disabled; all reads/mates will be counted individually</option> + <option value=" -p">Enabled; fragments (or templates) will be counted instead of reads</option> + </param> + + <when value=" -p"> + <conditional name="check_distance_enabled"> + <param name="check_distance" + type="boolean" + truevalue=" -P" + falsevalue="" + argument="-P" + label="Check paired-end distance" + help="If specified, paired-end distance will be checked when assigning fragments to meta-features or features. This option is only applicable when -p (Count fragments instead of reads) is specified. The distance thresholds should be specified using -d and -D (minimum and maximum fragment/template length) options." /> + <when value=" -P"> + <param name="minimum_fragment_length" + type="integer" + value="50" + argument="-d" + label="Minimum fragment/template length." /> + <param name="maximum_fragment_length" + type="integer" + value="600" + argument="-D" + label="Maximum fragment/template length." /> + </when> + <when value="" /> + </conditional> + </when> + <when value="" /> + </conditional> + + <param name="only_both_ends" + type="boolean" + truevalue=" -B" + falsevalue="" + argument="-B" + label="Only allow fragments with both reads aligned" + help="If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option is only applicable for paired-end reads." /> + + <param name="orientation" + type="select" + label="Orientation of the two read from the same pair" + argument="-S" + help="Default is 'fr'"> + <option value="fr" selected="true">Forward, Reverse (fr)</option> + <option value="ff">Forward, Forward (ff)</option> + <option value="rf">Reverse, Forward (rf)</option> + </param> + + <param name="exclude_chimerics" + type="boolean" + truevalue=" -C" + falsevalue="" + argument="-C" + checked="true" + label="Exclude chimeric fragments" + help="If specified, the chimeric fragments (those fragments that have their two ends aligned to different chromosomes) will NOT be included for summarization. This option is only applicable for paired-end read data." /> + </section> + + <section name="extended_parameters" title="Advanced options"> + <param name="gff_feature_type" + type="text" + value="exon" + argument="-t" + label="GFF feature type filter" + help="Specify the feature type. Only rows which have the matched matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default." /> + + <param name="gff_feature_attribute" + type="text" + value="gene_id" + argument="-g" + label="GFF gene identifier" + help="Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes), when GTF annotation is provided. `gene_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization." /> + + <param name="summarization_level" + type="boolean" + truevalue=" -f" + falsevalue="" + argument="-f" + label="On feature level" + help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." /> + + <param name ="contribute_to_multiple_features" + type="boolean" + truevalue=" -O" + falsevalue="" + argument="-O" + label="Allow read to contribute to multiple features" + help="If specified, reads (or fragments if -p is specified) will be allowed to be assigned to more than one matched meta- feature (or matched feature if -f is specified)" /> + + <param name="strand_specificity" + type="select" + label="Strand specificity of the protocol" + argument="-s" + help="Indicate if strand-specific read counting should be performed."> + <option value="0" selected="true">Unstranded</option> + <option value="1">Stranded (forwards)</option> + <option value="2">Stranded (reverse)</option> + </param> + + <conditional name="multimapping_enabled"> + <param name="multimapping_counts" + type="select" + argument="-M" + label="Count multi-mapping reads/fragments" + help="If specified, multi-mapping reads/fragments will be counted (ie. a multi-mapping read will be counted up to N times if it has N reported mapping locations). The program uses the `NH' tag to find multi-mapping reads."> + <option value="" selected="true">Disabled; multi-mapping reads are excluded (default)</option> + <option value=" -M">Enabled; multi-mapping reads are included</option> + </param> + <when value=" -M"> + <param name="fraction" + type="boolean" + truevalue="--fraction" + falsevalue="" + argument="--fraction" + label="Assign fractions to multimapping reads" + help="If specified, a fractional count 1/n will be generated for each multi-mapping read, where n is the number of alignments (indica- ted by 'NH' tag) reported for the read. This option must be used together with the '-M' option." /> + </when> + <when value="" /> + </conditional> + + <param name="mapping_quality" + type="integer" + value="12" + argument="-Q" + label="Minimum mapping quality per read" + help="The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 12 by default." /> + + <param name="largest_overlap" + type="boolean" + truevalue=" --largestOverlap" + falsevalue="" + argument="--largestOverlap" + label="Largest overlap" + help="If specified, reads (or fragments) will be assigned to the target that has the largest number of overlapping bases" /> + + <param name="min_overlap" + type="integer" + value="1" + argument="--minOverlap" + label="Minimum overlap" + help="Specify the minimum required number of overlapping bases between a read (or a fragment) and a feature. 1 by default. If a negative value is provided, the read will be extended from both ends." /> + + <param name="read_extension_5p" + type="integer" + value="0" + argument="--readExtension5" + label="Read 5' extension" + help="Reads are extended upstream by ... bases from their 5' end" /> + + <param name="read_extension_3p" + type="integer" + value="0" + argument="--readExtension3" + label="Read 3' extension" + help="Reads are extended upstream by ... bases from their 3' end" /> + + <param name="read_reduction" + type="select" + label="Reduce read to single position" + argument="--read2pos" + help="The read is reduced to its 5' most base or 3'most base. Read summarization is then performed based on thesingle base which the read is reduced to."> + <option value="" selected="true">Leave the read as it is</option> + <option value="--read2pos 5">Reduce it to the 5' end</option> + <option value="--read2pos 3">Reduce it to the 3' end</option> + </param> + + <param name="primary" + type="boolean" + truevalue=" --primary" + falsevalue="" + argument="--primary" + label="Only count primary alignments" + help="If specified, only primary alignments will be counted. Primaryand secondary alignments are identified using bit 0x100 in theFlag field of SAM/BAM files. All primary alignments in a datasetwill be counted no matter they are from multi-mapping reads ornot ('-M' is ignored)." /> + + <param name="ignore_dup" + type="boolean" + truevalue=" --ignoreDup" + falsevalue="" + argument="--ignoreDup" + label="Ignore reads marked as duplicate" + help="If specified, reads that were marked asduplicates will be ignored. Bit Ox400 in FLAG field of SAM/BAMfile is used for identifying duplicate reads. In paired enddata, the entire read pair will be ignored if at least one endis found to be a duplicate read." /> + + <param name="count_split_alignments_only" + type="boolean" + truevalue=" --countSplitAlignmentsOnly" + falsevalue="" + argument="--countSplitAlignmentsOnly" + label="Ignore reads marked as duplicate" + help="If specified, only split alignments (CIGARstrings containing letter `N') will be counted. All the otheralignments will be ignored. An example of split alignments isthe exon-spanning reads in RNA-seq data." /> + </section> + </inputs> + <outputs> + <data format="tabular" + name="output_medium" + label="${tool.name} on ${on_string}"> + <filter>format == "tabdel_medium"</filter> + <actions> + <action name="column_names" type="metadata" default="Geneid,${alignment.name},Length" /> + </actions> + </data> + + <data format="tabular" + name="output_short" + label="${tool.name} on ${on_string}"> + <filter>format == "tabdel_short"</filter> + <actions> + <action name="column_names" type="metadata" default="Geneid,${alignment.name}" /> + </actions> + </data> + + <data format="tabular" + name="output_full" + label="${tool.name} on ${on_string}: count table"> + <filter>format == "tabdel_full"</filter> + <actions> + <action name="column_names" type="metadata" default="Geneid,Chr,Start,End,Strand,Length,${alignment.name}" /> + </actions> + </data> + + <data format="tabular" + name="output_summary" + hidden="true" + label="${tool.name} on ${on_string}: summary"> + <actions> + <action name="column_names" type="metadata" default="Status,${alignment.name}" /> + </actions> + </data> + + <data format="tabular" + name="output_feature_lengths" + label="${tool.name} on ${on_string}: feature lengths"> + <filter>include_feature_length_file</filter> + <actions> + <action name="column_names" type="metadata" default="Feature,Length" /> + </actions> + </data> + </outputs> + <tests> + <test> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> + <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> + <param name="format" value="tabdel_short" /> + <param name="include_feature_length_file" value="true"/> + <output name="output" file="output_1_short.tab"/> + <output name="output_summary" file="output_1_summary.tab"/> + </test> + <test> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> + <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> + <param name="format" value="tabdel_medium" /> + <param name="include_feature_length_file" value="true"/> + <output name="output" file="output_1_medium.tab"/> + <output name="output_summary" file="output_1_summary.tab"/> + </test> + <test> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> + <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> + <param name="format" value="tabdel_full" /> + <param name="include_feature_length_file" value="true"/> + <output name="output" file="output_1_full.tab"/> + <output name="output_summary" file="output_1_summary.tab"/> + <output name="output_feature_lengths" file="output_feature_lengths.tab"/> + </test> + + <test> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> + <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> + <param name="format" value="tabdel_short" /> + <param name="include_feature_length_file" value="true"/> + <output name="output" file="output_2_short.tab"/> + <output name="output_summary" file="output_2_summary.tab"/> + </test> + <test> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> + <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> + <param name="format" value="tabdel_medium" /> + <param name="include_feature_length_file" value="true"/> + <output name="output" file="output_2_medium.tab"/> + <output name="output_summary" file="output_2_summary.tab"/> + </test> + <test> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> + <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> + <param name="format" value="tabdel_full" /> + <param name="include_feature_length_file" value="true"/> + <output name="output" file="output_2_full.tab"/> + <output name="output_summary" file="output_2_summary.tab"/> + <output name="output_feature_lengths" file="output_feature_lengths.tab"/> + </test> + </tests> + + <help><![CDATA[ +featureCounts +############# + +Overview +-------- +FeatureCounts is a light-weight read counting program written entirely in the C programming language. It can be used to count both gDNA-seq and RNA-seq reads for genomic features in in SAM/BAM files. + +Input formats +------------- +Alignments should be provided in either: + + - SAM format, http://samtools.sourceforge.net/samtools.shtml#5 + - BAM format + +Gene regions should be provided in the GFF/GTF format: + + - http://genome.ucsc.edu/FAQ/FAQformat.html#format3 + - http://www.ensembl.org/info/website/upload/gff.html + +Output format +------------- +FeatureCounts produces a table containing the counted reads, per gene, per row. Optionally the last column can be set to be the effective gene-length. These tables are compatible with the DESeq2 Galaxy wrapper by IUC. + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btt656</citation> + </citations> +</tool>