Mercurial > repos > yhoogstrate > featurecounts
changeset 3:e04fbcc4e91a draft default tip
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/featurecounts_galaxy_wrapper commit 597fa3df643b54ea93a17448c722f657e3d68b60
author | yhoogstrate |
---|---|
date | Wed, 07 Oct 2015 11:38:41 -0400 |
parents | b5c93611d2c5 |
children | |
files | README.rst featurecounts.xml test-data/output_summary.tab tool_dependencies.xml |
diffstat | 4 files changed, 81 insertions(+), 87 deletions(-) |
line wrap: on
line diff
--- a/README.rst Thu Sep 10 05:11:49 2015 -0400 +++ b/README.rst Wed Oct 07 11:38:41 2015 -0400 @@ -52,4 +52,4 @@ Acknowledgements ---------------- -I would like to thank Marius van den Beek for his contributions to this project. +I would like to thank Marius van den Beek and Björn Grüning for their contributions to this project.
--- a/featurecounts.xml Thu Sep 10 05:11:49 2015 -0400 +++ b/featurecounts.xml Wed Oct 07 11:38:41 2015 -0400 @@ -1,14 +1,12 @@ -<?xml version="1.0" encoding="UTF-8"?> -<tool id="featurecounts" name="featureCounts" version="1.4.6.p1"> +<tool id="featurecounts" name="featureCounts" version="1.4.6.p5"> <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description> <requirements> - <requirement type="package" version="1.4.6.p1">featurecounts</requirement> - <requirement type="package" version="1.0.0">featurecounts2bed</requirement> + <requirement type="package" version="1.4.6.p5">featurecounts</requirement> </requirements> - <version_command>featureCounts -v</version_command> + <version_command>featureCounts -v 2>&1 | grep .</version_command> <command><![CDATA[ + ## Check 01: do the alignments have a dbkey and is the option set to using it? - #if $reference_gene_sets_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1 echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/gene_sets.loc' is configured properly!" >&2 #else @@ -17,7 +15,7 @@ echo "Either all files must be SAM or all files must be BAM, no mixture is allowed." 
>&2 #else featureCounts - -a + -a #if $reference_gene_sets_source.source_select == "indexed_filtered" "$reference_gene_sets_source.reference_gene_sets" #else if $reference_gene_sets_source.source_select == "indexed_all" @@ -33,10 +31,10 @@ *# "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'gene_sets' ].get_fields() )[0][2] }" #end if - - -o "$output" + + -o "${output}" -T \${GALAXY_SLOTS:-2} - + #if $extended_parameters.parameters == "extended" -t $extended_parameters.gff_feature_type -g $extended_parameters.gff_feature_attribute @@ -51,55 +49,53 @@ -D $extended_parameters.maximum_fragment_length $extended_parameters.only_both_ends $extended_parameters.exclude_chimerics - $extended_parameters.namesort #end if - + #for $alignment in $alignments ${alignment} #end for - + 2>&1 - + #set $columns = [str(i+7) for i, alignment in enumerate($alignments)] #set $columns=",".join($columns) #if $format == "tabdel_default" or $format.value == "tabdel_default" - ; cp $output tmp.txt - ; egrep -v "^#" tmp.txt > tmp2.txt - ; cut -f 1,$columns tmp2.txt > tmp_left.txt - ; cut -f 6 tmp2.txt > tmp_right.txt - ; paste tmp_left.txt tmp_right.txt > $output + && cp $output tmp.txt + && egrep -v "^#" tmp.txt > tmp2.txt + && cut -f 1,$columns tmp2.txt > tmp_left.txt + && cut -f 6 tmp2.txt > tmp_right.txt + && paste tmp_left.txt tmp_right.txt > $output #elif $format == "tabdel_short" or $format.value == "tabdel_short" - ; cp $output tmp.txt - ; egrep -v "^#" tmp.txt | cut -f 1,$columns > $output + && cp $output tmp.txt + && egrep -v "^#" tmp.txt | cut -f 1,$columns > $output #end if - + ## For every alignment, replace its filename for: "hid: sample name" #for $alignment in $alignments #set $alignment_escaped = str($alignment).replace('/', '\/').replace('.', '\.') #set $alignment_name_escaped = str(alignment.hid)+": "+str($alignment.name).replace('\t',' 
').replace('\\','\\\\').replace("'","\\'").replace('/','\/') - + #if $format.value == "tabdel_default" or $format.value == "tabdel_short" - ; sed -e '1 s/$alignment_escaped/${alignment_name_escaped}/g' $output > tmp.txt + && sed -e '1 s/$alignment_escaped/${alignment_name_escaped}/g' $output > tmp.txt #elif $format.value == "bed": - ; featurecounts2bed.sh -f "$output" > tmp.txt + && $__tool_directory__/featurecounts2bed.sh -f "$output" > tmp.txt #else - ; sed -e '1,2 s/$alignment_escaped/${alignment_name_escaped}/g' $output > tmp.txt + && sed -e '1,2 s/$alignment_escaped/${alignment_name_escaped}/g' $output > tmp.txt #end if - - ; mv tmp.txt $output - - ; sed -e '1 s/$alignment_escaped/${alignment_name_escaped}/g' $output".summary" > tmp.txt - ; mv tmp.txt $output".summary" + + && mv tmp.txt "${output}" + + && sed -e '1 s/$alignment_escaped/${alignment_name_escaped}/g' $output".summary" > tmp.txt + && mv tmp.txt ${output}".summary" #end for - ; mv $output".summary" $output_summary + && mv ${output}".summary" "${output_summary}" #end if #end if ]]></command> - <inputs> <param name="alignments" type="data" format="bam,sam" label="Alignment file" help="The input alignment file(s) where the gene expression has to be counted. The file can have a SAM or BAM format; but ALL files in the series must be in THE SAME format." 
multiple="true" /> - + <!-- Find out how to access the the GTF/GFF file(s) --> <conditional name="reference_gene_sets_source"> <param name="source_select" type="select" label="GFF/GTF Source"> @@ -136,14 +132,12 @@ <!-- Do nothing, determine GTF/GFF file at runtime --> </when> </conditional> - <param name="format" type="select" label="Output format"> + <option value="tabdel_default">Gene-name "\t" gene-count "\t" gene-length</option> + <option value="tabdel_short" selected="true">Gene-name "\t" gene-count</option> + <option value="bed">BED format (line per exon): chr "\t" start "\t" stop "\t" description "\t" readcount</option> <option value="complex">featureCounts 1.4.0+ default (extensive; complex)</option> - <option value="tabdel_default" selected="true">Gene-name "\t" gene-count "\t" gene-length (tab-delimited)</option> - <option value="tabdel_short">Gene-name "\t" gene-count (tab-delimited)</option> - <option value="bed">BED format (line per exon): chr "\t" start "\t" stop "\t" description "\t" readcount (tab-delimited)</option> </param> - <conditional name="extended_parameters"> <param name="parameters" type="select" label="featureCounts parameters" help="For more advanced featureCounts settings."> <option value="default">Default settings</option> @@ -152,49 +146,67 @@ <when value="default"> </when> <when value="extended"> - <param name="gff_feature_type" type="text" value="exon" label="GFF feature type filter" help="Specify the feature type. Only rows which have the matched matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default." /> - - <param name="gff_feature_attribute" type="text" value="gene_id" label="GFF gene identifier" help="Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes), when GTF annotation is provided. `gene_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization." 
/> - - <param name ="summarization_level" type="boolean" truevalue=" -f" falsevalue="" label="On feature level" help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." /> - - <param name ="contribute_to_multiple_features" type="boolean" truevalue=" -O" falsevalue="" label="Allow read to contribute to multiple features" help="If specified, reads (or fragments if -p is specified) will be allowed to be assigned to more than one matched meta- feature (or matched feature if -f is specified)" /> - - <param name="protocol" type="select" label="Strand specific protocol" help="Indicate if strand-specific read counting should be performed. It has three possible values: 0 (unstranded), 1 (stranded) and 2 (reversely stranded). 0 by default."> + <param name="gff_feature_type" type="text" value="exon" + label="GFF feature type filter" + help="Specify the feature type. Only rows which have the matched matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default." /> + + <param name="gff_feature_attribute" type="text" value="gene_id" + label="GFF gene identifier" + help="Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes), when GTF annotation is provided. `gene_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization." 
/> + + <param name ="contribute_to_multiple_features" type="boolean" truevalue=" -O" falsevalue="" + label="Allow read to contribute to multiple features" + help="If specified, reads (or fragments if -p is specified) will be allowed to be assigned to more than one matched meta- feature (or matched feature if -f is specified)" /> + + <param name="protocol" type="select" label="Strand specific protocol" + help="Indicate if strand-specific read counting should be performed."> <option value=" -s 0" selected="true">Unstranded</option> <option value=" -s 1">Stranded (forwards)</option> <option value=" -s 2">Stranded (reverse)</option> </param> - - <param name="multimapping_counts" type="boolean" truevalue=" -M" falsevalue="" label="Count multi-mapping reads/fragments" help="If specified, multi-mapping reads/fragments will be counted (ie. a multi-mapping read will be counted up to N times if it has N reported mapping locations). The program uses the `NH' tag to find multi-mapping reads." /> - - <param name="mapping_quality" type="integer" value="0" label="Minimum read quality" help="The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 0 by default." /> - - <param name="fragment_counting" type="boolean" truevalue=" -p" falsevalue="" label="PE: Count fragments instead of reads" help="Paired-end specific: If specified, fragments (or templates) will be counted instead of reads. The two reads from the same fragment must be adjacent to each other in the provided SAM/BAM file. If SAM/BAM input does not meet this requirement, the -S (sorting) option should be provided as well." /> - - <param name="check_distance" type="boolean" truevalue=" -P" falsevalue="" label="PE: Check paired-end distance" help="Paired-end specific: If specified, paired-end distance will be checked when assigning fragments to meta-features or features. 
This option is only applicable when -p (Count fragments instead of reads) is specified. The distance thresholds should be specified using -d and -D (minimum and maximum fragment/template length) options." /> - + + <param name="multimapping_counts" type="boolean" truevalue=" -M" falsevalue="" + label="Count multi-mapping reads/fragments" + help="If specified, multi-mapping reads/fragments will be counted (ie. a multi-mapping read will be counted up to N times if it has N reported mapping locations). The program uses the `NH' tag to find multi-mapping reads." /> + + <param name="mapping_quality" type="integer" value="12" label="Minimum read quality" + help="The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 12 by default." /> + + <param name="fragment_counting" type="boolean" truevalue=" -p" falsevalue="" checked="true" + label="PE: Count fragments instead of reads" + help="Paired-end specific: If specified, fragments (or templates) will be counted instead of reads." /> + + <param name="check_distance" type="boolean" truevalue=" -P" falsevalue="" + label="PE: Check paired-end distance" + help="Paired-end specific: If specified, paired-end distance will be checked when assigning fragments to meta-features or features. This option is only applicable when -p (Count fragments instead of reads) is specified. The distance thresholds should be specified using -d and -D (minimum and maximum fragment/template length) options." /> + <param name="minimum_fragment_length" type="integer" value="50" label="PE: Minimum fragment/template length." /> <param name="maximum_fragment_length" type="integer" value="600" label="PE: Maximum fragment/template length." 
/> - - <param name="only_both_ends" type="boolean" truevalue=" -B" falsevalue="" label="PE: only allow fragments with both reads aligned" help="Paired-end specific: If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option is only applicable for paired-end reads." /> - - <param name="exclude_chimerics" type="boolean" truevalue=" -C" falsevalue="" label="PE: Exclude chimeric fragments" help="Paired-end specific: If specified, the chimeric fragments (those fragments that have their two ends aligned to different chromosomes) will NOT be included for summarization. This option is only applicable for paired-end read data." /> - - <param name="namesort" type="boolean" truevalue=" -S" falsevalue="" label="PE: Name-sort reads (slow!)" help="Paired-end specific: If specified, the program will reorder input reads according to their names and make reads from the same pair be adjacent to each other. This option should be provided when reads from the same pair are not adjacent to each other in input SAM/BAM files (for instance sorting reads by chromosomal locations could decouple reads from the same pair)." /> + + <param name="only_both_ends" type="boolean" truevalue=" -B" falsevalue="" + label="PE: only allow fragments with both reads aligned" + help="Paired-end specific: If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option is only applicable for paired-end reads." /> + + <param name="exclude_chimerics" type="boolean" truevalue=" -C" falsevalue="" checked="true" + label="PE: Exclude chimeric fragments" + help="Paired-end specific: If specified, the chimeric fragments (those fragments that have their two ends aligned to different chromosomes) will NOT be included for summarization. This option is only applicable for paired-end read data." 
/> + + <param name ="summarization_level" type="boolean" truevalue=" -f" falsevalue="" + label="On feature level" + help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." /> + </when> </conditional> </inputs> - <outputs> <data format="tabular" name="output" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])}" /> <data format="tabular" name="output_summary" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])} summary" /> </outputs> - <tests> <test> <param name="alignments" value="featureCounts_input1.bam,featureCounts_input2.bam" ftype="bam" /> <param name="source_select" value="history" /> + <param name="format" value="tabdel_default" /> <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> <output name="output" file="output.tab"/> <output name="output_summary" file="output_summary.tab"/>
--- a/test-data/output_summary.tab Thu Sep 10 05:11:49 2015 -0400 +++ b/test-data/output_summary.tab Wed Oct 07 11:38:41 2015 -0400 @@ -5,7 +5,7 @@ Unassigned_NoFeatures 6078 6344 Unassigned_Unmapped 0 0 Unassigned_MappingQuality 0 0 -Unassigned_FragementLength 0 0 +Unassigned_FragmentLength 0 0 Unassigned_Chimera 0 0 Unassigned_Secondary 0 0 Unassigned_Nonjunction 0 0
--- a/tool_dependencies.xml Thu Sep 10 05:11:49 2015 -0400 +++ b/tool_dependencies.xml Wed Oct 07 11:38:41 2015 -0400 @@ -1,24 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="featurecounts" version="1.4.6.p1"> - <repository changeset_revision="42796f06052f" name="package_featurecounts_1_4_6" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - - <package name="featurecounts2bed" version="1.0.0"> - <install version="1.0"> - <actions> - <action type="shell_command"> - mkdir $INSTALL_DIR/bin && - cp $REPOSITORY_INSTALL_DIR/featurecounts2bed.sh $INSTALL_DIR/bin/ - </action> - <action type="chmod"> - <file mode="755">$INSTALL_DIR/bin/featurecounts2bed.sh</file> - </action> - <action type="set_environment"> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - <environment_variable action="prepend_to" name="PATH">$REPOSITORY_INSTALL_DIR</environment_variable> - </action> - </actions> - </install> + <package name="featurecounts" version="1.4.6.p5"> + <repository changeset_revision="600e51134f8d" name="package_featurecounts_1_4_6_p5" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency>