annotate mmquant.xml @ 1:87c5fa8651c1 draft

planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
author m-zytnicki
date Wed, 15 Feb 2017 06:03:00 -0500
parents 60abb6540004
children fc9d40c697e8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
1 <tool id="mmquant" name="Gene quantification (mmquant)" version="0.1.0">
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
2 <requirements>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
3 <requirement type="package" version="0.1.0">mmquant</requirement>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
4 </requirements>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
5 <stdio>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
6 <exit_code range="1:" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
7 </stdio>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
8 <command><![CDATA[
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
9 mmquant
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
10 -a "$annotation"
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
11 -r
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
12 #for $r in $reads_info
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
13 ${r.reads.file_name}
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
14 #end for
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
15 -f
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
16 #for $r in $reads_info
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
17 ${r.reads.ext}
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
18 #end for
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
19 -s
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
20 #for $r in $reads_info
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
21 ${r.strand}
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
22 #end for
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
23 -n
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
24 #for $r in $reads_info
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
25 '${r.name}'
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
26 #end for
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
27 -l "$overlap"
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
28 $gene_name
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
29 -c "$count"
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
30 -m "$merge"
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
31 -o "$output"
1
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
32 -d "$n_overlap"
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
33 -D "$pc_overlap"
0
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
34 ]]></command>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
35 <inputs>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
36 <param name="annotation" type="data" label="Annotation" format="gtf" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
37 <repeat name="reads_info" title="Reads" min="1" default="1">
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
38 <param name="reads" type="data" label="Reads" multiple="false" format="sam,bam" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
39 <param name="name" type="text" label="Sample name" value="sample_N" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
40 <param name="strand" type="select" label="Strand" multiple="false" >
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
41 <option value="U" selected="yes">unknown</option>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
42 <option value="FR">forward-reverse (for paired-end reads)</option>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
43 <option value="RF">reverse-forward (for paired-end reads)</option>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
44 <option value="F">forward (for single-end reads)</option>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
45 <option value="R">reverse (for single-end reads)</option>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
46 </param>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
47 </repeat>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
48 <param name="overlap" type="float" value="-1" label="Overlap type" help="&lt;0: read is included, &lt;1: overlap, otherwise: # nt" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
49 <param name="gene_name" type="boolean" label="Print gene name instead of IDs" truevalue="-g" falsevalue="" help="use gene name instead of gene ID in the output file" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
50 <param name="count" type="integer" value="0" min="0" label="Count threshold" help="Do not display genes with less than N reads" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
51 <param name="merge" type="float" value="0.0" min="0.0" max="1.0" label="Merge threshold" help="Merge gene aggregate count with parent aggregate if count is low" />
1
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
52 <param name="n_overlap" type="integer" value="30" min="1" label="Difference of overlapping" help="Number of overlapping bp between the best matches and the other matches" />
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
53 <param name="pc_overlap" type="float" value="0.5" min="0.0" max="1.0" label="Ratio of overlapping" help="Ratio of overlapping bp between the best matches and the other matches" />
0
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
54 </inputs>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
55 <outputs>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
56 <data name="output" format="txt" label="${tool.name} on ${on_string}" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
57 </outputs>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
58 <tests>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
59 <test>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
60 <param name="annotation" value="test_mmquant_1.gtf" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
61 <param name="reads" value="test_mmquant_1.sam" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
62 <param name="name" value="test" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
63 <param name="strand" value="U" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
64 <output name="output" file="test_mmquant_1.txt" ftype="txt" />
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
65 </test>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
66 </tests>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
67 <help>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
68 **Why use this tool?**
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
69
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
70 This tool counts the number of reads (produced by RNA-Seq) per gene, much like HTSeq-count_ and featureCounts_. The main difference with other tools is that multi-mapping reads are counted differently: if a read is mapped to gene A, gene B, and gene C, the tool will create a new feature, "geneA--geneB--geneC", that will be counted once.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
71
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
72 .. _HTSeq-count: http://www-huber.embl.de/users/anders/HTSeq/doc/overview.html
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
73 .. _featureCounts: http://bioinf.wehi.edu.au/featureCounts/
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
74
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
75 **Why does it matter?**
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
76
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
77 Recently, an article_ showed that RNA-Seq quantification tools are not accurate, leading to errors while finding differentially expressed genes. The authors suggest this method, which may not identify the individual genes that are differentially expressed (something that RNA-Seq alone cannot do), but does identify the groups of genes that are differentially expressed.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
78
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
79 .. _article: http://www.genomebiology.com/2015/16/1/177
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
80
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
81 **Strands**
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
82
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
83 Strands can be:
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
84
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
85 * for paired-end reads: ``U`` (unknown), ``FR`` (forward-reverse), ``RF`` (reverse-forward), ``FF`` (forward-forward);
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
86
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
87 * for single-end reads: ``U`` (unknown), ``F`` (forward), ``R`` (reverse);
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
88
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
89 * Default: ``U``.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
90
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
91
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
92 **Annotation file**
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
93
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
94 The annotation file should be in GTF. GFF might work too. The tool only uses the gene/transcript/exon types.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
95
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
96
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
97 **Reads files**
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
98
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
99 The reads should be given in SAM or BAM format, and be sorted (by position). The reads can be single end or paired-end (or a mixture thereof).
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
100
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
101 You can use samtools_ to sort them. This tool uses the NH flag (provides the number of hits for each read, see the specification_), so be sure that your mapping tool sets it adequately (yes, TopHat2_ and STAR_ do it fine). You should also check how your mapping tool handles multi-mapping reads (this can usually be tuned using the appropriate parameters).
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
102
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
103 .. _samtools: http://www.htslib.org/
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
104 .. _specification: https://samtools.github.io/hts-specs/SAMv1.pdf
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
105 .. _TopHat2: http://ccb.jhu.edu/software/tophat/index.shtml
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
106 .. _STAR: https://github.com/alexdobin/STAR/releases
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
107
1
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
108 **Read mapping to several genes**
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
109
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
110 We will suppose here that the ``-l 1`` strategy is used (i.e. a read is attributed to a gene as soon as at least 1 nucleotide overlaps). The example can be extended to other strategies as well.
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
111
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
112 If a read (say, of size 100) maps unambiguously and overlaps with genes A and B, it will be counted as 1 for the new "gene" gene_A--gene_B. However, suppose that only 1 nucleotide overlaps with gene A, whereas 100 nucleotides overlap with gene B (yes, genes A and B overlap). You probably would like to attribute the read to gene B.
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
113
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
114 The options ``Difference of overlapping`` and ``Ratio of overlapping`` control this. We compute the number of overlapping nucleotides between a read and the overlapping genes. If a read overlaps "significantly" more with one gene than with all the other genes, the read will be attributed to the former gene only.
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
115
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
116 The option ``Difference of overlapping`` *n* computes the differences of overlapping nucleotides. Let us name *N_A* and *N_B* the number of overlapping nucleotides with genes A and B respectively. If *N_A >= N_B + n*, then the read will be attributed to gene A only.
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
117
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
118 The option ``Ratio of overlapping`` *m* compares the ratio of overlapping nucleotides. If *N_A / N_B >= m*, then the read will be attributed to gene A only.
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
119
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
120 If both options ``Difference of overlapping`` *n* and ``Ratio of overlapping`` *m* are used, then the read will be attributed to gene A only iff both *N_A >= N_B + n* and *N_A / N_B >= m*.
87c5fa8651c1 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents: 0
diff changeset
121
0
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
122
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
123 **Output file**
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
124
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
125 The output is a tab-separated file, to be used in EdgeR or DESeq, for instance. If the user provided *n* reads files, the output will contain *n+1* columns:
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
126
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
127 ============== ======== ======== ===
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
128 Gene sample_1 sample_2 ...
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
129 ============== ======== ======== ===
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
130 gene_A ... ... ...
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
131 gene_B ... ... ...
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
132 gene_B--gene_C ... ... ...
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
133 ============== ======== ======== ===
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
134
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
135 The first column contains the IDs of the genes.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
136 If a read maps to several genes (say, gene_B and gene_C), a new feature is added to the table, gene_B--gene_C. The reads that can be mapped to these genes will be counted there (but not in the gene_B nor gene_C lines).
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
137
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
138 With the ``Print gene name instead of IDs`` option, the gene names are used instead of gene IDs. If two different genes have the same name, the systematic name is added, like: ``Mat2a (ENSMUSG00000053907)``.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
139
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
140 Note that the gene IDs and gene names should be given in the GTF file after the ``gene_id`` and ``gene_name`` tags respectively.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
141
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
142 **Output stats**
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
143
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
144 The output stats are given in standard error.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
145
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
146 The general shape is::
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
147
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
148 Results for sample_A:
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
149 # hits: N
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
150 # uniquely mapped reads: N (x%)
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
151 # ambiguous hits: N (x%)
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
152 # non-uniquely mapped hits: N (x%)
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
153 # unassigned hits: N (x%)
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
154
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
155 These figures mainly provide stats on hits; one sequence may have zero, one, or several hits. An ambiguous hit is a hit that overlaps several annotation features. A non-uniquely mapped hit belongs to a sequence that maps to several loci in the genome.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
156
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
157 **Overlap**
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
158
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
159 The way a read R is mapped to a gene A depends on the overlap *n* value:
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
160
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
161 ==================== ===============================================
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
162 if *n* is then R is mapped to A iff
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
163 ==================== ===============================================
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
164 a negative value R is included in A
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
165 a positive integer they have at least *n* nucleotides in common
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
166 a float value (0, 1) *n* % of the nucleotides of R are shared with A
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
167 ==================== ===============================================
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
168
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
169 **Merge Threshold**
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
170
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
171 Sometimes, there are very few reads that can be mapped unambiguously to a gene A, because it is very similar to gene B.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
172
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
173 ============== ==========
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
174 Gene sample_1
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
175 ============== ==========
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
176 gene_A *x*
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
177 gene_B *y*
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
178 gene_A--gene_B *z*
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
179 ============== ==========
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
180
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
181 In the previous example, suppose that *x &lt;&lt; z*. In this case, you can move all the reads from gene_A to gene_A--gene_B, using the merge threshold *t*, a float in (0, 1). If *x &lt; t* x *y*, then the reads are transferred.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
182
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
183 **Count Threshold**
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
184
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
185 If the maximum number of reads for a gene is less than the count threshold (a non-negative integer), then the corresponding line is discarded.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
186
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
187
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
188 **Contact**
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
189
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
190 Comment? Suggestion? Do not hesitate to send me an email_.
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
191
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
192 .. _email: mailto:matthias.zytnicki@toulouse.inra.fr
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
193 </help>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
194 <citations>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
195 <citation type="bibtex">
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
196 @misc{bitbucketmmquant,
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
197 author = {Zytnicki.},
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
198 year = {2016},
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
199 title = {multi-mapping-counter},
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
200 publisher = {BitBucket},
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
201 journal = {BitBucket repository},
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
202 url = {https://bitbucket.org/mzytnicki/multi-mapping-counter},
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
203 }</citation>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
204 </citations>
60abb6540004 planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff changeset
205 </tool>