Mercurial > repos > m-zytnicki > mmquant
annotate mmquant.xml @ 1:87c5fa8651c1 draft
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
author | m-zytnicki |
---|---|
date | Wed, 15 Feb 2017 06:03:00 -0500 |
parents | 60abb6540004 |
children | fc9d40c697e8 |
rev | line source |
---|---|
0
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
1 <tool id="mmquant" name="Gene quantification (mmquant)" version="0.1.0"> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
2 <requirements> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
3 <requirement type="package" version="0.1.0">mmquant</requirement> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
4 </requirements> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
5 <stdio> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
6 <exit_code range="1:" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
7 </stdio> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
8 <command><![CDATA[ |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
9 mmquant |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
10 -a "$annotation" |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
11 -r |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
12 #for $r in $reads_info |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
13 ${r.reads.file_name} |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
14 #end for |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
15 -f |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
16 #for $r in $reads_info |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
17 ${r.reads.ext} |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
18 #end for |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
19 -s |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
20 #for $r in $reads_info |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
21 ${r.strand} |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
22 #end for |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
23 -n |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
24 #for $r in $reads_info |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
25 ${r.name} |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
26 #end for |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
27 -l "$overlap" |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
28 "$gene_name" |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
29 -c "$count" |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
30 -m "$merge" |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
31 -o "$output" |
1
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
32 -d "$n_overlap" |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
33 -D "$pc_overlap" |
0
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
34 ]]></command> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
35 <inputs> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
36 <param name="annotation" type="data" label="Annotation" format="gtf" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
37 <repeat name="reads_info" title="Reads" min="1" default="1"> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
38 <param name="reads" type="data" label="Reads" multiple="false" format="sam,bam" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
39 <param name="name" type="text" label="Sample name" value="sample_N" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
40 <param name="strand" type="select" label="Strand" multiple="false" > |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
41 <option value="U" selected="yes">unknown</option> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
42 <option value="FR">forward-reverse (for paired-end reads)</option> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
43 <option value="RF">reverse-forward (for paired-end reads)</option> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
44 <option value="F">forward (for single-end reads)</option> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
45 <option value="R">reverse (for single-end reads)</option> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
46 </param> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
47 </repeat> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
48 <param name="overlap" type="float" value="-1" label="Overlap type" help="<0: read is included, <1: overlap, otherwise: # nt" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
49 <param name="gene_name" type="boolean" label="Print gene name instead of IDs" truevalue="-g" falsevalue="" help="use gene name instead of gene ID in the output file" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
50 <param name="count" type="integer" value="0" min="0" label="Count threshold" help="Do not display genes with less than N reads" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
51 <param name="merge" type="float" value="0.0" min="0.0" max="1.0" label="Merge threshold" help="Merge gene aggregate count with parent aggregate if count is low" /> |
1
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
52 <param name="n_overlap" type="integer" value="30" min="1" label="Difference of overlapping" help="Number of overlapping bp between the best matches and the other matches" /> |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
53 <param name="pc_overlap" type="float" value="0.5" min="0.0" max="1.0" label="Ratio of overlapping" help="Ratio of overlapping bp between the best matches and the other matches" /> |
0
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
54 </inputs> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
55 <outputs> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
56 <data name="output" format="txt" label="${tool.name} on ${on_string}" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
57 </outputs> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
58 <tests> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
59 <test> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
60 <param name="annotation" value="test_mmquant_1.gtf" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
61 <param name="reads" value="test_mmquant_1.sam" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
62 <param name="name" value="test" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
63 <param name="strand" value="U" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
64 <output name="output" file="test_mmquant_1.txt" ftype="txt" /> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
65 </test> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
66 </tests> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
67 <help> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
68 **Why use this tool?** |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
69 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
70 This tool counts the number of reads (produced by RNA-Seq) per gene, much like HTSeq-count_ and featureCounts_. The main difference with other tools is that multi-mapping reads are counted differently: if a read is mapped to gene A, gene B, and gene C, the tool will create a new feature, "geneA--geneB--geneC", that will be counted once. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
71 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
72 .. _HTSeq-count: http://www-huber.embl.de/users/anders/HTSeq/doc/overview.html |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
73 .. _featureCounts: http://bioinf.wehi.edu.au/featureCounts/ |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
74 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
75 **Why does it matter?** |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
76 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
77 Recently, an article_ showed that RNA-Seq quantification tools are not accurate, leading to errors while finding differentially expressed genes. The authors suggest this method, that may not provide the genes that are differentially expressed (something that RNA-Seq alone cannot do), but the groups of genes that are differentially expressed. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
78 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
79 .. _article: http://www.genomebiology.com/2015/16/1/177 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
80 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
81 **Strands** |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
82 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
83 Strands can be: |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
84 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
85 * for paired-end reads: ``U`` (unknown), ``FR`` (forward-reverse), ``RF`` (reverse-forward), ``FF`` (forward-forward); |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
86 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
87 * for single-end reads: ``U`` (unknown), ``F`` (forward), ``R`` (reverse); |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
88 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
89 * Default: ``U``. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
90 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
91 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
92 **Annotation file** |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
93 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
94 The annotation file should be in GTF. GFF might work too. The tool only uses the gene/transcript/exon types. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
95 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
96 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
97 **Reads files** |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
98 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
99 The reads should be given in SAM or BAM format, and be sorted (by position). The reads can be single-end or paired-end (or a mixture thereof). |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
100 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
101 You can use the samtools_ to sort them. This tool uses the NH flag (provides the number of hits for each read, see the specification_), so be sure that your mapping tool sets it adequately (yes, TopHat2_ and STAR_ do it fine). You should also check how your mapping tool handles multi-mapping reads (this can usually be tuned using the appropriate parameters). |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
102 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
103 .. _samtools: http://www.htslib.org/ |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
104 .. _specification: https://samtools.github.io/hts-specs/SAMv1.pdf |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
105 .. _TopHat2: http://ccb.jhu.edu/software/tophat/index.shtml |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
106 .. _STAR: https://github.com/alexdobin/STAR/releases |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
107 |
1
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
108 **Read mapping to several genes** |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
109 |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
110 We will suppose here that the ``-l 1`` strategy is used (i.e. a read is attributed to a gene as soon as at least 1 nucleotide overlaps). The example can be extended to other strategies as well. |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
111 |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
112 If a read (say, of size 100), maps unambiguously and overlaps with gene A and B, it will be counted as 1 for the new "gene" gene_A--gene_B. However, suppose that only 1 nucleotide overlaps with gene A, whereas 100 nucleotides overlap with gene B (yes, genes A and B overlap). You probably would like to attribute the read to gene B. |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
113 |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
114 The options ``Difference of overlapping`` and ``Ratio of overlapping`` control this. We compute the number of overlapping nucleotides between a read and the overlapping genes. If a read overlaps "significantly" more with one gene than with all the other genes, the tool will attribute the read to the former gene only. |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
115 |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
116 The option ``Difference of overlapping`` *n* computes the differences of overlapping nucleotides. Let us name *N_A* and *N_B* the number of overlapping nucleotides with genes A and B respectively. If *N_A >= N_B + n*, then the read will be attributed to gene A only. |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
117 |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
118 The option ``Ratio of overlapping`` *m* compares the ratio of overlapping nucleotides. If *N_A / N_B >= m*, then the read will be attributed to gene A only. |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
119 |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
120 If both option ``Difference of overlapping`` *n* and ``Ratio of overlapping`` *m* are used, then the read will be attributed to gene A only iff both *N_A >= N_B + n* and *N_A / N_B >= m*. |
87c5fa8651c1
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
0
diff
changeset
|
121 |
0
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
122 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
123 **Output file** |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
124 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
125 The output is a tab-separated file, to be used in edgeR or DESeq, for instance. If the user provided *n* reads files, the output will contain *n+1* columns: |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
126 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
127 ============== ======== ======== === |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
128 Gene sample_1 sample_2 ... |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
129 ============== ======== ======== === |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
130 gene_A ... ... ... |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
131 gene_B ... ... ... |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
132 gene_B--gene_C ... ... ... |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
133 ============== ======== ======== === |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
134 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
135 The first column contains the IDs of the genes. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
136 If a read maps to several genes (say, gene_B and gene_C), a new feature is added to the table, gene_B--gene_C. The reads that can be mapped to these genes will be counted there (but not in the gene_B nor gene_C lines). |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
137 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
138 With the ``Print gene name instead of IDs`` option, the gene names are used instead of gene IDs. If two different genes have the same name, the systematic name is added, like: ``Mat2a (ENSMUSG00000053907)``. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
139 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
140 Note that the gene IDs and gene names should be given in the GTF file after the ``gene_id`` and ``gene_name`` tags respectively. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
141 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
142 **Output stats** |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
143 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
144 The output stats are given in standard error. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
145 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
146 The general shape is:: |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
147 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
148 Results for sample_A: |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
149 # hits: N |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
150 # uniquely mapped reads: N (x%) |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
151 # ambiguous hits: N (x%) |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
152 # non-uniquely mapped hits: N (x%) |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
153 # unassigned hits: N (x%) |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
154 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
155 These figures mainly provide stats on hits; one sequence may have zero, one, or several hits. An ambiguous hit is a hit that overlaps several annotation features. A non-uniquely mapped hit belongs to a sequence that maps to several loci in the genome. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
156 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
157 **Overlap** |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
158 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
159 The way a read R is mapped to a gene A depends on the overlap *n* value: |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
160 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
161 ==================== =============================================== |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
162 if *n* is then R is mapped to A iff |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
163 ==================== =============================================== |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
164 a negative value R is included in A |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
165 a positive integer they have at least *n* nucleotides in common |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
166 a float value (0, 1) *n* % of the nucleotides of R are shared with A |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
167 ==================== =============================================== |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
168 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
169 **Merge Threshold** |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
170 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
171 Sometimes, there are very few reads that can be mapped unambiguously to a gene A, because it is very similar to gene B. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
172 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
173 ============== ========== |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
174 Gene sample_1 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
175 ============== ========== |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
176 gene_A *x* |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
177 gene_B *y* |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
178 gene_A--gene_B *z* |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
179 ============== ========== |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
180 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
181 In the previous example, suppose that *x << z*. In this case, you can move all the reads from gene_A to gene_A--gene_B, using the merge threshold *t*, a float in (0, 1). If *x < t × y*, then the reads are transferred. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
182 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
183 **Count Threshold** |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
184 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
185 If the maximum number of reads for a gene is less than the count threshold (a non-negative integer), then the corresponding line is discarded. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
186 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
187 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
188 **Contact** |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
189 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
190 Comment? Suggestion? Do not hesitate to send me an email_. |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
191 |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
192 .. _email: mailto:matthias.zytnicki@toulouse.inra.fr |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
193 </help> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
194 <citations> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
195 <citation type="bibtex"> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
196 @misc{bitbucketmmquant, |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
197 author = {Zytnicki.}, |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
198 year = {2016}, |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
199 title = {multi-mapping-counter}, |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
200 publisher = {BitBucket}, |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
201 journal = {BitBucket repository}, |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
202 url = {https://bitbucket.org/mzytnicki/multi-mapping-counter}, |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
203 }</citation> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
204 </citations> |
60abb6540004
planemo upload commit fb76aa0a938a2498d3206e6039bc1d9906e6c2ce-dirty
m-zytnicki
parents:
diff
changeset
|
205 </tool> |