Mercurial > repos > iuc > umi_tools_group
comparison umi-tools_group.xml @ 1:f73f13641bb6 draft
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
author | iuc |
---|---|
date | Wed, 10 Jan 2018 19:09:28 -0500 |
parents | 860bc357b678 |
children | a24f5b991320 |
comparison
equal
deleted
inserted
replaced
0:860bc357b678 | 1:f73f13641bb6 |
---|---|
2 <description>Extract UMI from fastq files</description> | 2 <description>Extract UMI from fastq files</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements"> | 6 <expand macro="requirements"> |
7 <requirement type="package" version="1.5">samtools</requirement> | 7 <requirement type="package" version="1.6">samtools</requirement> |
8 </expand> | 8 </expand> |
9 <command detect_errors="exit_code"><![CDATA[ | 9 <command detect_errors="exit_code"><![CDATA[ |
10 #if $input.is_of_type("sam"): | 10 #if $input.is_of_type("sam"): |
11 #set $input_file = $input | 11 #set $input_file = $input |
12 #else: | 12 #else: |
13 ln -sf '${input}' 'input.bam' && | 13 ln -sf '${input}' 'input.bam' && |
14 ln -sf '$input.metadata.bam_index' 'input.bam.bai' && | 14 ln -sf '$input.metadata.bam_index' 'input.bam.bai' && |
15 #set $input_file = 'input.bam' | 15 #set $input_file = 'input.bam' |
16 #end if | 16 #end if |
17 | 17 |
18 umi_tools group --extract-umi-method $extract_umi_method | 18 umi_tools group |
19 --random-seed 0 | |
20 --extract-umi-method $extract_umi_method | |
19 #if str($extract_umi_method) != 'read_id': | 21 #if str($extract_umi_method) != 'read_id': |
20 --umi-separator '$umi_separator' --umi-tag $umi_tag | 22 --umi-separator '$umi_separator' --umi-tag '$umi_tag' |
21 #end if | 23 #end if |
22 --method $method --edit-distance-threshold $edit_distance_threshold | 24 --method $method --edit-distance-threshold $edit_distance_threshold |
23 $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold | 25 $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold |
24 $read_length $whole_contig --subset $subset $per_contig $per_gene | 26 $read_length $whole_contig --subset $subset $per_contig $per_gene |
25 #if $gene_transcript_map: | 27 #if $gene_transcript_map: |
48 <param name="umi_separator" argument="--umi-separator" type="text" label="Separator between read id and UMI." help="Ignored unless extracting by tag" /> | 50 <param name="umi_separator" argument="--umi-separator" type="text" label="Separator between read id and UMI." help="Ignored unless extracting by tag" /> |
49 <param name="umi_tag" argument="--umi-tag" type="text" label="Tag which contains UMI." /> | 51 <param name="umi_tag" argument="--umi-tag" type="text" label="Tag which contains UMI." /> |
50 <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position"> | 52 <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position"> |
51 <option value="unique">Reads group share the exact same UMI</option> | 53 <option value="unique">Reads group share the exact same UMI</option> |
52 <option value="cluster">Identify clusters based on hamming distance</option> | 54 <option value="cluster">Identify clusters based on hamming distance</option> |
53 <option value="directional">Identify clusters based on distance and counts</option> | 55 <option value="directional">Identify clusters based on distance and counts, restrict network expansion by threshold</option> |
54 </param> | 56 </param> |
55 <param name="edit_distance_threshold" argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (>14bp)" /> | 57 <param name="edit_distance_threshold" argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (>14bp)" /> |
56 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates." /> | 58 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates." /> |
57 <param name="spliced_is_unique" argument="--spliced-is-unique" type="boolean" truevalue="--spliced-is-unique" falsevalue="" label="Spliced reads are unique" help="Causes two reads that start in the same position on the same strand and having the same UMI to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation to test for splicing)" /> | 59 <param name="spliced_is_unique" argument="--spliced-is-unique" type="boolean" truevalue="--spliced-is-unique" falsevalue="" label="Spliced reads are unique" help="Causes two reads that start in the same position on the same strand and having the same UMI to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation to test for splicing)" /> |
58 <param name="soft_clip_threshold" argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced." /> | 60 <param name="soft_clip_threshold" argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced." /> |
59 <param name="read_length" argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as as a criterion when deduping" /> | 61 <param name="read_length" argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as as a criterion when deduping" /> |
60 <param name="whole_contig" argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" /> | 62 <param name="whole_contig" argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" /> |
61 <param argument="--subset" type="float" min="0" max="1" value="1" label="Only consider a random selection of the reads" /> | 63 <param argument="--subset" type="float" min="0.0" max="1.0" value="1.0" label="Only consider a random selection of the reads" /> |
62 <param argument="--chrom" type="boolean" truevalue="--chrom" falsevalue="" label="Only consider a single chromosome" /> | 64 <param argument="--chrom" type="boolean" truevalue="--chrom" falsevalue="" label="Only consider a single chromosome" /> |
63 <param name="per_contig" argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" /> | 65 <param name="per_contig" argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" /> |
64 <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." /> | 66 <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." /> |
65 <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transripts" /> | 67 <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transripts" /> |
66 <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." /> | 68 <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." /> |
71 <filter>group_out</filter> | 73 <filter>group_out</filter> |
72 </data> | 74 </data> |
73 </outputs> | 75 </outputs> |
74 <tests> | 76 <tests> |
75 <test> | 77 <test> |
76 <param name="input" value="group_in1.sam" ftype="sam" /> | |
77 <param name="extract_umi_method" value="read_id" /> | |
78 <param name="method" value="unique" /> | |
79 <output name="output" file="group_out1.bam" /> | |
80 </test> | |
81 <test> | |
82 <param name="input" value="group_in2.bam" ftype="bam" /> | 78 <param name="input" value="group_in2.bam" ftype="bam" /> |
83 <param name="extract_umi_method" value="read_id" /> | 79 <param name="extract_umi_method" value="read_id" /> |
84 <param name="paired" value="True" /> | 80 <param name="paired" value="True" /> |
85 <param name="method" value="unique" /> | 81 <param name="method" value="unique" /> |
86 <output name="output" file="group_out2.bam" /> | 82 <output name="output" file="group_out2.bam" ftype="bam" sort="True" /> |
87 </test> | 83 </test> |
88 <test> | 84 <test> |
89 <param name="input" value="group_in3.bam" ftype="bam" /> | 85 <param name="input" value="group_in3.bam" ftype="bam" /> |
90 <param name="extract_umi_method" value="read_id" /> | 86 <param name="extract_umi_method" value="read_id" /> |
91 <param name="group_output" value="True" /> | 87 <param name="group_output" value="True" /> |
92 <param name="method" value="unique" /> | 88 <param name="method" value="unique" /> |
93 <output name="group_out" file="group_out3.tab" /> | 89 <output name="group_out" file="group_out3.tab" /> |
94 <output name="output" file="group_out3.bam" /> | 90 <output name="output" file="group_out3.bam" ftype="bam" sort="True" /> |
95 </test> | 91 </test> |
96 <test> | 92 <test> |
97 <param name="input" value="group_in4.bam" ftype="bam" /> | 93 <param name="input" value="group_in4.bam" ftype="bam" /> |
98 <param name="extract_umi_method" value="tag" /> | 94 <param name="extract_umi_method" value="tag" /> |
99 <param name="umi_tag" value="BX" /> | 95 <param name="umi_tag" value="BX" /> |
100 <param name="method" value="unique" /> | 96 <param name="method" value="unique" /> |
101 <output name="group_out" file="group_out4.tab" /> | 97 <output name="group_out" file="group_out4.tab" /> |
102 <output name="output" file="group_out4.bam" /> | 98 <output name="output" file="group_out4.bam" ftype="bam" sort="True" /> |
103 </test> | 99 </test> |
104 <test> | 100 <test> |
105 <param name="input" value="group_in5.bam" ftype="bam" /> | 101 <param name="input" value="group_in5.bam" ftype="bam" /> |
106 <param name="extract_umi_method" value="read_id" /> | 102 <param name="extract_umi_method" value="read_id" /> |
107 <param name="umi_tag" value="BX" /> | 103 <param name="umi_tag" value="BX" /> |
108 <param name="method" value="cluster" /> | 104 <param name="method" value="cluster" /> |
109 <output name="output" file="group_out5.bam" /> | 105 <output name="output" file="group_out5.bam" ftype="bam" sort="True" /> |
110 </test> | 106 </test> |
111 <test> | 107 <test> |
112 <param name="input" value="group_in6.bam" ftype="bam" /> | 108 <param name="input" value="group_in6.bam" ftype="bam" /> |
113 <param name="extract_umi_method" value="read_id" /> | 109 <param name="extract_umi_method" value="read_id" /> |
114 <param name="umi_tag" value="BX" /> | 110 <param name="umi_tag" value="BX" /> |
115 <param name="method" value="directional" /> | 111 <param name="method" value="directional" /> |
116 <output name="output" file="group_out6.bam" /> | 112 <output name="output" file="group_out6.bam" ftype="bam" sort="True" /> |
117 </test> | 113 </test> |
118 </tests> | 114 </tests> |
119 <help><![CDATA[ | 115 <help><![CDATA[ |
120 umi_tools group - Group reads based on their UMI | 116 umi_tools group - Group reads based on their UMI |
121 ================================================ | 117 ================================================ |