comparison temp_insertions.xml @ 21:9672fe07a232 draft default tip

planemo upload for repository https://github.com/portiahollyoak/Tools commit 0fea84d05f8976b8360a8b4943ecb01b87e3ade0-dirty
author mvdbeek
date Mon, 05 Dec 2016 09:58:47 -0500
parents
children
comparison
equal deleted inserted replaced
20:6e02b9179a24 21:9672fe07a232
1 <tool id ="TEMP_insertions" name="TEMP Insertion" version="0.3.0">
2 <description>finds TE insertions relative to reference</description>
3 <requirements>
4 <requirement type="package" version="1.6.924=pl5.22.0_0">perl-bioperl</requirement>
5 <requirement type="package" version="0.7.13">bwa</requirement>
6 <requirement type="package" version="2.25.0">bedtools</requirement>
7 <requirement type="package" version="324">ucsc-twobittofa</requirement>
8 <requirement type="package" version="1.3.1">samtools</requirement>
9 </requirements>
10 <stdio>
11 <exit_code range="1:" />
12 </stdio>
13 <command><![CDATA[
14 ln -f -s "$alignment.metadata.bam_index" alignment.sorted.bam.bai &&
15 ln -f -s "$alignment" alignment.sorted.bam &&
16 bash "$__tool_directory__/scripts/TEMP_Insertion.sh"
17 -x "$minimum_score_difference"
18 -i alignment.sorted.bam
19 -s "$__tool_directory__/scripts"
20 -r "$consensus_te_seqs"
21 -t "$te_locations"
22 #if $te_families:
23 -u "$te_families"
24 #end if
25 -m "$mismatches"
26 -f "$median_insertsize"
27 -c \${GALAXY_SLOTS:-2} &&
28 mv alignment.insertion.refined.bp.summary "$insertion_summary"
29 ]]></command>
30 <inputs>
31 <param name="alignment" type="data" format="bam" label="Alignment bam file"/>
32 <param name="consensus_te_seqs" type="data" format="fasta" label="Consensus TE Seqs fasta file"/>
33 <param name="te_locations" type="data" format="bed" label="TE Annotations bed file"/>
34 <param name="te_families" type="data" format="tabular" optional="True" label="TE Identifiers and Families"/>
35 <param name="median_insertsize" type="integer" value="" label="Median Insert Length"/>
36 <param name="mismatches" type="integer" value="3" min="0" max="5" label="Allow this many mismatches when aligning to TEs"/>
37 <param name="minimum_score_difference" type="integer" value="30" min="0" max="37" label="Minimum difference between mapping scores"/>
38 </inputs>
39 <outputs>
40 <data format="bed" name="insertion_summary" label="${alignment.element_identifier} Insertions" />
41 </outputs>
42 <tests>
43 <test>
44 <param name="alignment" value="test_chromosome.sorted.bam" ftype="bam"/>
45 <param name="consensus_te_seqs" value="test_concensus.fa" ftype="fasta"/>
46 <param name="te_locations" value="test_TE_annotation.bed" ftype="bed"/>
47 <param name="median_insertsize" value="500"/>
48 <param name="minimum_score_difference" value="0"/>
49 <output name="insertion_summary" file="test_insertions_out.bed" ftype="bed" compare="sim_size"/>
50 </test>
51 </tests>
52 <help> <![CDATA[
53
54
55 TEMP
56 -------------
57 TEMP is a software package for detecting transposable element (TE) insertions and absences from pooled high-throughput sequencing data
58
59 Current version v1.04
60
61 Authors: Jiali Zhuang (jiali.zhuang@umassmed.edu) and Jie Wang (jie.wangj@umassmed.edu) Weng Lab, University of Massachusetts Medical School, Worcester, MA, USA
62
63 *Input files/variables*
64 -------------------------
65 * Alignment file in BAM format
66 * Reference genome used in aligning, in fasta or twobit format.
67 * Transposable Elements' Consensus Sequences in fasta format.
68 * Annotations of TEs in reference genome in bed format.
69 * TE Identifiers and Families (optional) - A file containing in the first column the TE names/identifiers from the consensus sequences file, and in the second column, their respective TE family names as in the TE annotations file. When supplied, if a detected insertion overlaps with an annotated TE of the same family, the detected insertion will be excluded from the results.
70 * Median Insert Length
71 * Number of Mismatches allowed (default 3)
72 * Minimum difference between mapping scores. The minimum difference in scores between the optimal and suboptimal alignments to consider a read uniquely mapped.
73
74 *Output files*
75 -----------------
76 * **In the Insertions output file there are 14 columns:**
77 * Column 1: The chromosome where the detected insertion happens.
78 * Column 2: The coordinate of the start position of the detected insertion.
79 * Column 3: The coordinate of the end position of the detected insertion.
80 * Column 4: The TE family that the detected insertion belongs to.
81 * Column 5: The direction of the insertion. “Plus” means that the TE is integrated with the plus strand of the genome while “minus” means the TE is integrated with the minus strand.
82 * Column 6: The class of the insertion. “1p1” means that the detected insertion is supported by reads at both sides. “2p” means the detected insertion is supported by more than 1 read at only 1 side. “Singleton” means the detected insertion is supported by only 1 read at 1 side.
83 * Column 7: The total number of read pairs that support the detected insertion.
84 * Column 8: The estimated population frequency of the detected insertion.
85 * Columns 9 & 10: The coordinate of a junction and the number of the reads supporting it. If the junction is not found column 9 will be the arithmetic mean of the start and end coordinates and column 10 will have the value 0.
86 * Columns 11 & 12: Same as Columns 9 & 10 except for the junction on the other strand.
87 * Column 13: The number of reads supporting the detected insertion at the 5’ end of the TE (not including junction spanning reads).
88 * Column 14: The number of reads supporting the detected insertion at the 3’ end of the TE (not including junction spanning reads).
89
90
91 -----
92
93
94 * **In the Absences output file there are 9 columns:**
95 * Column 1: The chromosome where the detected absence happens.
96 * Column 2: The coordinate of the start position of the detected absence.
97 * Column 3: The coordinate of the end position of the detected absence.
98 * Column 4: The TE family that the detected absence belongs to.
99 * Column 5: Junctions at 5’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands.
100 * Column 6: Junctions at 3’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands.
101 * Column 7: The number of reads supporting the absence.
102 * Column 8: The number of reads supporting the reference (no absence).
103 * Column 9: Estimated population frequency of the detected absence event.
104
105
106 ]]> </help>
107 <citations>
108 <citation type="doi">10.1093/nar/gku323</citation>
109 </citations>
110 </tool>