comparison hicBuildMatrixMicroC.xml @ 0:0c22e9361298 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 63179f3d35e5dec09cdd01c07c6a4e8af3da777d
author bgruening
date Thu, 05 Dec 2024 21:42:17 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0c22e9361298
1 <tool id="hicexplorer_hicbuildmatrixmicroc" name="@BINARY@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>create a contact matrix</description>
3 <macros>
4 <token name="@BINARY@">hicBuildMatrixMicroC</token>
5 <import>macros.xml</import>
6 </macros>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 ## Create the working QC folder and the extra-files directory of the QC report before the run.
10 mkdir ./QCfolder &&
11 mkdir '$qc.files_path' &&
12 @BINARY@
13 --samFiles
14 #for $repeat in $samFiles:
15 '${repeat.samFile}'
16 #end for
17
18 #if $maxLibraryInsertSize:
19 --maxLibraryInsertSize $maxLibraryInsertSize
20 #end if
21
22 #if $binSizes:
23 --binSize
24 #for $repeat in $binSizes:
25 '${repeat.binSize}'
26 #end for
27 #end if
28 ## Optional chromosome sizes; the assembly name falls back to the dbkey of the first input file.
29 #if $chromosomeSizes:
30 --chromosomeSizes '$chromosomeSizes'
31 #end if
32 #if $dbKey:
33 --genomeAssembly '$dbKey'
34 #else
35 --genomeAssembly '$samFiles[0].samFile.metadata.dbkey'
36 #end if
37
38 #if $region:
39 --region '$region'
40 #end if
41
42 --outFileName 'matrix.$outputFormat'
43 ## The BAM of valid reads is written unsorted here and sorted at the end of the command.
44 #if $outBam:
45 $outBam ./unsorted.bam
46 #end if
47
48 $keepSelfCircles
49 $skipDuplicationCheck
50 ## Compare the string form so that an explicit value of 0 is still passed on.
51 #if str($minMappingQuality) != '':
52 --minMappingQuality $minMappingQuality
53 #end if
54
55 --threads @THREADS@
56 ## QC files are written to ./QCfolder and then moved to the output locations below.
57 --QCfolder ./QCfolder
58 &&
59 mv ./QCfolder/* '$qc.files_path/'
60 &&
61 mv '$qc.files_path/hicQC.html' '$qc'
62 && mv '$qc.files_path'/*.log raw_qc
63 && mv 'matrix.$outputFormat' matrix
64 #if $outBam:
65 && samtools sort -@ @THREADS@ -T "\${TMPDIR:-.}" ./unsorted.bam -o sorted.bam
66 #end if
67 ]]>
68 </command>
69 <inputs>
70 <!-- can we use multiple=True here with min="2" and max="2" ? -->
71 <repeat max="2" min="2" name="samFiles" title="Sam/Bam files to process (forward/reverse)" help="Please use the special BAM datatype: qname_input_sorted.bam and use for 'bowtie2' the '--reorder' option to create a BAM file.">
72 <param name="samFile" type="data" format="sam,qname_input_sorted.bam">
73 </param>
74 </repeat>
75 <!-- Optional upper bound of the library insert size; omitted from the command line when left empty. -->
76 <param argument="--maxLibraryInsertSize" type="integer" optional="true" value="" label="Maximum library insert size defines different cut offs based on the maximum expected library size" help="*This is not the average fragment size* but the higher end of the fragment size distribution (obtained using for example Fragment Analyzer)
77 which usually is between 800 to 1500 bp. If this value is not known use the default of 1000. The insert value is used to decide if two mates
78 belong to the same fragment (by checking if they are within this max insert size) and to decide if a mate
79 is too far away from the nearest restriction site." />
80 <!-- At least one bin size entry; all entries are passed together after a single binSize flag. -->
81 <repeat name="binSizes" title="Bin size in bp" min="1" help="If used, the restriction cut places (if given) are used to only consider reads that are in the vicinity of the restriction sites.
82 Otherwise all reads in the interval are considered. Use multiple ones to create a mcool file.">
83 <param argument="--binSize" type="integer" optional="true" value="" label="Bin size in bp" />
84 </repeat>
85 <!-- Region, read filtering and duplicate handling; region and minMappingQuality are expanded from macros. -->
86 <expand macro="region" />
87 <param argument="--keepSelfCircles" type="boolean" truevalue="--keepSelfCircles" falsevalue="" label="Keep self circles" help="If set, outward facing reads without any restriction fragment (self circles) are kept. They will be counted and shown in the QC plots." />
88 <expand macro="minMappingQuality" />
89 <param argument="--skipDuplicationCheck" type="boolean" truevalue="--skipDuplicationCheck" falsevalue="" label="Skip duplication check" help="Identification of duplicated read pairs is memory consuming. Thus, in case of memory errors this check can be skipped." />
90 <param argument="--chromosomeSizes" type="data" format="tabular" optional="true" label="Chromosome sizes for your genome" help="File with the chromosome sizes for your genome. A tab-delimited two column layout 'chr_name size' is expected.
91 Usually the sizes can be determined from the SAM/BAM input files, however,
92 for cHi-C or scHi-C it can be that at the start or end no data is present.
93 Please consider that this option causes that only reads are considered which are on the listed chromosomes.
94 Use this option to guarantee fixed sizes. An example file is available via UCSC:
95 http://hgdownload.soe.ucsc.edu/goldenPath/dm3/bigZips/dm3.chrom.sizes" />
96 <param name="dbKey" type="text" optional="true" label="Use this dbkey for your history genome"
97 help="You can set the reference genome in your history as metadata. In case you have not you can specify it here." />
98 <!-- Optional BAM of valid reads; when checked, the command also sorts it with samtools. -->
99 <param argument="--outBam" type="boolean" truevalue="--outBam" falsevalue="" checked="false" label="Save valid Hi-C reads in BAM file" help="A bam
100 file containing all valid Hi-C reads can be created
101 using this option. This bam file could be useful to
102 inspect the distribution of valid Hi-C reads pairs or
103 for other downstream analyses, but is not used by any
104 HiCExplorer tool. Computation will be significantly
105 longer if this option is set." />
106 <!-- Matrix file format; the selected value is also used as the file extension in the command. -->
107 <param name="outputFormat" type="select" label="Output file format">
108 <option value="h5">HiCExplorer format</option>
109 <option value="cool">cool</option>
110 </param>
111 </inputs>
112 <outputs> <!-- Datasets produced by the tool: optional sorted BAM, contact matrix, HTML QC report and raw QC log. -->
113 <data name="outfileBam" from_work_dir="sorted.bam" format="bam" label="${tool.name} BAM file on ${on_string}"> <!-- collected only when the outBam filter below is true -->
114 <filter>outBam</filter>
115 </data>
116 <data name="outFileName" from_work_dir="matrix" format="h5" label="${tool.name} MATRIX on ${on_string}"> <!-- declared as h5; switched to cool when outputFormat is cool -->
117 <change_format>
118 <when input="outputFormat" value="cool" format="cool" />
119 </change_format>
120 </data>
121 <data name="qc" format="html" label="${tool.name} QC on ${on_string}" /> <!-- the command moves hicQC.html onto this dataset -->
122 <data name="raw_qc" from_work_dir="raw_qc" format="txt" label="${tool.name} raw QC on ${on_string}" /> <!-- collected from the raw_qc file in the working directory -->
123 </outputs>
124 <tests> <!-- Two cases on the same pair of SAM inputs; they differ only in the requested matrix format. -->
125 <test expect_num_outputs="4"> <!-- h5 matrix with the BAM of valid reads requested -->
126 <repeat name="samFiles">
127 <param name="samFile" value="small_test_R1_unsorted.sam" dbkey="hg38" />
128 </repeat>
129 <repeat name="samFiles">
130 <param name="samFile" value="small_test_R2_unsorted.sam" dbkey="hg38" />
131 </repeat>
132 <param name="outputFormat" value="h5" />
133 <repeat name="binSizes">
134 <param name="binSize" value="5000" />
135 </repeat>
136 <param name="outBam" value="True" />
137 <output name="outfileBam" file="small_test_matrix_result_sorted_microc.bam" compare="diff" lines_diff="2" ftype="bam" />
138 <output name="outFileName" ftype="h5">
139 <assert_contents>
140 <has_h5_keys keys="intervals,matrix" />
141 </assert_contents>
142 </output>
143 <output name="raw_qc" file="raw_qc_report_micro-c" compare="diff" lines_diff="2" />
144 </test>
145 <test expect_num_outputs="4"> <!-- cool matrix with the BAM of valid reads requested -->
146 <repeat name="samFiles">
147 <param name="samFile" value="small_test_R1_unsorted.sam" dbkey="hg38" />
148 </repeat>
149 <repeat name="samFiles">
150 <param name="samFile" value="small_test_R2_unsorted.sam" dbkey="hg38" />
151 </repeat>
152 <repeat name="binSizes">
153 <param name="binSize" value="5000" />
154 </repeat>
155 <param name="outputFormat" value="cool" />
156 <param name="outBam" value="True" />
157 <output name="outfileBam" file="small_test_matrix_result_sorted_microc.bam" compare="diff" lines_diff="2" ftype="bam" />
158 <output name="outFileName" ftype="cool">
159 <assert_contents>
160 <has_h5_keys keys="bins,chroms,indexes,pixels" />
161 </assert_contents>
162 </output>
163 <output name="raw_qc" file="raw_qc_report_micro-c" compare="diff" lines_diff="2" />
164 </test>
165 </tests>
166 <help><![CDATA[
167
168 Creation of the contact matrix
169 ===============================
170
171
172 **hicBuildMatrixMicroC** generates a contact matrix from Micro-C read pairs, using paired-end Hi-C reads mapped to a reference genome. This process requires two SAM or BAM files: one for the first mate and one for the second mate of the paired-end reads. These files must not be sorted by position (i.e., they must be left unsorted). Unlike traditional Hi-C data, where restriction enzyme cut sites determine resolution, Micro-C does not rely on such sites. Instead, the contact matrix is created using a fixed bin size (e.g., 10,000 bp).
173
174 Additionally, **hicBuildMatrixMicroC** produces a quality control report to evaluate the quality of the Hi-C reads, aiding in determining the success of both the experimental protocol and sequencing process.
175
176
177 _________________
178
179
180 Usage
181 -----
182
183
184 This tool is designed to work with paired SAM/BAM files generated by alignment software supporting local alignment, such as Bowtie2, using the ``--local`` alignment option for paired-end reads. Both files should represent properly mapped reads.
185
186 _________________
187
188
189 Output
190 ------
191
192 **hicBuildMatrixMicroC** generates the following outputs:
193
194 - **Contact Matrix**: A matrix compatible with HiCExplorer for downstream analyses.
195 - **Accepted Alignments BAM File**: This file includes valid Hi-C read pairs. While not directly used by HiCExplorer, it is valuable for inspecting the distribution of valid reads, such as around restriction enzyme sites, or for other analyses.
196 - **Quality Control Report**: This report provides an evaluation of the Hi-C data, helping to determine whether the library preparation and experimental workflow were successful.
197
198
199 Example plot
200 ++++++++++++
201
202 .. image:: hicPlotMatrix.png
203 :width: 50%
204
205 Contact matrix of *Drosophila melanogaster* embryos built using **hicBuildMatrix**. The example shows Micro-C data, visualized with ``hicPlotMatrix``. Bins were merged to a 25 kb resolution using ``hicMergeMatrixBins`` before plotting.
206
207
208
209
210 Quality report
211 ++++++++++++++
212
213 A detailed quality control report accompanies the contact matrix. This report is similar to the one generated by **hicBuildMatrix**, but excludes information specific to restriction cut sites, such as dangling ends and self-circles, as these features are not applicable to Micro-C data.
214
215
216 _________________
217
218 | For more information about HiCExplorer please consider our documentation on readthedocs.io_.
219
220 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
221 ]]> </help>
222 <expand macro="citations" />
223 </tool>