Mercurial > repos > bgruening > hicexplorer_hicbuildmatrixmicroc
comparison hicBuildMatrixMicroC.xml @ 0:0c22e9361298 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 63179f3d35e5dec09cdd01c07c6a4e8af3da777d
author | bgruening |
---|---|
date | Thu, 05 Dec 2024 21:42:17 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0c22e9361298 |
---|---|
1 <tool id="hicexplorer_hicbuildmatrixmicroc" name="@BINARY@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>create a contact matrix</description> | |
3 <macros> <!-- @BINARY@ is substituted into the tool name and is the executable invoked in the command template. --> | |
4 <token name="@BINARY@">hicBuildMatrixMicroC</token> | |
5 <import>macros.xml</import> <!-- NOTE(review): presumably provides the expanded macros (requirements, region, minMappingQuality, citations) and the remaining @...@ tokens; verify against macros.xml. --> | |
6 </macros> | |
7 <expand macro="requirements"/> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 ## QC files are first written to a scratch folder and moved into the extra-files path of the qc dataset after the run. | |
10 mkdir ./QCfolder && | |
11 mkdir '$qc.files_path' && | |
12 @BINARY@ | |
13 --samFiles | |
14 #for $repeat in $samFiles: | |
15 '${repeat.samFile}' | |
16 #end for | |
17 | |
18 #if $maxLibraryInsertSize: | |
19 --maxLibraryInsertSize $maxLibraryInsertSize | |
20 #end if | |
21 | |
22 #if $binSizes: | |
23 --binSize | |
24 #for $repeat in $binSizes | |
25 '${repeat.binSize}' | |
26 #end for | |
27 #end if | |
28 ## Optional chromosome sizes file; the genome assembly is taken from dbKey when given, otherwise from the dbkey metadata of the first input. | |
29 #if $chromosomeSizes: | |
30 --chromosomeSizes '$chromosomeSizes' | |
31 #end if | |
32 #if $dbKey: | |
33 --genomeAssembly '$dbKey' | |
34 #else | |
35 --genomeAssembly '$samFiles[0].samFile.metadata.dbkey' | |
36 #end if | |
37 | |
38 #if $region: | |
39 --region '$region' | |
40 #end if | |
41 | |
42 --outFileName 'matrix.$outputFormat' | |
43 ## When a BAM is requested it is written unsorted here and sorted with samtools as the last step. | |
44 #if $outBam: | |
45 $outBam ./unsorted.bam | |
46 #end if | |
47 | |
48 $keepSelfCircles | |
49 $skipDuplicationCheck | |
50 | |
51 #if $minMappingQuality and $minMappingQuality is not None: | |
52 --minMappingQuality $minMappingQuality | |
53 #end if | |
54 | |
55 --threads @THREADS@ | |
56 ## Post-processing: the HTML report becomes the qc dataset, the log file the raw_qc output, and the matrix loses its extension. Paths are quoted so that spaces or glob characters cannot split them. | |
57 --QCfolder ./QCfolder | |
58 && | |
59 mv ./QCfolder/* '$qc.files_path'/ | |
60 && | |
61 mv '$qc.files_path/hicQC.html' '$qc' | |
62 && mv "$qc.files_path"/*.log raw_qc | |
63 && mv 'matrix.$outputFormat' matrix | |
64 #if $outBam: | |
65 && samtools sort -@ @THREADS@ -T "\${TMPDIR:-.}" ./unsorted.bam -o sorted.bam | |
66 #end if | |
67 ]]> | |
68 </command> | |
69 <inputs> | |
70 <!-- can we use multiple=True here with min="2" and max="2" ? --> | |
71 <repeat max="2" min="2" name="samFiles" title="Sam/Bam files to process (forward/reverse)" help="Please use the special BAM datatype: qname_input_sorted.bam and use for 'bowtie2' the '--reorder' option to create a BAM file."> | |
72 <param name="samFile" type="data" format="sam,qname_input_sorted.bam"> | |
73 </param> | |
74 </repeat> | |
75 <!-- Optional upper bound of the library insert size; the command only passes it to the tool when a value is set. --> | |
76 <param argument="--maxLibraryInsertSize" type="integer" optional="true" value="" label="Maximum library insert size defines different cut offs based on the maximum expected library size" help="*This is not the average fragment size* but the higher end of the fragment size distribution (obtained using for example Fragment Analyzer) | |
77 which usually is between 800 to 1500 bp. If this value is not known use the default of 1000. The insert value is used to decide if two mates | |
78 belong to the same fragment (by checking if they are within this max insert size) and to decide if a mate | |
79 is too far away from the nearest restriction site." /> | |
80 <!-- One or more bin sizes; the command lists all of them after a single binSize flag. --> | |
81 <repeat name="binSizes" title="Bin size in bp" min="1" help="If used, the restriction cut places (if given) are used to only consider reads that are in the vicinity of the restriction sites. | |
82 Otherwise all reads in the interval are considered. Use multiple ones to create a mcool file."> | |
83 <param argument="--binSize" type="integer" optional="true" value="" label="Bin size in bp" /> | |
84 </repeat> | |
85 <!-- Region, mapping-quality and filtering switches; region and minMappingQuality are expanded from imported macros. --> | |
86 <expand macro="region" /> | |
87 <param argument="--keepSelfCircles" type="boolean" truevalue="--keepSelfCircles" falsevalue="" label="Keep self circles" help="If set, outward facing reads without any restriction fragment (self circles) are kept. They will be counted and shown in the QC plots." /> | |
88 <expand macro="minMappingQuality" /> | |
89 <param argument="--skipDuplicationCheck" type="boolean" truevalue="--skipDuplicationCheck" falsevalue="" label="Skip duplication check" help="Identification of duplicated read pairs is memory consuming. Thus, in case of memory errors this check can be skipped." /> | |
90 <param argument="--chromosomeSizes" type="data" format="tabular" optional="true" label="Chromosome sizes for your genome" help="File with the chromosome sizes for your genome. A tab-delimited two column layout 'chr_name size' is expected. | |
91 Usually the sizes can be determined from the SAM/BAM input files, however, | |
92 for cHi-C or scHi-C it can be that at the start or end no data is present. | |
93 Please consider that this option causes that only reads are considered which are on the listed chromosomes. | |
94 Use this option to guarantee fixed sizes. An example file is available via UCSC: | |
95 http://hgdownload.soe.ucsc.edu/goldenPath/dm3/bigZips/dm3.chrom.sizes" /> | |
96 <param name="dbKey" type="text" optional="true" label="Use this dbkey for your history genome" | |
97 help="You can set the reference genome in your history as metadata. In case you have not you can specify it here." /> | |
98 <!-- Boolean flag; when checked the command additionally sorts the BAM and the outfileBam output is kept. --> | |
99 <param argument="--outBam" type="boolean" truevalue="--outBam" falsevalue="" checked="false" label="Save valid Hi-C reads in BAM file" help="A bam | |
100 file containing all valid Hi-C reads can be created | |
101 using this option. This bam file could be useful to | |
102 inspect the distribution of valid Hi-C reads pairs or | |
103 for other downstream analyses, but is not used by any | |
104 HiCExplorer tool. Computation will be significantly | |
105 longer if this option is set." /> | |
106 <!-- Selects the matrix file extension used by the command and, via change_format, the datatype of the matrix output. --> | |
107 <param name="outputFormat" type="select" label="Output file format"> | |
108 <option value="h5">HiCExplorer format</option> | |
109 <option value="cool">cool</option> | |
110 </param> | |
111 </inputs> | |
112 <outputs> <!-- Four datasets: an optional sorted BAM, the contact matrix, the HTML QC report and the raw QC log. --> | |
113 <data name="outfileBam" from_work_dir="sorted.bam" format="bam" label="${tool.name} BAM file on ${on_string}"> | |
114 <filter>outBam</filter> <!-- only kept when the outBam flag is checked --> | |
115 </data> | |
116 <data name="outFileName" from_work_dir="matrix" format="h5" label="${tool.name} MATRIX on ${on_string}"> | |
117 <change_format> | |
118 <when input="outputFormat" value="cool" format="cool" /> <!-- the default h5 datatype is switched to cool on request --> | |
119 </change_format> | |
120 </data> | |
121 <data name="qc" format="html" label="${tool.name} QC on ${on_string}" /> <!-- filled by the command, which moves hicQC.html onto this dataset --> | |
122 <data name="raw_qc" from_work_dir="raw_qc" format="txt" label="${tool.name} raw QC on ${on_string}" /> <!-- the log file that the command renames to raw_qc --> | |
123 </outputs> | |
124 <tests> <!-- Two runs on the same SAM pair and bin size; they differ only in the requested matrix format. --> | |
125 <test expect_num_outputs="4"> <!-- h5 matrix with BAM export enabled, hence all four outputs --> | |
126 <repeat name="samFiles"> | |
127 <param name="samFile" value="small_test_R1_unsorted.sam" dbkey="hg38" /> | |
128 </repeat> | |
129 <repeat name="samFiles"> | |
130 <param name="samFile" value="small_test_R2_unsorted.sam" dbkey="hg38" /> | |
131 </repeat> | |
132 <param name="outputFormat" value="h5" /> | |
133 <repeat name="binSizes"> | |
134 <param name="binSize" value="5000" /> | |
135 </repeat> | |
136 <param name="outBam" value="True" /> | |
137 <output name="outfileBam" file="small_test_matrix_result_sorted_microc.bam" compare="diff" lines_diff="2" ftype="bam" /> | |
138 <output name="outFileName" ftype="h5"> | |
139 <assert_contents> | |
140 <has_h5_keys keys="intervals,matrix" /> | |
141 </assert_contents> | |
142 </output> | |
143 <output name="raw_qc" file="raw_qc_report_micro-c" compare="diff" lines_diff="2" /> | |
144 </test> | |
145 <test expect_num_outputs="4"> <!-- same inputs, cool matrix; checked via the HDF5 keys listed below --> | |
146 <repeat name="samFiles"> | |
147 <param name="samFile" value="small_test_R1_unsorted.sam" dbkey="hg38" /> | |
148 </repeat> | |
149 <repeat name="samFiles"> | |
150 <param name="samFile" value="small_test_R2_unsorted.sam" dbkey="hg38" /> | |
151 </repeat> | |
152 <repeat name="binSizes"> | |
153 <param name="binSize" value="5000" /> | |
154 </repeat> | |
155 <param name="outputFormat" value="cool" /> | |
156 <param name="outBam" value="True" /> | |
157 <output name="outfileBam" file="small_test_matrix_result_sorted_microc.bam" compare="diff" lines_diff="2" ftype="bam" /> | |
158 <output name="outFileName" ftype="cool"> | |
159 <assert_contents> | |
160 <has_h5_keys keys="bins,chroms,indexes,pixels" /> | |
161 </assert_contents> | |
162 </output> | |
163 <output name="raw_qc" file="raw_qc_report_micro-c" compare="diff" lines_diff="2" /> | |
164 </test> | |
165 </tests> | |
166 <help><![CDATA[ | |
167 | |
168 Creation of the contact matrix | |
169 =============================== | |
170 | |
171 | |
172 **hicBuildMatrixMicroC** generates a contact matrix from Micro-C read pairs, using paired-end Hi-C reads mapped to a reference genome. This process requires two SAM or BAM files: one for the first mate and one for the second mate of the paired-end reads. These files must not be sorted by position (i.e., not coordinate-sorted). Unlike traditional Hi-C data, where restriction enzyme cut sites determine resolution, Micro-C does not rely on such sites. Instead, the contact matrix is created using a fixed bin size (e.g., 10,000 bp). | |
173 | |
174 Additionally, **hicBuildMatrixMicroC** produces a quality control report to evaluate the quality of the Hi-C reads, aiding in determining the success of both the experimental protocol and sequencing process. | |
175 | |
176 | |
177 _________________ | |
178 | |
179 | |
180 Usage | |
181 ----- | |
182 | |
183 | |
184 This tool is designed to work with paired SAM/BAM files generated by alignment software supporting local alignment, such as Bowtie2, using the ``--local`` alignment option for paired-end reads. Both files should represent properly mapped reads. | |
185 | |
186 _________________ | |
187 | |
188 | |
189 Output | |
190 ------ | |
191 | |
192 **hicBuildMatrixMicroC** generates the following outputs: | |
193 | |
194 - **Contact Matrix**: A matrix compatible with HiCExplorer for downstream analyses. | |
195 - **Accepted Alignments BAM File**: This file includes valid Hi-C read pairs. While not directly used by HiCExplorer, it is valuable for inspecting the distribution of valid reads, such as around restriction enzyme sites, or for other analyses. | |
196 - **Quality Control Report**: This report provides an evaluation of the Hi-C data, helping to determine whether the library preparation and experimental workflow were successful. | |
197 | |
198 | |
199 Example plot | |
200 ++++++++++++ | |
201 | |
202 .. image:: hicPlotMatrix.png | |
203 :width: 50% | |
204 | |
205 Contact matrix of *Drosophila melanogaster* embryos built using **hicBuildMatrix**. The example shows Micro-C data, visualized with ``hicPlotMatrix``. Bins were merged to a 25 kb resolution using ``hicMergeMatrixBins`` before plotting. | |
206 | |
207 | |
208 | |
209 | |
210 Quality report | |
211 ++++++++++++++ | |
212 | |
213 A detailed quality control report accompanies the contact matrix. This report is similar to the one generated by **hicBuildMatrix**, but excludes information specific to restriction cut sites, such as dangling ends and self-circles, as these features are not applicable to Micro-C data. | |
214 | |
215 | |
216 _________________ | |
217 | |
218 | For more information about HiCExplorer please consult our documentation on readthedocs.io_. | |
219 | |
220 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html | |
221 ]]> </help> | |
222 <expand macro="citations" /> | |
223 </tool> |