Repository 'yahs'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/yahs

Changeset 6:6756b34312cd (2025-09-17)
Previous changeset 5:ff4031bfaa22 (2024-08-01)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/yahs commit ab918ac1eab72932e78c6e45e46d745543eac810
modified:
yahs.xml
added:
test-data/test2.unsorted.bam
test-data/test3.qname_sorted.bam
b
diff -r ff4031bfaa22 -r 6756b34312cd test-data/test2.unsorted.bam
b
Binary file test-data/test2.unsorted.bam has changed
b
diff -r ff4031bfaa22 -r 6756b34312cd test-data/test3.qname_sorted.bam
b
Binary file test-data/test3.qname_sorted.bam has changed
b
diff -r ff4031bfaa22 -r 6756b34312cd yahs.xml
--- a/yahs.xml Thu Aug 01 11:41:41 2024 +0000
+++ b/yahs.xml Wed Sep 17 06:28:08 2025 +0000
[
b'@@ -2,7 +2,7 @@\n     <description>yet another HI-C scaffolding tool</description>\n     <macros>\n         <token name="@VERSION@">1.2a.2</token>\n-        <token name="@VERSION_SUFFIX@">2</token>\n+        <token name="@VERSION_SUFFIX@">3</token>\n     </macros>\n     <requirements>\n         <requirement type="package" version="@VERSION@">yahs</requirement>\n@@ -12,22 +12,13 @@\n     <command detect_errors="exit_code"><![CDATA[\n         #if $function.function_select == "yahs":\n             ln -s \'$function.fasta\' input.fasta &&\n-            #if $function.bfile.ext == "bam":\n-                ln -s \'$function.bfile\' input.bam &&\n-            #else if $function.bfile.ext == "bed":\n-                ln -s \'$function.bfile\' input.bed &&\n-            #end if\n+            ln -s \'$function.bfile\' input.$function.bfile.ext &&\n             #if $function.agp:\n                 ln -s \'$function.agp\' input.agp &&\n             #end if\n             samtools faidx input.fasta &&\n             mkdir initial_break agp_out agp_break final_outs &&\n-            yahs --no-mem-check input.fasta \n-            #if $function.bfile.ext == "bam":\n-                input.bam\n-            #else if $function.bfile.ext == "bed":\n-                input.bed\n-            #end if\n+            yahs --no-mem-check input.fasta  input.$function.bfile.ext\n             #if $agp:\n                 -a input.agp\n             #end if\n@@ -83,16 +74,16 @@\n             </param>\n             <when value="yahs">\n                 <param name="fasta" type="data" format="fasta" label="Input contig sequences"/>\n-                <param name="bfile" type="data" format="bam,bed" label="Alignment file of Hi-C reads to contigs"/>\n-                <param name="agp" argument="-a" type="data" format="agp" optional="true" label="Input AGP file (for rescaffolding)"\n-                    help="You can specify a AGP format file to ask YaHS to do scaffolding with the scaffolds in the AGP file as the start point"/>\n-                <param name="res" argument="-r" type="text" label="Resolutions" optional="true" \n-                    help="Comma separated, ascending list of range of resolutions with no spaces. Ex. 50000,100000,200000,500000,1000000,2000000,5000000. By default and the upper limit is automatically adjusted with the genome size"/>\n+                <param name="bfile" type="data" format="bam,bed,qname_sorted.bam,unsorted.bam" label="Alignment file of Hi-C reads to contigs. NOTE:  The input BAM could either be sorted by read names (qname_sorted.bam) or not. The behaviours of the program are slightly different, which might lead to slightly different scaffolding results. For a BAM input sorted by read names, with each mapped read pair, a Hi-C link is counted between the middle positions of the read alignments; while for a BAM input sorted by coordinates or unsorted, Hi-C links are counted between the start positions of the read alignments. Also, for a BAM input not sorted by read names, the mapping quality filtering is suppressed (-q option). If a bed file is provided: the BAM file used to genereate BED file need to be filtered out unmapped reads, supplementary/secondary alignment records, and PCR/optical duplicates, and sorted by read names (otherwise the resulted BED file need to be sorted by the read name column)."/>\n+                <param name="agp" argument="-a" type="data" format="agp" optional="true" label="Input AGP file (for rescaffolding)" help="You can specify a AGP format file to ask YaHS to do scaffolding with the scaffolds in the AGP file as the start point"/>\n+                <param name="res" argument="-r" type="text" label="Resolutions" optional="true" help="Comma separated, ascending list of range of resolutions with no spaces. Ex. 50000,100000,200000,500000,1000000,2000000,5000000. By default and the upper limit is automatically adjusted with the genome size">\n+                    <validator type="regex" message="Only Numbers and commas can be used in to define the list of range'..b'nzymes" value="omnic"/>\n+                <param name="fasta" value="test.fasta" ftype="fasta"/>\n+                <param name="bfile" value="test.bed" ftype="bed"/>\n+                <conditional name="enzyme_conditional">\n+                    <param name="enzyme_options" value="preconfigured"/>\n+                    <param name="preconfigured_enzymes" value="omnic"/>\n+                </conditional>\n             </conditional>\n             <param name="log_out" value="yes"/>\n             <output name="log_file" ftype="txt">\n@@ -269,9 +259,46 @@\n                 </assert_contents>\n             </output>\n         </test>\n+        <!-- TEST 8: qname_sorted-->\n+        <test expect_num_outputs="6">\n+            <conditional name="function">\n+                <param name="function_select" value="yahs"/>\n+                <param name="fasta" value="test2.fasta" ftype="fasta"/>\n+                <param name="bfile" value="test3.qname_sorted.bam" ftype="qname_sorted.bam"/>\n+                <param name="res" value="1000,2000,5000,10000,20000,50000,100000,200000,500000"/>\n+                <conditional name="enzyme_conditional">\n+                    <param name="enzyme_options" value="not_specified"/>\n+                </conditional>\n+            </conditional>\n+            <param name="log_out" value="yes"/>\n+            <output name="log_file" ftype="txt">\n+                <assert_contents>\n+                    <has_text text="[I::dump_links_from_bam_file] dumped 6399 read pairs from 17675 records: 6399 intra links + 0 inter links"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <!-- TEST 9: unsorted-->\n+        <test expect_num_outputs="6">\n+            <conditional name="function">\n+                <param name="function_select" value="yahs"/>\n+                <param name="fasta" value="test2.fasta" ftype="fasta"/>\n+                <param name="bfile" value="test2.unsorted.bam" ftype="bam"/>\n+                <param name="res" value="1000,2000,5000,10000,20000,50000,100000,200000,500000"/>\n+                <conditional name="enzyme_conditional">\n+                    <param name="enzyme_options" value="not_specified"/>\n+                </conditional>\n+            </conditional>\n+            <param name="log_out" value="yes"/>\n+            <output name="log_file" ftype="txt">\n+                <assert_contents>\n+                    <has_text text="[I::dump_links_from_bam_file] dumped 6399 read pairs from 17675 records: 6297 intra links + 102 inter links"/>\n+                </assert_contents>\n+            </output>\n+            <!-- COMMAND:  yahs test.fasta test.bam -r 1000,2000,5000,10000,20000,50000,100000,200000,500000 -o test_3 -->\n+        </test>\n     </tests>\n     <help><![CDATA[\n-        YaHS is scaffolding tool using Hi-C data. It relies on a new algothrim for contig joining detection which considers the topological distribution of Hi-C signals aiming to distingush real interaction signals from mapping nosies. YaHS has been tested in a wide range of genome assemblies. Compared to other Hi-C scaffolding tools, it usually generates more contiguous scaffolds - especially with a higher N90 and L90 statistics. It is also super fast - takes less than 5 minutes to reconstruct the human genome from an assembly of 5,483 contigs with ~45X Hi-C data.\n+        YaHS is scaffolding tool using Hi-C data. It relies on a new algorithm for contig joining detection which considers the topological distribution of Hi-C signals aiming to distingush real interaction signals from mapping nosies. YaHS has been tested in a wide range of genome assemblies. Compared to other Hi-C scaffolding tools, it usually generates more contiguous scaffolds - especially with a higher N90 and L90 statistics. It is also super fast - takes less than 5 minutes to reconstruct the human genome from an assembly of 5,483 contigs with ~45X Hi-C data.\n     ]]></help>\n     <citations>\n         <citation type="doi">10.5281/zenodo.5848772</citation>\n'