Repository 'ucsc_fatovcf'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ucsc_fatovcf

Changeset 0:78df8fc2b3ab (2023-01-11)
Next changeset 1:d9965e143053 (2023-09-21)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ucsc-tools/fatovcf commit 16910aa7b33e5ff73430be1ca9d7727f4ea5786a
added:
fatovcf.xml
test-data/excl.txt
test-data/input.fa
test-data/mask.vcf
test-data/out1.vcf
test-data/out2.vcf
b
diff -r 000000000000 -r 78df8fc2b3ab fatovcf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fatovcf.xml Wed Jan 11 13:14:29 2023 +0000
[
b'@@ -0,0 +1,143 @@\n+<tool id="fatovcf" name="faToVcf" version="@TOOL_VERSION@+galaxy0" profile="21.05" license="MIT">\n+    <description>\n+        Convert a FASTA alignment file to Variant Call Format (VCF) single-nucleotide diffs\n+    </description>\n+    <macros>\n+        <token name="@TOOL_VERSION@">426</token>\n+    </macros>\n+    <xrefs>\n+        <xref type="bio.tools">UCSC_Genome_Browser_Utilities</xref>\n+    </xrefs>\n+    <requirements>\n+        <requirement type="package" version="@TOOL_VERSION@">ucsc-fatovcf</requirement>\n+    </requirements>\n+    <version_command><![CDATA[ echo "@TOOL_VERSION@" ]]></version_command>\n+    <command detect_errors="exit_code"><![CDATA[\n+    #if $in_fasta\n+        ln -s \'$in_fasta\' in.fa &&\n+    #end if\n+    faToVcf\n+        in.fa\n+        \'$out\'\n+        #if $ref_seq.refSeq == "customRef"\n+            -ref=$ref_seq.ref\n+        #end if\n+        $ambiguous\n+        #if $excl_seq.excludeFile\n+            -excludeFile=\'$excl_seq.excludeFile\'\n+        #end if\n+        -maxDiff=$excl_seq.maxDiff\n+        #if $mask_sites.maskSites\n+            -maskSites=\'$mask_sites.maskSites\'\n+        #end if\n+        #if $mask_sites.windowSize > 0\n+            -windowSize=$mask_sites.windowSize\n+            -minAmbigInWindow=$mask_sites.minAmbigInWindow\n+        #end if\n+        $includeNoAltN\n+        -minAc=$minAc\n+        -minAf=$minAf\n+        #if $output.startOffset > 0\n+            -startOffset=$output.startOffset\n+        #end if\n+        $output.includeRef\n+        $output.noGenotypes\n+        #if $output.vcfChrom\n+            -vcfChrom=\'$output.vcfChrom\'\n+        #end if\n+    ]]></command>\n+    <inputs>\n+        <param name="in_fasta" format="fasta" type="data" label="FASTA Alignment" help="Must contain a series of sequences with different names and the same length. Both N and - are treated as missing information." />\n+        \n+        <conditional name="ref_seq">\n+            <param name="refSeq" type="select" label="Determine reference sequence" help="Which sequence from the FASTA file should be used as the reference sequence.">\n+                <option value="" selected="true">Use the first sequence as reference</option>\n+                <option value="customRef">Use a different sequence as reference</option>\n+            </param>\n+            <when value="customRef">\n+                <param argument="-ref" type="text" label="Name of sequence that should be used as reference sequence:" help="Must be present in the FASTA file." />\n+            </when>\n+            <when value="" />\n+        </conditional>\n+\n+        <param name="ambiguous" type="select" label="Treat ambiguous bases" help="If 1: Treat ambiguous bases as N, ambiguous bases (N, R, V etc.) are treated as N (no call). If 2: Resolve ambiguous characters, if the character represents two bases and one is the reference base, convert it to the non-reference base. Otherwise convert it to N. Default: 0: Don\'t treat ambiguous bases">\n+            <option value="" selected="true">0: Don\'t treat ambiguous bases</option>\n+            <option value="-ambiguousToN">1: Treat ambiguous bases as N (no call)</option>\n+            <option value="-resolveAmbiguous">2: Resolve ambiguous characters (convert)</option>\n+        </param>\n+\n+        <section name="excl_seq" title="Exclude sequences" expanded="true">\n+            <param argument="-excludeFile" format="txt" type="data" optional="true" label="Exclude sequences from text file" help="Exclude sequences named in file which has one sequence name per line." />\n+            <param argument="-maxDiff" type="integer" min="0" value="0" label="Maximum number of mismatches" help="Exclude sequences with more than N mismatches with the reference sequence. If -windowSize is used, sequences are masked accordingly before the mismatches are counted. Default: 0" />\n+        </section>\n+\n+        <section name="mask_sites" title="Mask sites" expanded="true">\n+            <param argument="-maskSites" format="vcf" type'..b'minAmbigWindow bases in a window of +-N bases around the base. Masking approach adapted from https://github.com/roblanf/sarscov2phylo/blob/master/scripts/mask_seq.py Use -windowSize=7 for same results. Default: 0" />\n+            <param argument="-minAmbigInWindow" type="integer" min="1" value="2" label="Minimum of ambiguous characters within the window given above" help="When -windowSize is provided, mask any base for which there are at least this many N, ambiguous or gap characters within the window. Default: 2" />\n+        </section>\n+        \n+        <param argument="-includeNoAltN" type="boolean" truevalue="-includeNoAltN" falsevalue="" label="Include positions without defined ALT allele" help="Include base positions with no alternate alleles observed, but at least one N (missing base/no-call). Default: false" />\n+        <param argument="-minAc" type="integer" min="0" value="0" label="Minimum allele count" help="Ignore alternate alleles observed fewer than N times. Default: 0" />\n+        <param argument="-minAf" type="float" min="0.0" max="1.0" value="0.0" label="Minimum allele frequency" help="Ignore alternate alleles observed in less than F of non-N bases. Default: 0.0" />\n+\n+        <section name="output" title="Output VCF options" expanded="true">\n+            <param argument="-startOffset" type="integer" min="0" value="0" label="Start offset" help="Add N bases to each position, e.g. for trimmed alignments. Default: 0" />\n+            <param argument="-includeRef" type="boolean" truevalue="-includeRef" falsevalue="" label="Include the reference in the genotype columns" help="Default: omitted as redundant (false)" />\n+            <param argument="-noGenotypes" type="boolean" truevalue="-noGenotypes" falsevalue="" label="Output 8-column VCF" help="VCF without the sample genotype columns. Default: false" />\n+            <param argument="-vcfChrom" type="text" optional="true" label="Use this sequence for the CHROM column in the VCF" help="Default: name of the reference sequence." />\n+        </section>\n+    </inputs>\n+    <outputs>\n+        <data name="out" format="vcf" />\n+    </outputs>\n+    <tests>\n+        <test expect_num_outputs="1"> <!-- default params -->\n+            <param name="in_fasta" value="input.fa" />\n+            <output name="out" ftype="vcf" file="out1.vcf" lines_diff="4" />\n+        </test>\n+        <test expect_num_outputs="1"> <!-- set a value for every input parameter-->\n+            <param name="in_fasta" value="input.fa" />\n+\n+            <conditional name="ref_seq">\n+                <param name="refSeq" value="customRef" />\n+                <param name="ref" value="sample3" />\n+            </conditional>\n+\n+            <param name="ambiguous" value="-ambiguousToN" />\n+\n+            <section name="excl_seq">\n+                <param name="excludeFile" value="excl.txt" />\n+                <param name="maxDiff" value="3" />\n+            </section>\n+\n+            <section name="mask_sites">\n+                <param name="maskSites" value="mask.vcf" />\n+                <param name="windowSize" value="7" />\n+                <param name="minAmbigInWindow" value="3" />\n+            </section>\n+\n+            <param name="includeNoAltN" value="true" />\n+            <param name="minAc" value="1" />\n+            <param name="minAf" value="0.1" />\n+\n+            <section name="output">\n+                <param name="startOffset" value="1" />\n+                <param name="includeRef" value="true" />\n+                <param name="noGenotypes" value="true" />\n+                <param name="vcfChrom" value="sample1" />\n+            </section>\n+\n+            <output name="out" ftype="vcf" file="out2.vcf" lines_diff="4" />\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+**What it does**\n+\n+`faToVcf`_ is a tool to extract a VCF from a multi-sequence FASTA alignment.\n+\n+.. _faToVcf: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/FOOTER.txt \n+    ]]>    </help>\n+    <citations>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r 78df8fc2b3ab test-data/excl.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/excl.txt Wed Jan 11 13:14:29 2023 +0000
b
@@ -0,0 +1,1 @@
+sample2
b
diff -r 000000000000 -r 78df8fc2b3ab test-data/input.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fa Wed Jan 11 13:14:29 2023 +0000
b
@@ -0,0 +1,6 @@
+>sample1
+ANRGACACAGTCAC
+>sample2
+ARNGACAC----AC
+>sample3
+ARRAACGCATTCAN
b
diff -r 000000000000 -r 78df8fc2b3ab test-data/mask.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mask.vcf Wed Jan 11 13:14:29 2023 +0000
b
@@ -0,0 +1,7 @@
+##fileformat=VCFv4.3
+##INFO=<ID=GENE,Number=1,Type=String,Description="Position falls into range of this gene">
+##INFO=<ID=AA_POS,Number=1,Type=Integer,Description="Position of amino acid residue within gene">
+##INFO=<ID=AA_REF,Number=1,Type=String,Description="Reference amino acid residue">
+##INFO=<ID=AA_ALT,Number=.,Type=String,Description="List of alternative amino acid residues (IUPAC ambiguity code)">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+sample3 3 . T . . mask SUB=NDM;EXC=seq_end
b
diff -r 000000000000 -r 78df8fc2b3ab test-data/out1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out1.vcf Wed Jan 11 13:14:29 2023 +0000
b
@@ -0,0 +1,7 @@
+##fileformat=VCFv4.2
+##reference=test-data/input.fa:sample1
+##source=faToVcf test-data/input.fa test-data/out1.vcf
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample2 sample3
+sample1 4 G4A G A . . AC=1;AN=2 GT 0 1
+sample1 7 A7G A G . . AC=1;AN=2 GT 0 1
+sample1 10 G10T G T . . AC=1;AN=1 GT . 1
b
diff -r 000000000000 -r 78df8fc2b3ab test-data/out2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out2.vcf Wed Jan 11 13:14:29 2023 +0000
b
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.2
+##reference=test-data/input.fa:sample3
+##source=faToVcf test-data/input.fa out-test.vcf
+#CHROM POS ID REF ALT QUAL FILTER INFO
+sample1 3 R3R R * . . AC=0;AN=0
+sample1 5 A5G A G . . AC=1;AN=2
+sample1 8 G8A G A . . AC=1;AN=2
+sample1 11 T11G T G . . AC=1;AN=2