Mercurial > repos > bgruening > bionano_scaffold
view bionano_scaffold.xml @ 11:3371c5bdc17a draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bionano commit f2952aec50c8bf269f95bd3caa634b232e640729
author | bgruening |
---|---|
date | Mon, 27 Feb 2023 14:32:45 +0000 |
parents | 22286dab5aa1 |
children |
line wrap: on
line source
<tool id="bionano_scaffold" name="Bionano Hybrid Scaffold" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
    <description>automates the scaffolding process</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edam_ontology"/>
    <expand macro="requirements"/>
    <!--
        Command outline:
          1. copy the NGS FASTA and the Bionano CMAP into the working directory;
          2. derive GALAXY_MEMORY_GB from GALAXY_MEMORY_MB (default 8192) and SLOTS as
             max(GALAXY_SLOTS, 2);
          3. build ./config.xml, either from the "vgp_mode" configfile (placeholders
             __MEMORY__ and __CORES__ substituted with sed) or from the user supplied
             file (maxmem, maxthreads, maxvirtmem and insertThreads rewritten with sed);
          4. print config.xml on stdout (the tests assert on it) and run hybridScaffold.pl;
          5. optionally run remove_fake_cut_sites.py on the two FASTA outputs;
          6. concatenate hybrid_scaffolds/*cut.txt into hybrid_scaffolds/keys.txt.
        Cheetah directives and "##" comments must stay on their own lines.
    -->
    <command detect_errors="exit_code"><![CDATA[
        #set RefAligner = '/RefAligner/RefAligner'
        #set output_file_NCBI = 'hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_NGScontigs_HYBRID_SCAFFOLD_NCBI.fasta'
        #set output_file_not_scaffolded = 'hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_NGScontigs_HYBRID_SCAFFOLD_NOT_SCAFFOLDED.fasta'

        ## softlinks do not work
        cp '${ngs_fasta}' ./ngs.fasta
        && cp '${bionano_cmap}' ./bionano.cmap
        && export GALAXY_MEMORY_GB=\$((\${GALAXY_MEMORY_MB:-8192}/1024))
        ## return max out of GALAXY_SLOTS and 2 --> use a minimum 2 slots
        && export SLOTS=\$(( \${GALAXY_SLOTS:-2} > 2 ? \${GALAXY_SLOTS:-2} : 2 ))
        #if $configuration_options.configuration == 'vgp'
            && cp '${vgp_mode}' ./config.xml
            && sed -i "s|__MEMORY__|\$GALAXY_MEMORY_GB|" ./config.xml
            && sed -i "s|__CORES__|\$SLOTS|" ./config.xml
        #else
            && cp '${configuration_file}' ./config.xml
            && sed -i "s|attr=\"maxmem\" val0=.* display|attr=\"maxmem\" val0=\"\$GALAXY_MEMORY_GB\" display|" ./config.xml
            && sed -i "s|attr=\"maxthreads\" val0=.* display|attr=\"maxthreads\" val0=\"\$SLOTS\" display|" ./config.xml
            && sed -i "s|attr=\"maxvirtmem\" val0=.*/>|attr=\"maxvirtmem\" val0=\"\$GALAXY_MEMORY_GB\"/>|" ./config.xml
            && sed -i "s|attr=\"insertThreads\" val0=.*/>|attr=\"insertThreads\" val0=\"\$SLOTS\"/>|" ./config.xml
        #end if
        ## output the configuration file on stdout
        && cat ./config.xml
        && perl '/HybridScaffold/hybridScaffold.pl'
            -n ./ngs.fasta
            -b ./bionano.cmap
            -c ./config.xml
            -r $RefAligner
            #if $conflict_resolution
                -M '${conflict_resolution}'
            #else
                -B $conflict_filter_genome
                -N $conflict_filter_sequence
            #end if
            ##$align_molecules_options.align_molecules
            ###if $align_molecules_options.align_molecules
            ##    -m $align_molecules_options.bionano_molecules
            ##    -q '${align_molecules_options.optarguments_xml}'
            ##    -p
            ##/home/bionano/tools/pipeline/1.0/Pipeline/1.0/
            ###end if
            ##$quimeric_quality_options.quimeric_quality
            ###if $quimeric_quality_options.quimeric_quality
            ##    -m $align_molecules_options.bionano_molecules
            ##    #if $quimeric_quality_conditional.noise_parameter
            ##        -e '${quimeric_quality_conditional.noise_parameter}'
            ##    #end if
            ###end if
            -f
            $zip_file
            -o ./
        #if $trim_cut_sites
            && export PATH=/opt/conda/bin/:\$PATH
            && python '$__tool_directory__/remove_fake_cut_sites.py' $output_file_NCBI 'SCAFFOLD_NCBI_trimmed.fasta' 'output.log'
            && python '$__tool_directory__/remove_fake_cut_sites.py' $output_file_not_scaffolded 'NOT_SCAFFOLDED_trimmed.fasta' 'output.log'
        #end if
        ## the next lines are required because the XXXX_cut.txt file is not always generated
        && touch hybrid_scaffolds/temp_cut.txt
        && cat hybrid_scaffolds/*cut.txt > hybrid_scaffolds/keys.txt
    ]]></command>
    <!--
        "vgp_mode" is only rendered when the VGP configuration is selected. The
        __MEMORY__ and __CORES__ placeholders are replaced by the sed calls in the
        command section; the enzyme comes from the "enzyme" text parameter.
        NOTE(review): the refineFinal1 section contains an element spelled "flat"
        (attr="S" val0="-9") where every sibling is "flag". It is kept unchanged here
        because correcting it may change scaffolding results and the expected test
        reports; confirm against the upstream Bionano configuration before changing it.
    -->
    <configfiles>
        <configfile name="vgp_mode"><![CDATA[
#if $configuration_options.configuration == 'vgp'
<hybridScaffold>
    <version>
        <flag attr="version" val0="\$Id: hybridScaffold_DLE1_config.xml 7702 2018-06-25 20:53:51Z apang \$"/>
    </version>
    <global>
        <flag attr="maxmem" val0="__MEMORY__" display="Maximum memory (GB)" group="Global options" description="Define the maximum amount of RAM in gigabytes to be used by each process."/>
        <flag attr="maxthreads" val0="__CORES__" display="Max threads" group="Global options" description="Define maximum number of threads to be used by each process."/>
        <flag attr="maxvirtmem" val0="__MEMORY__"/>
        <flag attr="RAmem" val0="3" val1="1"/>
    </global>
    <fasta2cmap>
        <flag attr="enzyme" val0="$configuration_options.enzyme" display="Enzyme" group="FASTA to CMAP digestion" description="Define single enzyme for in-silico FASTA to CMAP digestion. Available enzymes: BspQI, BbvCI, BsmI, BsrDI, BssSI, DLE1."/>
        <flag attr="channelNum" val0="1" display="Channel number" group="FASTA to CMAP digestion" description="Specify the channel the enzyme was used."/>
        <flag attr="minLabels" val0="0" display="Minimum label sites" group="FASTA to CMAP digestion" description="Specify minimum number of label sites per digested contig."/>
        <flag attr="minLength" val0="0" display="Minimum length (Kb)" group="FASTA to CMAP digestion" description="Specify minimum length in Kb of each digested contig."/>
    </fasta2cmap>
    <align0>
        <flag attr="M" val0="1" val1="3"/>
        <flag attr="ScaleDelta" val0="0.02" val1="15"/>
        <flag attr="ScaleDeltaBPP"/>
        <flag attr="hashScaleDelta" val0="2"/>
        <flag attr="res" val0="2.9"/>
        <flag attr="FP" val0="0.6"/>
        <flag attr="FN" val0="0.06"/>
        <flag attr="sf" val0="0.20"/>
        <flag attr="sd" val0="0.0"/>
        <flag attr="sr" val0="0.01"/>
        <flag attr="extend" val0="1"/>
        <flag attr="outlier" val0="0.0001"/>
        <flag attr="endoutlier" val0="0.001"/>
        <flag attr="PVendoutlier"/>
        <flag attr="deltaX" val0="12"/>
        <flag attr="deltaY" val0="12"/>
        <flag attr="xmapchim" val0="12"/>
        <flag attr="hashgen" val0="5" val1="7" val2="2.4" val3="1.5" val4="0.05" val5="5.0" val6="1" val7="1" val8="4"/>
        <flag attr="hash" val0="-hashdelta" val1="26" val2="10" val3="46"/>
        <flag attr="hashMultiMatch" val0="30" val1="10"/>
        <flag attr="insertThreads" val0="__CORES__"/>
        <flag attr="nosplit" val0="2"/>
        <flag attr="biaswt" val0="0"/>
        <flag attr="T" val0="1e-10" display="P-value" group="Initial alignment" description="Minimum confidence value to output initial alignments. Recommended starting value of 1e-5/genome size in Mb."/>
        <flag attr="S" val0="-1000"/>
        <flag attr="indel"/>
        <flag attr="PVres" val0="2"/>
        <flag attr="rres" val0="0.9"/>
        <flag attr="MaxSE" val0="0.5"/>
        <flag attr="HSDrange" val0="1.0"/>
        <flag attr="outlierBC"/>
        <flag attr="xmapUnique" val0="12"/>
        <flag attr="AlignRes" val0="2."/>
        <flag attr="outlierExtend" val0="12" val1="24"/>
        <flag attr="Kmax" val0="12"/>
        <flag attr="resEstimate"/>
        <flag attr="f"/>
        <flag attr="mres" val0="0.9"/>
    </align0>
    <align1>
        <flag attr="res" val0="2.9"/>
        <flag attr="FP" val0="0.6"/>
        <flag attr="FN" val0="0.06"/>
        <flag attr="sf" val0="0.20"/>
        <flag attr="sd" val0="0.0"/>
        <flag attr="sr" val0="0.01"/>
        <flag attr="extend" val0="1"/>
        <flag attr="outlier" val0="0.0001"/>
        <flag attr="endoutlier" val0="0.001"/>
        <flag attr="PVendoutlier"/>
        <flag attr="deltaX" val0="12"/>
        <flag attr="deltaY" val0="12"/>
        <flag attr="xmapchim" val0="12"/>
        <flag attr="hashgen" val0="5" val1="7" val2="2.4" val3="1.5" val4="0.05" val5="5.0" val6="1" val7="1" val8="4"/>
        <flag attr="hash" val0="-hashdelta" val1="26" val2="10" val3="46"/>
        <flag attr="hashMultiMatch" val0="30" val1="10"/>
        <flag attr="insertThreads" val0="__CORES__"/>
        <flag attr="nosplit" val0="2"/>
        <flag attr="biaswt" val0="0"/>
        <flag attr="T" val0="1e-10" display="P-value" group="Initial alignment" description="Minimum confidence value to output initial alignments. Recommended starting value of 1e-5/genome size in Mb."/>
        <flag attr="S" val0="-1000"/>
        <flag attr="indel"/>
        <flag attr="PVres" val0="2"/>
        <flag attr="rres" val0="0.9"/>
        <flag attr="MaxSE" val0="0.5"/>
        <flag attr="HSDrange" val0="1.0"/>
        <flag attr="outlierBC"/>
        <flag attr="xmapUnique" val0="12"/>
        <flag attr="AlignRes" val0="2."/>
        <flag attr="outlierExtend" val0="12" val1="24"/>
        <flag attr="Kmax" val0="12"/>
        <flag attr="resEstimate"/>
        <flag attr="f"/>
        <flag attr="mres" val0="0.9"/>
    </align1>
    <assignAlignType>
        <flag attr="T_cutoff" val0="1e-11" display="P-value" group="Chimeric/conflicting alignment flagging" description="Minimum confidence value used to flag chimeric/conflicting alignments. Recommand to set it to be the same as the merge_Tvalue below."/>
        <flag attr="max_overhang" val0="5" display=""/>
    </assignAlignType>
    <cut_conflicts>
        <flag attr="window_size" val0="10000" display="Distance (bp)" group="Conflict-cut" description="The distance (bp) from a conflicting site within which the chimeric quality score of BioNano genome map labels will be examined"/>
        <flag attr="min_quality_score_threshold" val0="35" display="Percent (%)" group="Conflict-cut" description="The minimal percentage of molecules spanning to the left and right of a label of interest, thus supporting the BioNano assembly at that region"/>
        <flag attr="min_coverage_threshold" val0="10" display="Coverage (X)" group="Conflict-cut" description="The minimal number of molecules aligning to a label of interest in the BioNano assembly"/>
    </cut_conflicts>
    <mergeNGS_BN>
        <flag attr="merge_Tvalue" val0="1e-11" display="P-value" group="Merging" description="Minimum confidence value used to merge alignments. Recommand to set it to be the same as the assignAlignType T_cutoff above"/>
        <flag attr="id_shift" val0="100000" display="BioNano cmap id shift" group="Merging" description="Value to shift the BioNano cmap id to distinguish the BioNano cmaps from sequence cmaps. Recommand to set it to be greater than the number of sequence entries."/>
        <flag attr="max_merge_rounds" val0="40" display=""/>
        <flag attr="endoutlier" val0="1e-4" display=""/>
        <flag attr="outlier" val0="1e-4" display=""/>
        <flag attr="biaswt" val0="0" display=""/>
        <flag attr="sd" val0="0.1" display=""/>
        <flag attr="res" val0="2.9" display=""/>
        <flag attr="mres" val0="2.9" display=""/>
        <flag attr="sf" val0="0.2" display=""/>
        <flag attr="RepeatMask" val0="4" val1="0.01" display=""/>
        <flag attr="RepeatRec" val0="0.7" val1="0.6" val2="1.4" display=""/>
        <flag attr="pairmerge" val0="80" val1="0.2" display="Min alignment length and Max endoutlier" group="Merging" description="Minimum alignment length required for pair merge, and the maximum endoutlier allowed."/>
        <flag attr="maxmem" val0="__MEMORY__" display=""/>
        <flag attr="pairmergeRepeat"/>
        <flag attr="NoBpp"/>
        <flag attr="first" val0="-1" display=""/>
        <flag attr="f"/>
    </mergeNGS_BN>
    <align_final_1st_pass>
        <flag attr="res" val0="2.9"/>
        <flag attr="FP" val0="0.6"/>
        <flag attr="FN" val0="0.06"/>
        <flag attr="sf" val0="0.20"/>
        <flag attr="sd" val0="0.0"/>
        <flag attr="sr" val0="0.01"/>
        <flag attr="extend" val0="1"/>
        <flag attr="outlier" val0="0.0001"/>
        <flag attr="endoutlier" val0="0.001"/>
        <flag attr="PVendoutlier"/>
        <flag attr="deltaX" val0="12"/>
        <flag attr="deltaY" val0="12"/>
        <flag attr="xmapchim" val0="12"/>
        <flag attr="hashgen" val0="5" val1="7" val2="2.4" val3="1.5" val4="0.05" val5="5.0" val6="1" val7="1" val8="4"/>
        <flag attr="hash" val0="-hashdelta" val1="26" val2="10" val3="46"/>
        <flag attr="hashMultiMatch" val0="30" val1="10"/>
        <flag attr="insertThreads" val0="__CORES__"/>
        <flag attr="nosplit" val0="2"/>
        <flag attr="biaswt" val0="0"/>
        <flag attr="T" val0="1e-10" display="P-value" group="Initial alignment" description="Minimum confidence value to output intial alignments. Recommended starting value of 1e-5/genome size in Mb."/>
        <flag attr="S" val0="-1000"/>
        <flag attr="indel"/>
        <flag attr="PVres" val0="2"/>
        <flag attr="rres" val0="0.9"/>
        <flag attr="MaxSE" val0="0.5"/>
        <flag attr="HSDrange" val0="1.0"/>
        <flag attr="outlierBC"/>
        <flag attr="xmapUnique" val0="12"/>
        <flag attr="AlignRes" val0="2."/>
        <flag attr="outlierExtend" val0="6" val1="24"/>
        <flag attr="Kmax" val0="6"/>
        <flag attr="resEstimate"/>
        <flag attr="f"/>
        <flag attr="mres" val0="0.9"/>
        <flag attr="MultiMatches" val0="5"/>
    </align_final_1st_pass>
    <align_final_2nd_pass>
        <flag attr="res" val0="2.9"/>
        <flag attr="FP" val0="0.6"/>
        <flag attr="FN" val0="0.06"/>
        <flag attr="sf" val0="0.20"/>
        <flag attr="sd" val0="0.0"/>
        <flag attr="sr" val0="0.01"/>
        <flag attr="extend" val0="1"/>
        <flag attr="outlier" val0="0.0001"/>
        <flag attr="endoutlier" val0="0.001"/>
        <flag attr="PVendoutlier"/>
        <flag attr="deltaX" val0="12"/>
        <flag attr="deltaY" val0="12"/>
        <flag attr="xmapchim" val0="12"/>
        <flag attr="hashgen" val0="5" val1="3" val2="2.4" val3="1.5" val4="0.05" val5="5.0" val6="1" val7="1" val8="4"/>
        <flag attr="hash" val0="-hashdelta" val1="50"/>
        <flag attr="hashMultiMatch" val0="30" val1="10" val2="3"/>
        <flag attr="insertThreads" val0="__CORES__"/>
        <flag attr="nosplit" val0="2"/>
        <flag attr="biaswt" val0="0"/>
        <flag attr="T" val0="1e-10" display="P-value" group="Final alignment" description="Minimum confidence score used to align NGS contigs back to hybrid scaffold."/>
        <flag attr="S" val0="-1000"/>
        <flag attr="indel"/>
        <flag attr="PVres" val0="2"/>
        <flag attr="rres" val0="0.9"/>
        <flag attr="MaxSE" val0="0.5"/>
        <flag attr="HSDrange" val0="1.0"/>
        <flag attr="outlierBC"/>
        <flag attr="xmapUnique" val0="12"/>
        <flag attr="AlignRes" val0="2."/>
        <flag attr="outlierExtend" val0="12" val1="24"/>
        <flag attr="Kmax" val0="12"/>
        <flag attr="resEstimate"/>
        <flag attr="f"/>
        <flag attr="mres" val0="0.9"/>
        <flag attr="MultiMatches" val0="5"/>
    </align_final_2nd_pass>
    <align_final_BNG>
        <flag attr="res" val0="2.9"/>
        <flag attr="FP" val0="0.6"/>
        <flag attr="FN" val0="0.06"/>
        <flag attr="sf" val0="0.20"/>
        <flag attr="sd" val0="0.0"/>
        <flag attr="sr" val0="0.01"/>
        <flag attr="extend" val0="1"/>
        <flag attr="outlier" val0="0.0001"/>
        <flag attr="endoutlier" val0="0.001"/>
        <flag attr="PVendoutlier"/>
        <flag attr="deltaX" val0="6"/>
        <flag attr="deltaY" val0="6"/>
        <flag attr="xmapchim" val0="12"/>
        <flag attr="hashgen" val0="5" val1="7" val2="2.4" val3="1.5" val4="0.05" val5="5.0" val6="1" val7="1" val8="4"/>
        <flag attr="hash" val0="-hashdelta" val1="26" val2="10" val3="46"/>
        <flag attr="hashMultiMatch" val0="30" val1="10"/>
        <flag attr="insertThreads" val0="__CORES__"/>
        <flag attr="nosplit" val0="2"/>
        <flag attr="biaswt" val0="0"/>
        <flag attr="T" val0="1e-10" display="P-value" group="Final alignment" description="Minimum confidence score used to align NGS contigs back to hybrid scaffold."/>
        <flag attr="S" val0="-1000"/>
        <flag attr="indel"/>
        <flag attr="PVres" val0="2"/>
        <flag attr="rres" val0="0.9"/>
        <flag attr="MaxSE" val0="0.5"/>
        <flag attr="HSDrange" val0="1.0"/>
        <flag attr="outlierBC"/>
        <flag attr="xmapUnique" val0="14"/>
        <flag attr="AlignRes" val0="2."/>
        <flag attr="outlierExtend" val0="6" val1="24"/>
        <flag attr="Kmax" val0="6"/>
        <flag attr="resEstimate"/>
        <flag attr="BestRef" val0="1"/>
        <flag attr="f"/>
        <flag attr="mres" val0="0.9"/>
    </align_final_BNG>
    <refineFinal1>
        <flag attr="refine" val0="1" display="Refine Map" group="Final Refinement" default0="3"/>
        <flag attr="usecolor" val0="1" default0="1" display="Color Channel" group="Final Refinement"/>
        <flag attr="A" val0="5" display="Aligned Sites Threshold" group="Final Refinement" default0="5"/>
        <flat attr="S" val0="-9"/>
        <flag attr="L" val0="130"/>
        <flag attr="maptype" val0="0"/>
        <flag attr="extend" val0="1" display="Allow Extended Molecule" group="Final Refinement" default0="1"/>
        <flag attr="MaxCov" val0="100" display="Max Coverage" group="Final Refinement" default0="100"/>
        <flag attr="Mprobeval" display="Fast Mode" group="Final Refinement"/>
        <flag attr="splitcnt"/>
        <flag attr="splitrev" val0="2"/>
        <flag attr="CmapSNR"/>
        <flag attr="splitsite"/>
        <flag attr="MinSplitLen" val0="50.0"/>
        <flag attr="MaxSE" val0="0.5"/>
        <flag attr="outlier" val0="2e-2" display="Min Outliers P-value" group="Second Refinement" default0="1e-5"/>
        <flag attr="outlierMax" val0="80."/>
        <flag attr="outlierLambda" val0="20."/>
        <flag attr="endoutlier" val0="1e-3" display="Molecule Ends P-value Cutoff" group="Second Refinement" default0="1e-4"/>
        <flag attr="endoutlierRef" val0="1e-4" val1="1e-3"/>
        <flag attr="skip_dist" val0="0.0" val2="0.0"/>
        <flag attr="endoutlierFinal" val0="1"/>
        <flag attr="maxEnd" val0="90."/>
        <flag attr="RTHETA_FIX" val0="0"/>
        <flag attr="TB" val0="1e-6" val1="1e-6" val2="-TBmult" val3="0.1"/>
        <flag attr="nosplit" val0="2" display="Allow Chimeric Split" group="Final Refinement" default0="2"/>
        <flag attr="EndTrim" val0="0.0" display="Min End Trim Coverage" group="Final Refinement" default0="4.99"/>
        <flag attr="biaswt" val0="0.7" display="Bias" group="Final Refinement" default0="0"/>
        <flag attr="biaswtEnd" val0="0.0"/>
        <flag attr="biaswtRefine" val0="0.7" val2="1"/>
        <flag attr="biaswtOutlier" val0="0.0"/>
        <flag attr="LRbias" val0="1e0" display="Soft Threshold" group="Final Refinement" default0="1e2"/>
        <flag attr="deltaX" val0="6" display="Molecule Labels Metric" group="Final Refinement" default0="4"/>
        <flag attr="deltaY" val0="7" display="Mapped Labels Metric" group="Final Refinement" default0="6"/>
        <flag attr="RepeatMask" val0="5" val1="0.01" display="Repeat Mask P-values" group="Final Refinement" default0="2" default1="0.01"/>
        <flag attr="RepeatRec" val0="0.7" val1="0.6" val2="1.4"/>
        <flag attr="CovTrim" val0="2"/>
        <flag attr="CovTrimLen" val0="55"/>
        <flag attr="TrimNorm" val0="0"/>
        <flag attr="TrimNormMed" val0="100"/>
        <flag attr="TrimNormChim" val0="2"/>
        <flag attr="TrimNormMin" val0="2"/>
        <flag attr="PVres" val0="2"/>
        <flag attr="PVendoutlier"/>
        <flag attr="AlignRes" val0="1.5"/>
        <flag attr="rres" val0="1.2"/>
        <flag attr="cres" val0="5.6" val1="3" val2="0.1"/>
        <flag attr="hashgen" val00="5" val01="3" val02="2.4" val03="1.5" val04="0.05" val05="5.0" val06="1" val07="1" val08="3"/>
        <flag attr="hash" val00="-hashdelta" val01="10" val02="10" val03="-mres" val04="0.9"/>
        <flag attr="HSDrange" val0="1.0"/>
        <flag attr="hashoffset" val0="1"/>
        <flag attr="hashMultiMatch" val0="15"/>
        <flag attr="insertThreads" val0="__CORES__"/>
        <flag attr="BestRef" val0="1"/>
        <flag attr="BestRefPV" val0="1"/>
        <flag attr="ChimQuality"/>
        <flag attr="f" display="Overwrite Output Files" group="Final Refinement"/>
        <flag attr="T" val0="1e-11" display="P Value Cutoff Threshold"/>
    </refineFinal1>
</hybridScaffold>
#end if
        ]]></configfile>
    </configfiles>
    <inputs>
        <param name="ngs_fasta" argument="-n" type="data" format="fasta" label="NGS FASTA" help="Input NGS FASTA"/>
        <param name="bionano_cmap" argument="-b" type="data" format="cmap" label="BioNano CMAP" help="Input BioNano CMAP"/>
        <!-- Selects between the built-in VGP configuration (configfile "vgp_mode") and a user supplied file. -->
        <conditional name="configuration_options">
            <param name="configuration" argument="-c" type="select" label="Configuration mode" help="It defines the parameters used in each step of hybrid scaffold">
                <option value="vgp">VGP mode</option>
                <option value="file">Select a configuration file</option>
            </param>
            <when value="vgp">
                <param name="enzyme" type="text" value="CTTAAG" label="Restriction enzyme" help="Define single enzyme for in-silico FASTA to CMAP digestion. Available enzymes: BspQI, BbvCI, BsmI, BsrDI, BssSI, DLE1.">
                    <expand macro="sanitize_string" />
                </param>
            </when>
            <when value="file">
                <param name="configuration_file" type="data" format="xml,txt" label="Configuration file" help="Depending on the complexity of the genome of interest, the values of certain parameters could be adjusted accordingly."/>
            </when>
        </conditional>
        <!-- When a conflict resolution file is given, -M is passed and the -B/-N filters below are not used. -->
        <param name="conflict_resolution" argument="-M" type="data" format="txt" optional="True" label="Conflict resolution file" help="Input a conflict resolution file indicating which NGS and BioNano conflicting contigs to be cut [optional]"/>
        <param name="conflict_filter_genome" argument="-B" type="select" label="Genome maps conflict filter" help="Conflict filter level genome maps [required if not using -M option]">
            <option value="1">No filter</option>
            <option value="2">Cut contig at conflict</option>
            <option value="3">Exclude conflicting contig</option>
        </param>
        <param name="conflict_filter_sequence" argument="-N" type="select" label="Sequences conflict filter" help="Conflict filter level for sequences [required if not using -M option]">
            <option value="1">No filter</option>
            <option value="2">Cut contig at conflict</option>
            <option value="3">Exclude conflicting contig</option>
        </param>
        <param name="all_files" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Provide BisCot required files" help="Generate a collection with the files contained in the hybrid_scaffolds folder" />
        <param name="zip_file" argument="-z" type="boolean" truevalue="-z results.zip" falsevalue="" checked="false" label="Generate an output package in ZIP format" help="The hybrid scaffold output package (.zip) can be imported into Access for visualization" />
        <param name="trim_cut_sites" type="boolean" checked="true" label="Remove BioNano cut sites" help="This option removes the spurious BioNano cut sites that are inserted into gaps in some assemblies, replacing them with Ns." />
        <!-- Those options have been disabled because the Docker container doesn't include the required packages
        <conditional name="align_molecules_options">
            <param name="align_molecules" type="select" label="Align molecules to hybrid scaffolds and genome maps" help="Flag to generate molecules to hybrid scaffold alignment and molecules to genome map alignment">
                <option value="-x">True</option>
                <option value="" selected="True">False</option>
            </param>
            <when value="-x">
                <param name="bionano_molecules" type="data" format="bnx" label="Bionano molecules" help="Input BioNano molecules BNX [optional; only required for either the -x or -y option]"/>
                <param name="optarguments_xml" type="data" format="xml" label="OptArguments XML script" help="Input de novo assembly pipeline optArguments XML script [optional; only required for -x option]"/>
            </when>
            <when value="">
            </when>
        </conditional>
        <conditional name="quimeric_quality_options">
            <param name="quimeric_quality" type="select" label="Generate quimeric quality score" help="Flag to generate chimeric quality score for the Input BioNano CMAP">
                <option value="-y">True</option>
                <option value="" selected="True">False</option>
            </param>
            <when value="-y">
                <param name="bionano_molecules" type="data" format="bnx" label="Bionano molecules" help="Input BioNano molecules BNX [optional; only required for either the -x or -y option]"/>
                <param name="noise_parameter" type="data" format="txt" optional="True" label="Noise parameter file" help="Input de novo assembly noise parameter .errbin or .err file [optional; recommended for -y option but not required]"/>
            </when>
            <when value="">
            </when>
        </conditional>
        -->
    </inputs>
    <!--
        Output selection:
          untrimmed FASTA outputs are produced when trim_cut_sites is off, trimmed ones when it is on;
          results.zip only when zip_file is set;
          the CMAP, XMAP and key files only when all_files is set.
    -->
    <outputs>
        <data name="ngs_contigs_scaffold" format="fasta" from_work_dir="hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_NGScontigs_HYBRID_SCAFFOLD_NCBI.fasta" label="${tool.name} on ${on_string}: NGScontigs scaffold NCBI">
            <filter>trim_cut_sites == False</filter>
        </data>
        <data name="ngs_contigs_not_scaffolded" format="fasta" from_work_dir="hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_NGScontigs_HYBRID_SCAFFOLD_NOT_SCAFFOLDED.fasta" label="${tool.name} on ${on_string}: NGScontigs not scaffolded">
            <filter>trim_cut_sites == False</filter>
        </data>
        <data name="ngs_contigs_scaffold_trimmed" format="fasta" from_work_dir="SCAFFOLD_NCBI_trimmed.fasta" label="${tool.name} on ${on_string}: NGScontigs scaffold NCBI trimmed">
            <filter>trim_cut_sites</filter>
        </data>
        <data name="ngs_contigs_not_scaffolded_trimmed" format="fasta" from_work_dir="NOT_SCAFFOLDED_trimmed.fasta" label="${tool.name} on ${on_string}: NGScontigs not scaffolded trimmed">
            <filter>trim_cut_sites</filter>
        </data>
        <data name="report" format="txt" from_work_dir="hybrid_scaffolds/hybrid_scaffold_informatics_report.txt" label="${tool.name} on ${on_string}: hybrid scaffold report"/>
        <data name="conflicts" format="txt" from_work_dir="hybrid_scaffolds/conflicts.txt" label="${tool.name} on ${on_string}: conflicts"/>
        <data name="results" format="zip" from_work_dir="results.zip" label="${tool.name} on ${on_string}: results">
            <filter>zip_file</filter>
        </data>
        <data name="ngs_contigs_scaffold_agp" format="agp" from_work_dir="hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_NGScontigs_HYBRID_SCAFFOLD.agp" label="${tool.name} on ${on_string}: AGP file"/>
        <data name="query_file" format="cmap" from_work_dir="hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_BNGcontigs_HYBRID_SCAFFOLD_q.cmap" label="${tool.name} on ${on_string}: query CMAP">
            <filter>all_files</filter>
        </data>
        <data name="reference_file" format="cmap" from_work_dir="hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_BNGcontigs_HYBRID_SCAFFOLD_r.cmap" label="${tool.name} on ${on_string}: reference CMAP">
            <filter>all_files</filter>
        </data>
        <data name="xmap_file" format="xml" from_work_dir="hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_NGScontigs_HYBRID_SCAFFOLD.xmap" label="${tool.name} on ${on_string}: XMAP configuration">
            <filter>all_files</filter>
        </data>
        <data name="key_file" format="tabular" from_work_dir="hybrid_scaffolds/keys.txt" label="${tool.name} on ${on_string}: Bionano key file">
            <filter>all_files</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: configuration file, filters 3/3, ZIP package, no trimming -->
        <test expect_num_outputs="6">
            <param name="ngs_fasta" value="assembly.fasta.gz"/>
            <param name="bionano_cmap" value="colormap_assembly.cmap"/>
            <param name="conflict_filter_genome" value="3"/>
            <param name="conflict_filter_sequence" value="3"/>
            <conditional name="configuration_options">
                <param name="configuration" value="file"/>
                <param name="configuration_file" value="configuration.xml"/>
            </conditional>
            <param name="zip_file" value="true"/>
            <param name="trim_cut_sites" value="false"/>
            <output name="ngs_contigs_scaffold" ftype="fasta">
                <assert_contents>
                    <has_size value="4753369" delta="100" />
                    <has_n_lines n="2"/>
                    <has_line line=">Super-Scaffold_1"/>
                </assert_contents>
            </output>
            <output name="report" file="test_01_report.txt" ftype="txt"/>
            <output name="ngs_contigs_scaffold_agp" file="test_01.agp" ftype="agp"/>
            <output name="conflicts" ftype="txt">
                <assert_contents>
                    <has_text text="alignmentOrientation" />
                </assert_contents>
            </output>
            <output name="results" ftype="zip">
                <assert_contents>
                    <has_archive_member path=".*/status.txt"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression='attr="maxmem" val0="\d+"'/>
                <has_text_matching expression='attr="maxthreads" val0="\d+"'/>
                <has_text_matching expression='attr="insertThreads" val0="\d+"'/>
                <has_text_matching expression='attr="maxvirtmem" val0="\d+"'/>
                <has_text text="hybridScaffold"/>
            </assert_stdout>
        </test>
        <!-- Test 2: configuration file, filters 2/2, no ZIP package, no trimming -->
        <test expect_num_outputs="5">
            <param name="ngs_fasta" value="assembly.fasta.gz"/>
            <param name="bionano_cmap" value="colormap_assembly.cmap"/>
            <param name="conflict_filter_genome" value="2"/>
            <param name="conflict_filter_sequence" value="2"/>
            <param name="trim_cut_sites" value="false"/>
            <conditional name="configuration_options">
                <param name="configuration" value="file"/>
                <param name="configuration_file" value="configuration.xml"/>
            </conditional>
            <output name="ngs_contigs_scaffold" ftype="fasta">
                <assert_contents>
                    <has_size value="4753369" delta="100" />
                    <has_n_lines n="2"/>
                    <has_line line=">Super-Scaffold_1"/>
                </assert_contents>
            </output>
            <output name="report" file="test_02_report.txt" ftype="txt"/>
            <output name="ngs_contigs_scaffold_agp" ftype="agp">
                <assert_contents>
                    <has_size value="311" delta="20" />
                    <has_text text="##agp-version"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
            <output name="conflicts" ftype="txt">
                <assert_contents>
                    <has_text text="alignmentOrientation" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression='attr="maxmem" val0="\d+"'/>
                <has_text_matching expression='attr="maxthreads" val0="\d+"'/>
                <has_text_matching expression='attr="insertThreads" val0="\d+"'/>
                <has_text_matching expression='attr="maxvirtmem" val0="\d+"'/>
                <has_text text="hybridScaffold"/>
            </assert_stdout>
        </test>
        <!-- Test 3: configuration file, filters 2/3, ZIP package, no trimming -->
        <test expect_num_outputs="6">
            <param name="ngs_fasta" value="assembly.fasta.gz"/>
            <param name="bionano_cmap" value="colormap_assembly.cmap"/>
            <param name="conflict_filter_genome" value="2"/>
            <param name="conflict_filter_sequence" value="3"/>
            <conditional name="configuration_options">
                <param name="configuration" value="file"/>
                <param name="configuration_file" value="configuration.xml"/>
            </conditional>
            <param name="zip_file" value="true"/>
            <param name="trim_cut_sites" value="false"/>
            <output name="ngs_contigs_scaffold" ftype="fasta">
                <assert_contents>
                    <has_size value="4753369" delta="100" />
                    <has_n_lines n="2"/>
                    <has_line line=">Super-Scaffold_1"/>
                </assert_contents>
            </output>
            <output name="report" file="test_03_report.txt" ftype="txt"/>
            <output name="ngs_contigs_scaffold_agp" ftype="agp">
                <assert_contents>
                    <has_size value="311" delta="20" />
                    <has_text text="##agp-version"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
            <output name="conflicts" ftype="txt">
                <assert_contents>
                    <has_text text="alignmentOrientation" />
                </assert_contents>
            </output>
            <output name="results" ftype="zip">
                <assert_contents>
                    <has_archive_member path=".*/status.txt"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression='attr="maxmem" val0="\d+"'/>
                <has_text_matching expression='attr="maxthreads" val0="\d+"'/>
                <has_text_matching expression='attr="insertThreads" val0="\d+"'/>
                <has_text_matching expression='attr="maxvirtmem" val0="\d+"'/>
                <has_text text="hybridScaffold"/>
            </assert_stdout>
        </test>
        <!-- Test 4: VGP mode with enzyme BspQI, filters 2/3, ZIP package, no trimming -->
        <test expect_num_outputs="6">
            <param name="ngs_fasta" value="assembly.fasta.gz"/>
            <param name="bionano_cmap" value="colormap_assembly.cmap"/>
            <param name="conflict_filter_genome" value="2"/>
            <param name="conflict_filter_sequence" value="3"/>
            <conditional name="configuration_options">
                <param name="configuration" value="vgp"/>
                <param name="enzyme" value="BspQI"/>
            </conditional>
            <param name="zip_file" value="true"/>
            <param name="trim_cut_sites" value="false"/>
            <output name="ngs_contigs_scaffold" ftype="fasta">
                <assert_contents>
                    <has_size value="4753369" delta="100" />
                    <has_n_lines n="2"/>
                    <has_line line=">Super-Scaffold_1"/>
                </assert_contents>
            </output>
            <output name="report" file="test_04_report.txt" ftype="txt"/>
            <output name="ngs_contigs_scaffold_agp" ftype="agp">
                <assert_contents>
                    <has_size value="311" delta="20" />
                    <has_text text="##agp-version"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
            <output name="conflicts" ftype="txt">
                <assert_contents>
                    <has_text text="alignmentOrientation" />
                </assert_contents>
            </output>
            <output name="results" ftype="zip">
                <assert_contents>
                    <has_archive_member path=".*/status.txt"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression='attr="maxmem" val0="\d+"'/>
                <has_text_matching expression='attr="maxthreads" val0="\d+"'/>
                <has_text_matching expression='attr="insertThreads" val0="\d+"'/>
                <has_text_matching expression='attr="maxvirtmem" val0="\d+"'/>
                <has_text text="hybridScaffold"/>
            </assert_stdout>
        </test>
        <!-- Test 5: configuration file, filters 3/3, cut site trimming enabled -->
        <test expect_num_outputs="5">
            <param name="ngs_fasta" value="assembly.fasta.gz"/>
            <param name="bionano_cmap" value="colormap_assembly.cmap"/>
            <param name="conflict_filter_genome" value="3"/>
            <param name="conflict_filter_sequence" value="3"/>
            <conditional name="configuration_options">
                <param name="configuration" value="file"/>
                <param name="configuration_file" value="configuration.xml"/>
            </conditional>
            <param name="trim_cut_sites" value="true"/>
            <output name="ngs_contigs_scaffold_trimmed" ftype="fasta">
                <assert_contents>
                    <has_size value="4832591" delta="100" />
                    <has_n_lines n="79224"/>
                    <has_line line=">Super-Scaffold_1"/>
                </assert_contents>
            </output>
            <output name="conflicts" ftype="txt">
                <assert_contents>
                    <has_text text="alignmentOrientation" />
                </assert_contents>
            </output>
            <output name="report" file="test_05_report.txt" ftype="txt"/>
            <output name="ngs_contigs_scaffold_agp" ftype="agp">
                <assert_contents>
                    <has_size value="311" delta="20" />
                    <has_text text="##agp-version"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression='attr="maxmem" val0="\d+"'/>
                <has_text_matching expression='attr="maxthreads" val0="\d+"'/>
                <has_text_matching expression='attr="insertThreads" val0="\d+"'/>
                <has_text_matching expression='attr="maxvirtmem" val0="\d+"'/>
                <has_text text="hybridScaffold"/>
            </assert_stdout>
        </test>
        <!-- Test 6: as test 5, plus the additional files enabled by all_files -->
        <test expect_num_outputs="9">
            <param name="ngs_fasta" value="assembly.fasta.gz"/>
            <param name="bionano_cmap" value="colormap_assembly.cmap"/>
            <param name="conflict_filter_genome" value="3"/>
            <param name="conflict_filter_sequence" value="3"/>
            <param name="all_files" value="true"/>
            <conditional name="configuration_options">
                <param name="configuration" value="file"/>
                <param name="configuration_file" value="configuration.xml"/>
            </conditional>
            <param name="trim_cut_sites" value="true"/>
            <output name="ngs_contigs_scaffold_trimmed" ftype="fasta">
                <assert_contents>
                    <has_size value="4832591" delta="100" />
                    <has_n_lines n="79224"/>
                    <has_line line=">Super-Scaffold_1"/>
                </assert_contents>
            </output>
            <output name="conflicts" ftype="txt">
                <assert_contents>
                    <has_text text="alignmentOrientation" />
                </assert_contents>
            </output>
            <output name="report" file="test_05_report.txt" ftype="txt"/>
            <output name="ngs_contigs_scaffold_agp" ftype="agp">
                <assert_contents>
                    <has_size value="311" delta="20" />
                    <has_text text="##agp-version"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
            <output name="query_file" ftype="cmap">
                <assert_contents>
                    <has_text text="3879935.1"/>
                    <has_text text="Coverage refer to the interval between current site and next site"/>
                </assert_contents>
            </output>
            <output name="reference_file" ftype="cmap">
                <assert_contents>
                    <has_text text="4753350.0"/>
                    <has_text text=" StdDev refers to the interval between the current site and the next site"/>
                </assert_contents>
            </output>
            <output name="xmap_file" ftype="xml">
                <assert_contents>
                    <has_text text="XMAP File Version"/>
                    <has_text text="(7,7)(8,8)(9,9)(10,10)(11,11)(12,12)(13,13)"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression='attr="maxmem" val0="\d+"'/>
                <has_text_matching expression='attr="maxthreads" val0="\d+"'/>
                <has_text_matching expression='attr="insertThreads" val0="\d+"'/>
                <has_text_matching expression='attr="maxvirtmem" val0="\d+"'/>
                <has_text text="hybridScaffold"/>
            </assert_stdout>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**Purpose**

The Hybrid Scaffold pipeline automates the comprehensive scaffolding process and consists of five major steps: 1) generate in silico maps for sequence assembly; 2) align in silico sequence maps against Bionano genome maps to identify and resolve potential conflicts in either data set; 3) merge the non-conflicting maps into hybrid scaffolds; 4) align sequence maps to the hybrid scaffolds; and 5) generate AGP and FASTA files for the scaffolds.

----

.. class:: infomark

**Coverage**

For Hybrid Scaffold, we recommend using as input a minimum of 80X effective molecule coverage in order to build an accurate and contiguous consensus genome map assembly for each enzyme. When using nickases, using more coverage does not significantly improve map contiguity. When using a DLS enzyme such as DLE-1, effective coverage up to and beyond 100X has shown improved map contiguities for some plants and animals.

----

.. class:: infomark

**Input Bionano assembly**

When running the de novo assembly pipeline for hybrid scaffolding applications, users are recommended to use assembly parameters for non-haplotype-aware assembly. The current Hybrid Scaffold pipeline does not explicitly handle haplotype information and assumes there is only one genome map or NGS sequence contig covering a given genomic region. If multiple haplotypes are present, the pipeline may make false positive conflict cuts and incorrectly mix haplotypes in the final scaffolds. We understand that haplotype information is important in many applications, and a fully haplotype-aware Hybrid Scaffold pipeline is in our roadmap for a future release.

----

.. class:: infomark

@BIONANO_SUPPORT_TEXT@

    ]]></help>
    <expand macro="citations"/>
</tool>