Previous changeset 3:295c0e28f4ee (2021-05-23) Next changeset 5:96cacb31d571 (2021-05-29) |
Commit message:
"planemo upload for repository https://bionanogenomics.com/support/software-downloads/ commit a3d75aba3a21d88adb3706fbcefcaed4fbcb80fe" |
modified:
bionano_scaffold.xml macros.xml |
added:
remove_fake_cut_sites.py test-data/test_05_report.txt |
b |
diff -r 295c0e28f4ee -r 8cc3862f8b8e bionano_scaffold.xml --- a/bionano_scaffold.xml Sun May 23 17:23:03 2021 +0000 +++ b/bionano_scaffold.xml Tue May 25 20:12:52 2021 +0000 |
[ |
b'@@ -7,6 +7,8 @@\n <expand macro="requirements"/>\n <command detect_errors="exit_code"><![CDATA[\n #set RefAligner = \'/usr/local/bin/RefAligner\'\n+ #set output_file_NCBI = \'hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_NGScontigs_HYBRID_SCAFFOLD_NCBI.fasta\'\n+ #set output_file_not_scaffolded = \'hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_NGScontigs_HYBRID_SCAFFOLD_NOT_SCAFFOLDED.fasta\'\n ## softlinks do not work\n cp \'${ngs_fasta}\' ./ngs.fasta\n && cp \'${bionano_cmap}\' ./bionano.cmap\n@@ -33,8 +35,7 @@\n -r $RefAligner\n #if $conflict_resolution\n -M \'${conflict_resolution}\'\n- #end if\n- #if not $conflict_resolution\n+ #else\n -B $conflict_filter_genome\n -N $conflict_filter_sequence\n #end if\n@@ -54,7 +55,10 @@\n -f\n $zip_file\n -o ./\n-\n+ && cat $output_file_NCBI $output_file_not_scaffolded > total_contigs_raw.fasta\n+ #if $trim_cut_sites\n+ && python \'$__tool_directory__/remove_fake_cut_sites.py\' \'total_contigs_raw.fasta\' \'total_contigs_trimmed.fasta\' \'output.log\'\n+ #end if \n ]]> </command>\n <configfiles>\n <configfile name="vgp_mode"><![CDATA[\n@@ -70,7 +74,7 @@\n <flag attr="RAmem" val0="3" val1="1"/>\n </global>\n <fasta2cmap>\n- <flag attr="enzyme" val0="$configuration_options.enzyme" display="Enzyme" group="FASTA to CMAP digestion" description="Define single enzyme for in-silico FASTA to CMAP digestion. Avalible enzymes: BspQI, BbvCI, BsmI, BsrDI, BssSI, DLE1."/>\n+ <flag attr="enzyme" val0="$configuration_options.enzyme" display="Enzyme" group="FASTA to CMAP digestion" description="Define single enzyme for in-silico FASTA to CMAP digestion. Available enzymes: BspQI, BbvCI, BsmI, BsrDI, BssSI, DLE1."/>\n <flag attr="channelNum" val0="1" display="Channel number" group="FASTA to CMAP digestion" description="Specify the channel the enzyme was used."/>\n <flag attr="minLabels" val0="0" display="Minimum label sites" group="FASTA to CMAP digestion" description="Specify minimum number of label sites per digested contig."/>\n <flag attr="minLength" val0="0" display="Minimum length (Kb)" group="FASTA to CMAP digestion" description="Specify minimum length in Kb of each digested contig."/>\n@@ -384,6 +388,7 @@\n <option value="3">Exclude conflicting contig</option>\n </param>\n <param name="zip_file" argument="-z" type="boolean" truevalue="-z results.zip" falsevalue="" checked="false" label="Generate an output package in ZIP format" help="The hybrid scaffold output package (.zip) can be imported into Access for visualization" />\n+ <param name="trim_cut_sites" type="boolean" checked="true" label="Remove BioNano cut sites" help="This option removes the spurious BioNano cut sites that are inserted into gaps in some assemblies, replacing them with Ns." />\n <!-- \n \n Those options have been disabled because the Docker container doesn\'t include the required packages\n@@ -415,16 +420,22 @@\n -->\n </inputs>\n <outputs>\n- <data name="ngs_contigs_scaffold_fasta" format="fasta" from_work_dir="hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_NGScontigs_HYBRID_SCAFFOLD.fasta" label="${tool.name} on ${on_string}: NGScontigs hybrid scaffold (fasta)"/>\n- <data name="ngs_contigs_scaffold_agp" format="txt" from_work_dir="hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_NGScontigs_HYBRID_SCAFFOLD.agp" label="${tool.name} on ${on_string}: NGScontigs hybrid scaffold (agp)"/>\n- <data name="ngs_contigs_scaffold_gap" format="txt" from_work_dir="hybrid_scaffolds/bionano_bppAdjust_cmap_ngs_fasta_NGScontigs_HYBRID_SCAFFOLD.gap" label="${tool.name} on ${on_string}: NGScontigs hybrid scaffold (gap)"/'..b'aram name="zip_file" value="true"/>\n- <output name="ngs_contigs_scaffold_fasta" ftype="fasta">\n+ <param name="trim_cut_sites" value="false"/>\n+ <output name="ngs_contigs" ftype="fasta">\n <assert_contents>\n <has_size value="4753369" delta="100" />\n <has_n_lines n="2"/>\n <has_line line=">Super-Scaffold_1"/>\n </assert_contents>\n </output>\n- <output name="ngs_contigs_scaffold_agp" file="test_03.agp" ftype="txt"/>\n- <output name="ngs_contigs_scaffold_gap" file="test_03.gap" ftype="txt"/>\n <output name="report" file="test_03_report.txt" ftype="txt"/>\n <output name="results" ftype="zip">\n <assert_contents>\n@@ -542,7 +550,7 @@\n <has_text text="hybridScaffold"/>\n </assert_stdout>\n </test>\n- <test expect_num_outputs="5">\n+ <test expect_num_outputs="3">\n <param name="ngs_fasta" value="assembly.fasta.gz"/>\n <param name="bionano_cmap" value="colormap_assembly.cmap"/>\n <param name="conflict_filter_genome" value="2"/>\n@@ -552,15 +560,14 @@\n <param name="enzyme" value="BspQI"/>\n </conditional>\n <param name="zip_file" value="true"/>\n- <output name="ngs_contigs_scaffold_fasta" ftype="fasta">\n+ <param name="trim_cut_sites" value="false"/>\n+ <output name="ngs_contigs" ftype="fasta">\n <assert_contents>\n <has_size value="4753369" delta="100" />\n <has_n_lines n="2"/>\n <has_line line=">Super-Scaffold_1"/>\n </assert_contents>\n </output>\n- <output name="ngs_contigs_scaffold_agp" file="test_04.agp" ftype="txt"/>\n- <output name="ngs_contigs_scaffold_gap" file="test_04.gap" ftype="txt"/>\n <output name="report" file="test_04_report.txt" ftype="txt"/>\n <output name="results" ftype="zip">\n <assert_contents>\n@@ -583,6 +590,43 @@\n <has_text text="hybridScaffold"/>\n </assert_stdout>\n </test>\n+ <test expect_num_outputs="3">\n+ <param name="ngs_fasta" value="assembly.fasta.gz"/>\n+ <param name="bionano_cmap" value="colormap_assembly.cmap"/>\n+ <param name="conflict_filter_genome" value="3"/>\n+ <param name="conflict_filter_sequence" value="3"/>\n+ <conditional name="configuration_options">\n+ <param name="configuration" value="file"/>\n+ <param name="configuration_file" value="configuration.xml"/>\n+ </conditional>\n+ <param name="trim_cut_sites" value="true"/>\n+ <output name="ngs_contigs_trimmed" ftype="fasta">\n+ <assert_contents>\n+ <has_size value="4832591" delta="300" />\n+ </assert_contents>\n+ </output>\n+ <output name="ngs_contigs_trimmed_report" ftype="txt">\n+ <assert_contents>\n+ <has_size value="0" />\n+ </assert_contents>\n+ </output>\n+ <output name="report" file="test_05_report.txt" ftype="txt"/>\n+ <assert_stdout>\n+ <has_text text=\'attr="maxmem" val0="8"\'/>\n+ </assert_stdout>\n+ <assert_stdout>\n+ <has_text text=\'attr="maxthreads" val0="1"\'/>\n+ </assert_stdout>\n+ <assert_stdout>\n+ <has_text text=\'attr="insertThreads" val0="1"\'/>\n+ </assert_stdout>\n+ <assert_stdout>\n+ <has_text text=\'attr="maxvirtmem" val0="8"\'/>\n+ </assert_stdout>\n+ <assert_stdout>\n+ <has_text text="hybridScaffold"/>\n+ </assert_stdout>\n+ </test>\n </tests>\n <help><![CDATA[\n .. class:: infomark\n' |
b |
diff -r 295c0e28f4ee -r 8cc3862f8b8e macros.xml --- a/macros.xml Sun May 23 17:23:03 2021 +0000 +++ b/macros.xml Tue May 25 20:12:52 2021 +0000 |
b |
@@ -1,6 +1,6 @@ <macros> <token name="@TOOL_VERSION@">3.6.1</token> - <token name="@GALAXY_TOOL_VERSION@">galaxy0</token> + <token name="@GALAXY_TOOL_VERSION@">galaxy1</token> <token name="@BIONANO_SUPPORT_TEXT@"> Bionano Genomics has agreed to provide the licensed Bionano Solve software to enable the VGP to package the software in a container. @@ -23,7 +23,7 @@ </xml> <xml name="requirements"> <requirements> - <container type="docker">bionanodocker/bionano-docker-scaffold:latest</container> + <container type="docker">quay.io/galaxy/bionano-docker-scaffold:1.6.01-bio</container> </requirements> </xml> <macro name="sanitize_string" > |
b |
diff -r 295c0e28f4ee -r 8cc3862f8b8e remove_fake_cut_sites.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/remove_fake_cut_sites.py Tue May 25 20:12:52 2021 +0000 |
[ |
@@ -0,0 +1,84 @@ +import re +import sys + +from Bio import SeqIO +from Bio.Seq import Seq + + +def main(): + + fasta_file = sys.argv[1] + output_file = sys.argv[2] + log_file = sys.argv[3] + + output_handle = open(output_file, "w") + log_handle = open(log_file, "w") + + with open(fasta_file, "r") as fasta_input_handle: + for record in SeqIO.parse(fasta_input_handle, "fasta"): + + change_count = 0 + cut_sites = [ + Seq("CTTAAG"), + Seq("CTTCTCG"), + Seq("GCTCTTC"), + Seq("CCTCAGC"), + Seq("GAATGC"), + Seq("GCAATG"), + Seq("ATCGAT"), + Seq("CACGAG"), + ] + + for cut_site in cut_sites: + cut_site_both_orientations = (cut_site, cut_site.reverse_complement()) + + for cut_site_for_orientation in cut_site_both_orientations: + + n_flank_length = 1 + search_pattern = ( + "N" * n_flank_length + + str(cut_site_for_orientation) + + "N" * n_flank_length + ) + replacement = "N" * ( + n_flank_length * 2 + len(cut_site_for_orientation) + ) + + (new_string, changes) = re.subn( + search_pattern, + replacement, + str(record.seq.upper()), + flags=re.IGNORECASE, + ) + change_count += changes + + record.seq = Seq(new_string) + + if change_count > 0: + log_handle.write( + " ".join([record.id, ":", str(change_count), "changes\n"]) + ) + SeqIO.write([record], output_handle, "fasta") + + # Finally, count the matches + possible_fake_cut_sites = re.findall( + "N[^N]{1,10}N", str(record.seq.upper()) + ) + if len(possible_fake_cut_sites) > 0: + log_handle.write( + " ".join( + [ + record.id, + ":", + str(len(possible_fake_cut_sites)), + "possible non-standard fake cut sites\n", + ] + ) + ) + + output_handle.close() + log_handle.close() + + +if __name__ == "__main__": + main() |
b |
diff -r 295c0e28f4ee -r 8cc3862f8b8e test-data/test_05_report.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_05_report.txt Tue May 25 20:12:52 2021 +0000 |
b |
@@ -0,0 +1,45 @@ +Original BioNano Genome Map statistics: +Count = 2 +Min length (Mbp) = 0.720 +Median length (Mbp) = 2.313 +Mean length (Mbp) = 2.313 +N50 length (Mbp) = 3.906 +Max length (Mbp) = 3.906 +Total length (Mbp) = 4.625 + +Original NGS sequences statistics: +Count = 1 +Min length (Mbp) = 4.753 +Median length (Mbp) = 4.753 +Mean length (Mbp) = 4.753 +N50 length (Mbp) = 4.753 +Max length (Mbp) = 4.753 +Total length (Mbp) = 4.753 + +NGS FASTA sequence in hybrid scaffold statistics: +Count = 1 +Min length (Mbp) = 4.753 +Median length (Mbp) = 4.753 +Mean length (Mbp) = 4.753 +N50 length (Mbp) = 4.753 +Max length (Mbp) = 4.753 +Total length (Mbp) = 4.753 + +Hybrid scaffold FASTA statistics: +Count = 1 +Min length (Mbp) = 4.753 +Median length (Mbp) = 4.753 +Mean length (Mbp) = 4.753 +N50 length (Mbp) = 4.753 +Max length (Mbp) = 4.753 +Total length (Mbp) = 4.753 + +Hybrid scaffold FASTA plus not scaffolded NGS FASTA statistics: +Count = 1 +Min length (Mbp) = 4.753 +Median length (Mbp) = 4.753 +Mean length (Mbp) = 4.753 +N50 length (Mbp) = 4.753 +Max length (Mbp) = 4.753 +Total length (Mbp) = 4.753 + |