# HG changeset patch # User bgruening # Date 1621973572 0 # Node ID 8cc3862f8b8ea211e8ca99f03c25e2d6defc0412 # Parent 295c0e28f4eeeb0e9c9fb9e3a3e16b414780cc14 "planemo upload for repository https://bionanogenomics.com/support/software-downloads/ commit a3d75aba3a21d88adb3706fbcefcaed4fbcb80fe" diff -r 295c0e28f4ee -r 8cc3862f8b8e bionano_scaffold.xml --- a/bionano_scaffold.xml Sun May 23 17:23:03 2021 +0000 +++ b/bionano_scaffold.xml Tue May 25 20:12:52 2021 +0000 @@ -7,6 +7,8 @@ total_contigs_raw.fasta + #if $trim_cut_sites + && python '$__tool_directory__/remove_fake_cut_sites.py' 'total_contigs_raw.fasta' 'total_contigs_trimmed.fasta' 'output.log' + #end if ]]> - + @@ -384,6 +388,7 @@ + - - - + + trim_cut_sites == False + + + trim_cut_sites + + + trim_cut_sites + zip_file - + @@ -434,13 +445,12 @@ - + + - - @@ -464,24 +474,23 @@ - + + - + - - @@ -500,7 +509,7 @@ - + @@ -510,15 +519,14 @@ - + + - - @@ -542,7 +550,7 @@ - + @@ -552,15 +560,14 @@ - + + - - @@ -583,6 +590,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 3.6.1 - galaxy0 + galaxy1 Bionano Genomics has agreed to provide the licensed Bionano Solve software to enable the VGP to package the software in a container. 
# NOTE: this script was reconstructed from a Mercurial patch whose text had
# been collapsed onto a single line.  Patch text that shared that line is kept
# verbatim in the comments below so that no information is lost:
#   @@ -23,7 +23,7 @@
#   - bionanodocker/bionano-docker-scaffold:latest
#   + quay.io/galaxy/bionano-docker-scaffold:1.6.01-bio
#   diff -r 295c0e28f4ee -r 8cc3862f8b8e remove_fake_cut_sites.py
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/remove_fake_cut_sites.py Tue May 25 20:12:52 2021 +0000
#   @@ -0,0 +1,84 @@
"""Mask N-flanked cut-site motifs ("fake cut sites") in a FASTA file.

Usage: remove_fake_cut_sites.py <input.fasta> <output.fasta> <log file>

Every record is upper-cased, each listed cut-site motif (in either
orientation) that is directly flanked by ``N`` on both sides is overwritten
with ``N``, and the record is written to the output FASTA.  Per-record
counts are written to the log file.
"""
import re
import sys
from typing import Tuple

# Motifs to mask; each one is also searched as its reverse complement.
CUT_SITES = (
    "CTTAAG",
    "CTTCTCG",
    "GCTCTTC",
    "CCTCAGC",
    "GAATGC",
    "GCAATG",
    "ATCGAT",
    "CACGAG",
)

# Number of N characters required on each side of a motif.
_N_FLANK_LENGTH = 1

_COMPLEMENT = str.maketrans("ACGT", "TGCA")


def reverse_complement(site: str) -> str:
    """Return the reverse complement of an upper-case ``ACGT`` string."""
    return site.translate(_COMPLEMENT)[::-1]


def _build_fake_cut_site_regex():
    """Compile one regex matching any motif, in either orientation, between N flanks.

    The trailing flank is a lookahead so that it is not consumed: two motifs
    separated by a single shared ``N`` (``N<site>N<site>N``) are then both
    matched.  With a consuming trailing ``N`` the second one would be skipped.
    """
    motifs = sorted(
        {
            oriented
            for site in CUT_SITES
            for oriented in (site, reverse_complement(site))
        }
    )
    flank = "N" * _N_FLANK_LENGTH
    return re.compile(flank + "(?:" + "|".join(motifs) + ")(?=" + flank + ")")


# Compiled once at import time instead of once per record and per motif.
_FAKE_CUT_SITE_RE = _build_fake_cut_site_regex()

# A run of 1-10 non-N characters enclosed by N.  The closing N is a lookahead
# for the same shared-flank reason as above.
_POSSIBLE_FAKE_CUT_SITE_RE = re.compile(r"N[^N]{1,10}(?=N)")


def mask_fake_cut_sites(sequence: str) -> Tuple[str, int]:
    """Overwrite every N-flanked cut-site motif in *sequence* with ``N``.

    Args:
        sequence: nucleotide sequence; it is upper-cased before matching.

    Returns:
        ``(masked_sequence, change_count)`` where ``masked_sequence`` is the
        upper-cased sequence with each matched motif replaced by ``N`` (the
        length is unchanged) and ``change_count`` is the number of motifs
        that were masked.
    """
    return _FAKE_CUT_SITE_RE.subn(
        lambda match: "N" * len(match.group()), sequence.upper()
    )


def count_possible_fake_cut_sites(sequence: str) -> int:
    """Count runs of 1-10 non-N characters enclosed by ``N`` on both sides."""
    return len(_POSSIBLE_FAKE_CUT_SITE_RE.findall(sequence.upper()))


def main():
    """Read the FASTA named in argv, mask fake cut sites, write FASTA and log.

    Command-line arguments: input FASTA path, output FASTA path, log path.
    Exits with a usage message if fewer than three arguments are given.
    """
    # Imported here so the pure helpers above can be imported and tested
    # without Biopython being installed.
    from Bio import SeqIO
    from Bio.Seq import Seq

    if len(sys.argv) < 4:
        sys.exit(
            "usage: remove_fake_cut_sites.py "
            "<input.fasta> <output.fasta> <log file>"
        )
    fasta_file, output_file, log_file = sys.argv[1:4]

    # Context managers guarantee both outputs are flushed and closed even if
    # parsing or writing raises part-way through.
    with open(fasta_file, "r") as fasta_input_handle:
        with open(output_file, "w") as output_handle, open(
            log_file, "w"
        ) as log_handle:
            for record in SeqIO.parse(fasta_input_handle, "fasta"):
                masked, change_count = mask_fake_cut_sites(str(record.seq))
                record.seq = Seq(masked)

                if change_count > 0:
                    log_handle.write(f"{record.id} : {change_count} changes\n")
                SeqIO.write([record], output_handle, "fasta")

                # Finally, report short N-enclosed runs that remain after
                # masking; these were not in the motif list.
                possible = count_possible_fake_cut_sites(masked)
                if possible > 0:
                    log_handle.write(
                        f"{record.id} : {possible} "
                        "possible non-standard fake cut sites\n"
                    )


if __name__ == "__main__":
    main()

# (patch text that followed the script on the same line)
#   diff -r 295c0e28f4ee -r 8cc3862f8b8e
test-data/test_05_report.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_05_report.txt Tue May 25 20:12:52 2021 +0000 @@ -0,0 +1,45 @@ +Original BioNano Genome Map statistics: +Count = 2 +Min length (Mbp) = 0.720 +Median length (Mbp) = 2.313 +Mean length (Mbp) = 2.313 +N50 length (Mbp) = 3.906 +Max length (Mbp) = 3.906 +Total length (Mbp) = 4.625 + +Original NGS sequences statistics: +Count = 1 +Min length (Mbp) = 4.753 +Median length (Mbp) = 4.753 +Mean length (Mbp) = 4.753 +N50 length (Mbp) = 4.753 +Max length (Mbp) = 4.753 +Total length (Mbp) = 4.753 + +NGS FASTA sequence in hybrid scaffold statistics: +Count = 1 +Min length (Mbp) = 4.753 +Median length (Mbp) = 4.753 +Mean length (Mbp) = 4.753 +N50 length (Mbp) = 4.753 +Max length (Mbp) = 4.753 +Total length (Mbp) = 4.753 + +Hybrid scaffold FASTA statistics: +Count = 1 +Min length (Mbp) = 4.753 +Median length (Mbp) = 4.753 +Mean length (Mbp) = 4.753 +N50 length (Mbp) = 4.753 +Max length (Mbp) = 4.753 +Total length (Mbp) = 4.753 + +Hybrid scaffold FASTA plus not scaffolded NGS FASTA statistics: +Count = 1 +Min length (Mbp) = 4.753 +Median length (Mbp) = 4.753 +Mean length (Mbp) = 4.753 +N50 length (Mbp) = 4.753 +Max length (Mbp) = 4.753 +Total length (Mbp) = 4.753 +