changeset 0:4eadb56fa314 draft

Uploaded
author fubar
date Fri, 14 Jun 2024 06:06:45 +0000
parents
children 49c6f715bc82
files bbgbigwig/.shed.yml bbgbigwig/bam_bed_gff_to_bigwig.xml bbgbigwig/gff_to_bed_converter.py bbgbigwig/test-data/dbkeys.loc.test bbgbigwig/test-data/featureCounts_input1.bam bbgbigwig/test-data/featureCounts_input1.bigwig bbgbigwig/test-data/merlin.bed.bigwig bbgbigwig/test-data/merlin.gff.bigwig bbgbigwig/test-data/srma_out2.bam bbgbigwig/test-data/srma_out2.bigwig bbgbigwig/test-data/test-6.bed bbgbigwig/test-data/test5.bed bbgbigwig/test-data/test5.bed.bigwig bbgbigwig/test-data/test5.gff.bigwig bbgbigwig/test-data/test5.gff3 bbgbigwig/test-data/testing.len bbgbigwig/tool-data/dbkeys.loc.sample bbgbigwig/tool-data/testing.len bbgbigwig/tool_data_table_conf.xml.sample bbgbigwig/tool_data_table_conf.xml.test
diffstat 20 files changed, 282 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/.shed.yml	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,11 @@
+name: bbgbigwig
+owner: iuc
+categories:
+- Convert Formats
+description: Make a coverage bigwig from bam, bed or gff, optionally with a chromosome length file.
+long_description: |
+  Designed for JBrowse2 assembly-visualisation workflows where a dbkey is not yet ready to be assigned, even with a
+  custom genome. A bigwig coverage track is very useful when there are too many features to display individually.
+type: unrestricted
+remote_repository_url: https://www.encodeproject.org/software/bedgraphtobigwig/
+homepage_url: https://www.encodeproject.org/software/bedgraphtobigwig/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/bam_bed_gff_to_bigwig.xml	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,103 @@
+<tool id="bbgtobigwig" name="Convert BAM/BED/GFF to BigWig" version="0.1" profile="22.05"> 
+    <!-- Conda packages the command section relies on. -->
+    <requirements>
+        <!-- provides bedGraphToBigWig, the final conversion step -->
+        <requirement type="package" version="455">ucsc-bedgraphtobigwig</requirement>
+        <!-- provides bedtools genomecov, which computes the coverage bedGraph -->
+        <requirement type="package" version="2.31.1">bedtools</requirement>
+        <!-- provides sort (and ln) used in the shell pipeline -->
+        <requirement type="package" version="9.5">coreutils</requirement>
+        <!-- runs gff_to_bed_converter.py -->
+        <requirement type="package" version="3.12.3">python</requirement>
+    </requirements>
+    <!-- Helper script shipped with the tool and called from the command section. -->
+    <required_files>
+        <include path="gff_to_bed_converter.py"/>
+    </required_files>
+    <command detect_errors="aggressive"><![CDATA[
+## Let a failure in bedtools fail the whole pipeline instead of being masked by sort's exit status.
+set -o pipefail &&
+## Chromosome lengths come either from the __dbkeys__ data table entry or from a history dataset.
+#if $hist_or_builtin.genosrc == "indexed":
+    ln -s '$hist_or_builtin.chromfile.fields.len_path' ./CHROMFILE &&
+#else:
+    ln -s '$hist_or_builtin.chromfile' ./CHROMFILE &&
+#end if
+## GFF/GFF3 is converted to BED first; BAM and BED inputs are linked unchanged.
+#if $hist_or_builtin.input1.ext in ['gff', 'gff3']:
+    python '$__tool_directory__/gff_to_bed_converter.py' < '$hist_or_builtin.input1' > input2 &&
+#else:
+    ln -s '$hist_or_builtin.input1' input2 &&
+#end if
+## Every accepted BAM flavour must be read with -ibam; with -i it would be parsed as BED text.
+## Note that bedtools genomecov expects the BAM to be sorted by position.
+#if $hist_or_builtin.input1.ext in ['bam', 'unsorted.bam']:
+    bedtools genomecov -bg -split -ibam input2 |
+#else:
+    bedtools genomecov -bg -i input2 -g ./CHROMFILE |
+#end if
+## bedGraphToBigWig needs the bedGraph sorted by chromosome name, then start, using C collation.
+LC_COLLATE=C sort -k1,1 -k2,2n > temp.bg &&
+bedGraphToBigWig temp.bg ./CHROMFILE '$output'
+    ]]></command>
+    <inputs>
+        <!-- The source of the chromosome lengths decides which pair of parameters is shown. -->
+        <conditional name="hist_or_builtin">
+            <param name="genosrc" type="select"
+                   label="Is the input assigned to a built-in or custom reference genome?"
+                   help="If the input has no dbkey, supply a chromosome lengths file">
+                <option value="indexed" selected="True">Input data was made with a built-in genome or already has a custom genome dbkey</option>
+                <option value="history">Input data mapped on a genome from the current history. The chromosome lengths file is also in the history</option>
+            </param>
+            <!-- Lengths file is looked up in the __dbkeys__ data table using the dbkey of the input. -->
+            <when value="indexed">
+                <param name="input1" type="data" format="bam,unsorted.bam,bed,gff,gff3" label="bam/bed/gff to convert">
+                    <validator type="unspecified_build"/>
+                </param>
+                <param name="chromfile" type="select" label="Source Genome Build">
+                    <options from_data_table="__dbkeys__">
+                        <!-- keep only the table rows whose first column equals the input's dbkey -->
+                        <filter type="data_meta" ref="input1" key="dbkey" column="0"/>
+                    </options>
+                    <validator type="no_options" message="The chosen genome build is not available."/>
+                </param>
+            </when>
+            <!-- Lengths file is a dataset chosen from the history. -->
+            <when value="history">
+                <param name="input1" type="data" format="bam,unsorted.bam,bed,gff,gff3" label="bam/bed/gff to convert"/>
+                <param name="chromfile" type="data" format="len,txt,tabular"
+                       label="Chromosome length file"
+                       help="Sequence lengths for the history reference are required to make a bigwig. Compute sequence length tool makes these from fasta files"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- The bigwig file written by bedGraphToBigWig at the end of the command. -->
+        <data format="bigwig" name="output"/>
+    </outputs>
+    <tests>
+        <!-- BAM input carrying dbkey hg38; the lengths file is resolved through the __dbkeys__ table. -->
+        <test expect_num_outputs="1">
+            <conditional name="hist_or_builtin">
+                <param name="genosrc" value="indexed"/>
+                <param name="input1" value="featureCounts_input1.bam" dbkey="hg38"/>
+                <param name="chromfile" value="hg38"/>
+            </conditional>
+            <output name="output" value="featureCounts_input1.bigwig" compare="sim_size"/>
+        </test>
+        <!-- BAM input with the lengths file supplied from the history. -->
+        <test expect_num_outputs="1">
+            <conditional name="hist_or_builtin">
+                <param name="genosrc" value="history"/>
+                <param name="input1" value="srma_out2.bam"/>
+                <param name="chromfile" value="testing.len"/>
+            </conditional>
+            <output name="output" value="srma_out2.bigwig" compare="sim_size"/>
+        </test>
+        <!-- GFF3 input, which goes through gff_to_bed_converter.py first. -->
+        <test expect_num_outputs="1">
+            <conditional name="hist_or_builtin">
+                <param name="genosrc" value="history"/>
+                <param name="input1" value="test5.gff3"/>
+                <param name="chromfile" value="testing.len"/>
+            </conditional>
+            <output name="output" value="test5.gff.bigwig" compare="sim_size"/>
+        </test>
+        <!-- BED input with the lengths file supplied from the history. -->
+        <test expect_num_outputs="1">
+            <conditional name="hist_or_builtin">
+                <param name="genosrc" value="history"/>
+                <param name="input1" value="test5.bed"/>
+                <param name="chromfile" value="testing.len"/>
+            </conditional>
+            <output name="output" value="test5.bed.bigwig" compare="sim_size"/>
+        </test>
+    </tests>
+    <help>
+
+   Converts a bam, bed or gff/gff3 dataset into a bigwig coverage track.
+   If the input does not have a dbkey, a chromosome lengths file from the history is needed.
+   This is useful in workflows for assemblies in progress, before a stable reference is available as
+   a custom or built-in reference dbkey.
+
+    </help>    
+    <citations>
+        <!-- bigWig format and the UCSC bedGraphToBigWig utility -->
+        <citation type="doi">10.1093/bioinformatics/btq351</citation>
+        <!-- BEDTools, whose genomecov command computes the coverage -->
+        <citation type="doi">10.1093/bioinformatics/btq033</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/gff_to_bed_converter.py	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+import sys
+
+assert sys.version_info[:2] >= (2, 6)
+
+
+def __main__():
+    skipped_lines = 0
+    first_skipped_line = None
+    # was sys.argv[2] but we need stdout for a pipe in bam_bed_gff_to_bigwig.xml
+    for i, line in enumerate(sys.stdin):
+        line = line.rstrip("\r\n")
+        if line and not line.startswith("#"):
+            try:
+                elems = line.split("\t")
+                start = str(int(elems[3]) - 1)
+                endoff = str(int(elems[4]) - 1)
+                # GFF format: chrom, source, name, chromStart, chromEnd, score, strand
+                # bedtools puts out only 4 fields: chrom, chromStart, chromEnd, score
+                sys.stdout.write(f"{elems[0]}\t{start}\t{endoff}\t0\n")
+            except Exception:
+                skipped_lines += 1
+                if not first_skipped_line:
+                    first_skipped_line = i + 1
+        else:
+            skipped_lines += 1
+            if not first_skipped_line:
+                first_skipped_line = i + 1
+
+
+# Run the conversion only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/test-data/dbkeys.loc.test	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,2 @@
+#<dbkey>	<display_name>	<len_file_path>
+hg38	hg38	${__HERE__}/testing.len
Binary file bbgbigwig/test-data/featureCounts_input1.bam has changed
Binary file bbgbigwig/test-data/featureCounts_input1.bigwig has changed
Binary file bbgbigwig/test-data/merlin.bed.bigwig has changed
Binary file bbgbigwig/test-data/merlin.gff.bigwig has changed
Binary file bbgbigwig/test-data/srma_out2.bam has changed
Binary file bbgbigwig/test-data/srma_out2.bigwig has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/test-data/test-6.bed	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,3 @@
+Merlin	49	1452	chromosomal_replication_initiator_protein_DnaA	0	+
+Merlin	1457	2557	DNA_polymerase_III_subunit_beta	0	+
+Merlin	2557	3630	DNA_replication_and_repair_protein_RecF	0	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/test-data/test5.bed	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,17 @@
+chr7	115444712	115444739	CCDS5763.1_cds_0_0_chr7_115444713_f	0	+
+chr7	115468538	115468624	CCDS5763.1_cds_1_0_chr7_115468539_f	0	+
+chr7	115483024	115483277	CCDS5763.1_cds_2_0_chr7_115483025_f	0	+
+chr7	115484165	115484501	CCDS5763.1_cds_3_0_chr7_115484166_f	0	+
+chr7	115485764	115485980	CCDS5763.1_cds_4_0_chr7_115485765_f	0	+
+chr7	115486322	115486481	CCDS5763.1_cds_5_0_chr7_115486323_f	0	+
+chr7	115491298	115491487	CCDS5763.1_cds_6_0_chr7_115491299_f	0	+
+chr7	115468538	115468624	CCDS5764.1_cds_0_0_chr7_115468539_f	0	+
+chr7	115483024	115483277	CCDS5764.1_cds_1_0_chr7_115483025_f	0	+
+chr7	115484165	115484501	CCDS5764.1_cds_2_0_chr7_115484166_f	0	+
+chr7	115485764	115485980	CCDS5764.1_cds_3_0_chr7_115485765_f	0	+
+chr7	115486322	115486481	CCDS5764.1_cds_4_0_chr7_115486323_f	0	+
+chr7	115491298	115491487	CCDS5764.1_cds_5_0_chr7_115491299_f	0	+
+chr7	115733786	115733936	CCDS5766.1_cds_0_0_chr7_115733787_f	0	+
+chr7	115734264	115734452	CCDS5766.1_cds_1_0_chr7_115734265_f	0	+
+chr7	115739975	115740126	CCDS5766.1_cds_2_0_chr7_115739976_f	0	+
+chr7	115733786	115733936	CCDS5765.1_cds_0_0_chr7_115733787_f	0	+
Binary file bbgbigwig/test-data/test5.bed.bigwig has changed
Binary file bbgbigwig/test-data/test5.gff.bigwig has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/test-data/test5.gff3	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,38 @@
+##gff-version 3
+##date Tue Jun 26 10:48:17 2007
+##sequence-region ctgA 1 50000
+##source gbrowse GFFToGalaxyDumper plugin
+##NOTE: All features dumped.
+ctgA	example	my_feature	22132	24633	.	+	.	ID=My_feature:f15
+ctgA	example	my_feature	46990	48410	.	-	.	ID=My_feature:f11
+ctgA	example	my_feature	44705	47713	.	-	.	ID=My_feature:f01
+ctgA	example	my_feature	36649	40440	.	-	.	ID=My_feature:f03
+ctgA	example	my_feature	23072	23185	.	+	.	ID=My_feature:f14
+ctgA	example	my_feature	37242	38653	.	+	.	ID=My_feature:f04
+ctgA	example	motif	37497	40559	.	-	.	ID=Motif:m15;Note=7-transmembrane
+ctgA	example	my_feature	36034	38167	.	+	.	ID=My_feature:f09
+ctgA	example	motif	28332	30033	.	-	.	ID=Motif:m02;Note=HOX
+ctgA	example	my_feature	4715	5968	.	-	.	ID=My_feature:f05
+ctgA	example	motif	48253	48366	.	+	.	ID=Motif:m01;Note=WD40
+ctgA	example	BAC	1000	20000	.	.	.	ID=BAC:b101.2;Note=Fingerprinted+BAC+with+end+reads
+ctgA	example	right_end_read	19500	20000	.	-	.	Parent=BAC:b101.2
+ctgA	example	left_end_read	1000	1500	.	+	.	Parent=BAC:b101.2
+ctgA	example	motif	13801	14007	.	-	.	ID=Motif:m05;Note=helix+loop+helix
+ctgA	example	coding	1050	9000	.	+	.	ID=mRNA:EDEN.1;Gene=EDEN
+ctgA	example	CDS	1201	1500	.	+	0	Parent=mRNA:EDEN.1
+ctgA	example	CDS	3000	3902	.	+	0	Parent=mRNA:EDEN.1
+ctgA	example	CDS	5000	5500	.	+	0	Parent=mRNA:EDEN.1
+ctgA	example	CDS	7000	7608	.	+	0	Parent=mRNA:EDEN.1
+ctgA	example	processed_transcript	1050	9000	.	+	.	ID=mRNA:EDEN.1
+ctgA	example	5'-UTR	1050	1200	.	+	.	Parent=mRNA:EDEN.1
+ctgA	example	3'-UTR	7609	9000	.	+	.	Parent=mRNA:EDEN.1
+ctgA	est	match	5410	7503	.	-	.	ID=EST:agt830.3;Target=agt830.3+1+595
+ctgA	est	HSP	7000	7503	.	-	.	Parent=EST:agt830.3;Target=agt830.3+1+504
+ctgA	est	HSP	5410	5500	.	-	.	Parent=EST:agt830.3;Target=agt830.3+505+595
+ctgA	example	motif	46012	48851	.	+	.	ID=Motif:m09;Note=kinase
+ctgA	example	match	6885	8999	.	-	.	ID=Match:seg03
+ctgA	example	HSP	8306	8999	.	-	.	Parent=Match:seg03
+ctgA	example	HSP	8055	8080	.	-	.	Parent=Match:seg03
+ctgA	example	HSP	7410	7737	.	-	.	Parent=Match:seg03
+ctgA	example	HSP	6885	7241	.	-	.	Parent=Match:seg03
+ctgA	example	my_feature	13280	16394	.	+	.	ID=My_feature:f08
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/test-data/testing.len	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,30 @@
+dummy_chr	100000000
+chr1	1000000000
+X	1000000000
+16	1000000000
+ctgA	1000000000
+Merlin	10000000
+super_1	1000000000
+chr1	1000000000
+chr7	2000000000
+chrX	2000000000
+phiX174	100000000
+random_phiX_region_1	100000000
+random_phiX_region_2	100000000
+random_phiX_region_3	100000000
+random_phiX_region_4	100000000
+random_phiX_region_5	100000000
+random_phiX_region_6	100000000
+random_phiX_region_7	100000000
+random_phiX_region_8	100000000
+random_phiX_region_9	100000000
+random_phiX_region_10	100000000
+random_phiX_region_11	100000000
+random_phiX_region_12	100000000
+random_phiX_region_13	100000000
+random_phiX_region_14	100000000
+random_phiX_region_15	100000000
+random_phiX_region_16	100000000
+random_phiX_region_17	100000000
+random_phiX_region_18	100000000
+random_phiX_region_19	100000000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/tool-data/dbkeys.loc.sample	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,1 @@
+#<dbkey>	<display_name>	<len_file_path>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/tool-data/testing.len	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,30 @@
+dummy_chr	100000000
+chr1	1000000000
+X	1000000000
+16	1000000000
+ctgA	1000000000
+Merlin	10000000
+super_1	1000000000
+chr1	1000000000
+chr7	2000000000
+chrX	2000000000
+phiX174	100000000
+random_phiX_region_1	100000000
+random_phiX_region_2	100000000
+random_phiX_region_3	100000000
+random_phiX_region_4	100000000
+random_phiX_region_5	100000000
+random_phiX_region_6	100000000
+random_phiX_region_7	100000000
+random_phiX_region_8	100000000
+random_phiX_region_9	100000000
+random_phiX_region_10	100000000
+random_phiX_region_11	100000000
+random_phiX_region_12	100000000
+random_phiX_region_13	100000000
+random_phiX_region_14	100000000
+random_phiX_region_15	100000000
+random_phiX_region_16	100000000
+random_phiX_region_17	100000000
+random_phiX_region_18	100000000
+random_phiX_region_19	100000000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/tool_data_table_conf.xml.sample	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Maps each dbkey (value) to a display name and the path of its chromosome lengths (.len) file -->
+    <table comment_char="#" name="__dbkeys__">
+        <columns>value, name, len_path</columns>
+        <file path="tool-data/dbkeys.loc"/>
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bbgbigwig/tool_data_table_conf.xml.test	Fri Jun 14 06:06:45 2024 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Test copy of the dbkey table: dbkey (value), display name and path of the chromosome lengths (.len) file -->
+    <table allow_duplicate_entries="False" comment_char="#" name="__dbkeys__">
+        <columns>value, name, len_path</columns>
+        <file path="${__HERE__}/test-data/dbkeys.loc.test"/>
+    </table>
+</tables>