Repository 'doclayoutyolo'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/doclayoutyolo

Changeset 0:28b4dc80d58b (2025-06-13)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/image_processing/yolo-utils/doclayoutyolo commit 80167f52fb9bd60b57e4df9d68152876171228d6
added:
doclayoutyolo.xml
segment_text_yolo.py
test-data/input_image_png.png
test-data/input_image_tiff.tif
b
diff -r 000000000000 -r 28b4dc80d58b doclayoutyolo.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/doclayoutyolo.xml Fri Jun 13 14:54:15 2025 +0000
[
@@ -0,0 +1,124 @@
<tool id="doclayoutyolo" name="DocLayout-YOLO" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Enhancing document layout analysis</description>
    <macros>
        <token name="@TOOL_VERSION@">0.0.4.1</token>
        <token name="@VERSION_SUFFIX@">0</token>
        <token name="@PROFILE@">24.2</token>
    </macros>
    <creator>
        <organization name="European Galaxy Team" url="https://galaxyproject.org/eu/"/>
        <person givenName="Anup" familyName="Kumar" email="kumara@informatik.uni-freiburg.de"/>
    </creator>
    <requirements>
        <container type="docker">quay.io/galaxy/doclayout-yolo:@TOOL_VERSION@</container>
    </requirements>
    <required_files>
        <include path="segment_text_yolo.py"/>
    </required_files>
    <command detect_errors="aggressive"><![CDATA[
    python '$__tool_directory__/segment_text_yolo.py'
            --yolo_model '$input_yolo_model'
            --input_image '$input_image'
            --input_image_ext '$input_image.ext'
            --input_confidence '$input_confidence'
            --input_image_size '$input_image_size'
            --output_image '$output_image'
            --output_geojson '$output_segmentation_coordinates'
]]>
    </command>
    <inputs>
        <param name="input_yolo_model" type="data" format="zip" label="Yolo model" help="Please upload a Yolo model."/>
        <param name="input_image" type="data" format="tiff,jpg,png" label="Input image" help="Please provide an input image for the analysis."/>
        <param name="input_confidence" type="float" label="Confidence" value="0.5" min="0.0" max="1.0" help="Set confidence threshold between 0.0 and 1.0 for drawing bounding boxes. Higher values indicate higher probability of segmentation."/>
        <param name="input_image_size" type="integer" label="Image size" value="1024" min="1" max="1500" help="Set input image size for image resize by Doclayout Yolo model. Larger values may provide better accuracy in segmentation but could be slower. Lower values might be faster with lower accuracy."/>
    </inputs>
    <outputs>
        <!-- Annotated image keeps the datatype of the uploaded input image. -->
        <data format_source="input_image" name="output_image" label="Segmented image"></data>
        <data format="geojson" name="output_segmentation_coordinates" label="Segmented coordinates"></data>
    </outputs>
    <tests>
        <test>
            <param name="input_yolo_model" value="input_yolo_model.zip" location="https://huggingface.co/juliozhao/DocLayout-YOLO-DocLayNet-Docsynth300K_pretrained/resolve/main/doclayout_yolo_doclaynet_imgsz1120_docsynth_pretrain.pt?download=true"/>
            <param name="input_image" value="input_image_png.png"/>
            <param name="input_confidence" value="0.5"/>
            <param name="input_image_size" value="1024"/>
            <output name="output_image" ftype="png">
                <assert_contents>
                    <has_size size="920950" delta="100" />
                </assert_contents>
            </output>
            <output name="output_segmentation_coordinates" ftype="geojson">
                <assert_contents>
                    <has_text text="Polygon" />
                    <has_text text="Feature" />
                    <has_text text="coordinates" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_yolo_model" value="input_yolo_model.zip" location="https://huggingface.co/juliozhao/DocLayout-YOLO-DocLayNet-Docsynth300K_pretrained/resolve/main/doclayout_yolo_doclaynet_imgsz1120_docsynth_pretrain.pt?download=true"/>
            <param name="input_image" value="input_image_jpg.jpg" location="https://zenodo.org/records/15649779/files/input_image_jpg.jpg?download=1"/>
            <param name="input_confidence" value="0.5"/>
            <param name="input_image_size" value="1024"/>
            <output name="output_image" ftype="jpg">
                <assert_contents>
                    <has_size size="2753175" delta="100" />
                </assert_contents>
            </output>
            <output name="output_segmentation_coordinates" ftype="geojson">
                <assert_contents>
                    <has_text text="Polygon" />
                    <has_text text="Feature" />
                    <has_text text="coordinates" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_yolo_model" value="input_yolo_model.zip" location="https://huggingface.co/juliozhao/DocLayout-YOLO-DocLayNet-Docsynth300K_pretrained/resolve/main/doclayout_yolo_doclaynet_imgsz1120_docsynth_pretrain.pt?download=true"/>
            <param name="input_image" value="input_image_tiff.tif"/>
            <param name="input_confidence" value="0.5"/>
            <param name="input_image_size" value="1024"/>
            <output name="output_image" ftype="tiff">
                <assert_contents>
                    <has_size size="510756" delta="100" />
                </assert_contents>
            </output>
            <output name="output_segmentation_coordinates" ftype="geojson">
                <assert_contents>
                    <has_text text="Polygon" />
                    <has_text text="Feature" />
                    <has_text text="coordinates" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**

The tool takes a Yolo model trained for annotating bounding boxes around text. It takes a pretrained Yolo model and predicts bounding boxes in the input image where any text is found.
It is based on document layout analysis: https://github.com/opendatalab/DocLayout-YOLO. The Yolo model can be downloaded from: https://huggingface.co/juliozhao/DocLayout-YOLO-DocLayNet-Docsynth300K_pretrained/tree/main

**Input files**
  - Yolo model (as `.pt` file)
  - Input image containing text
  - Confidence score to be used for drawing bounding boxes
  - Image size to be resized to by Yolo model

**Output files**
  - Segmented image
  - Coordinates of bounding boxes as Geojson file

        ]]>
    </help>
    <citations>
        <citation type="bibtex">
            @ARTICLE{zhao2024doclayoutyoloenhancingdocumentlayout,
                Author = {Zhao, Zhiyuan and et al.},
                title = {{DocLayout-YOLO: Enhancing Document Layout Analysis through Diverse Synthetic Data and Global-to-Local Adaptive Perception}},
                url = {https://github.com/opendatalab/DocLayout-YOLO}
            }
        </citation>
    </citations>
</tool>
b
diff -r 000000000000 -r 28b4dc80d58b segment_text_yolo.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/segment_text_yolo.py Fri Jun 13 14:54:15 2025 +0000
[
@@ -0,0 +1,88 @@
+"""
+Segment text using DocLayout Yolo model
+"""
+
+import argparse
+import json
+import os
+
+import cv2
+from doclayout_yolo import YOLOv10
+from geojson import Feature, FeatureCollection
+from shapely.geometry import box, mapping
+
+
def load_model_and_predict(
    model_path, input_image_path, input_confidence, image_size, output_image_path
):
    """Run a DocLayout-YOLO model on one image and save the annotated copy.

    Parameters
    ----------
    model_path : path to the pretrained YOLOv10 ``.pt`` weights
    input_image_path : image file to analyse
    input_confidence : confidence threshold (string or float; cast to float)
    image_size : resize target for the model (string or int; cast to int)
    output_image_path : where the image with drawn bounding boxes is written

    Returns
    -------
    The first prediction result object, carrying the detected boxes.
    """
    yolo = YOLOv10(model=model_path)

    predictions = yolo.predict(
        input_image_path,
        imgsz=int(image_size),
        conf=float(input_confidence),
    )
    first_result = predictions[0]

    # Render the detected layout boxes onto the image and persist it.
    annotated = first_result.plot(pil=True, line_width=5, font_size=20)
    cv2.imwrite(output_image_path, annotated)

    return first_result
+
+
def extract_bb_crop(results, output_segmentation_coordiates):
    """Export the predicted bounding boxes as a GeoJSON FeatureCollection.

    Each detected box (x1, y1, x2, y2) becomes one rectangular Polygon
    feature whose ``id`` property is its index in the prediction order.

    Parameters
    ----------
    results : a single DocLayout-YOLO prediction result with ``.boxes``
    output_segmentation_coordiates : output GeoJSON file path
        (name kept as-is, typo included, for interface compatibility)
    """
    features = [
        Feature(geometry=mapping(box(x1, y1, x2, y2)), properties={"id": idx})
        for idx, (x1, y1, x2, y2) in enumerate(results.boxes.xyxy.cpu().numpy())
    ]

    with open(output_segmentation_coordiates, "w") as geojson_file:
        json.dump(FeatureCollection(features), geojson_file)
+
+
if __name__ == "__main__":
    # CLI entry point: parse arguments, stage inputs under predictable
    # file names, run detection, then export the box coordinates.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-im", "--yolo_model", required=True, help="Input Yolo model"
    )
    parser.add_argument(
        "-ii", "--input_image", required=True, help="Input image file"
    )
    parser.add_argument(
        "-ie", "--input_image_ext", required=True, help="Input image file extension"
    )
    parser.add_argument(
        "-ic", "--input_confidence", required=True, help="Input confidence"
    )
    parser.add_argument(
        "-is", "--input_image_size", required=True, help="Input image size"
    )
    parser.add_argument("-oi", "--output_image", required=True, help="Output image")
    parser.add_argument(
        "-ogj", "--output_geojson", required=True, help="Output segmented coordinates"
    )
    args = parser.parse_args()

    # Galaxy hands over extension-less dataset paths; symlink them to fixed
    # names carrying the right suffixes so the model/image loaders accept them.
    model_link = "yolo_model.pt"
    staged_input_image = f"input_image.{args.input_image_ext}"
    staged_output_image = f"output_image.{args.input_image_ext}"

    os.symlink(args.yolo_model, model_link)
    os.symlink(args.input_image, staged_input_image)
    os.symlink(args.output_image, staged_output_image)

    detection_result = load_model_and_predict(
        model_link,
        staged_input_image,
        args.input_confidence,
        args.input_image_size,
        staged_output_image,
    )
    extract_bb_crop(detection_result, args.output_geojson)
b
diff -r 000000000000 -r 28b4dc80d58b test-data/input_image_png.png
b
Binary file test-data/input_image_png.png has changed
b
diff -r 000000000000 -r 28b4dc80d58b test-data/input_image_tiff.tif
b
Binary file test-data/input_image_tiff.tif has changed