Mercurial > repos > iuc > gffcompare

--- a/gffcompare.xml	Mon May 27 13:54:15 2019 -0400
+++ b/gffcompare.xml	Thu Oct 17 03:01:26 2019 -0400
@@ -3,7 +3,7 @@
     <macros>
         <token name="@GFFCOMPARE_VERSION@">0.11.2</token>
     </macros>
-	<requirements>
+    <requirements>
         <requirement type="package" version="@GFFCOMPARE_VERSION@">gffcompare</requirement>
     </requirements>
     <version_command>gffcompare -v | awk '{print $2}'</version_command>
@@ -49,6 +49,7 @@

 $discard_single_exon
 $discard_duplicates
+$no_merge
 -e $max_dist_exon
 -d $max_dist_group
 $chr_stats
@@ -79,7 +80,7 @@
                     <when value="cached">
                         <param argument="-r" label="Using reference annotation" name="index" type="select">
                             <options from_data_table="gene_sets">
-                                <filter column="1" key="dbkey" ref="gffinputs" type="data_meta" />
+                                <filter column="dbkey" key="dbkey" ref="gffinputs" type="data_meta" />
                             </options>
                             <validator message="No reference annotation is available for the build associated with the selected input dataset" type="no_options" />
                         </param>
@@ -117,7 +118,7 @@
                     <when value="cached">
                         <param argument="-s" label="Using reference genome" name="index" type="select">
                             <options from_data_table="fasta_indexes">
-                                <filter column="1" key="dbkey" ref="gffinputs" type="data_meta" />
+                                <filter column="dbkey" key="dbkey" ref="gffinputs" type="data_meta" />
                             </options>
                             <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" />
                         </param>
@@ -143,7 +144,7 @@
         <param argument="-d" help="max. distance (range) for grouping transcript start sites. Default: 100" label="Max distance for transcript grouping" name="max_dist_group" type="integer" value="100" />
         <param name="chr_stats" argument="--chr-stats" type="boolean" checked="false" truevalue="--chr-stats" falsevalue="" label="Show summary and accuracy data separately for each reference sequence in the transcript accuracy data set" />
         <section name="adv_output" title="Options for the combined GTF output file">
-            <param argument="-p"  type="text" value="TCONS" label="name prefix for consensus transcripts" help="for combined.gtf (default: 'TCONS')" />
+            <param argument="-p"  type="text" value="TCONS" label="name prefix for consensus transcripts" help="for combined.gtf" />
             <param argument="-C"  type="boolean" checked="false" truevalue="-C" falsevalue=""  label="discard matching and 'contained' transfrags" help="i.e. collapse intron-redundant transfrags across all query files" />
             <param argument="-A"  type="boolean" checked="false" truevalue="-A" falsevalue=""  label="discard the 'contained' transfrags except intron-redundant transfrags starting with a different 5' exon" help="like -C but does not discard intron-redundant transfrags if they start with a different 5' exon" />
             <param argument="-X"  type="boolean" checked="false" truevalue="-X" falsevalue=""  label="discard the 'contained' transfrags also if ends stick out within the container's introns" help="like -C but also discard contained transfrags if transfrag ends stick out within the container's introns" />
@@ -184,14 +185,14 @@
                 <not_has_text text="-Q " />
                 <not_has_text text="--strict-match " />
                 <not_has_text text="-T " />
-                <not_has_text text="-s " />
+                <has_text_matching expression="^.*gffcompare((?!-s).)*$" /> <!-- The command line also contains "ln -s", so a plain not_has_text check for "-s " cannot be used; this regexp instead matches only if some "gffcompare" occurrence is followed by no "-s" up to the end of the text, i.e. gffcompare itself is meant to run without -s. NOTE(review): the greedy ".*" lets the match anchor on any later "gffcompare" substring (e.g. inside an input name), and the lookahead rejects any "-s" substring rather than the "-s " option only; confirm against the generated command. -->
                 <not_has_text text="-M " />
                 <not_has_text text="-N " />
                 <has_text text="-e 100 " />
                 <has_text text="-d 100 " />
                 <not_has_text text="-D " />
                 <not_has_text text="--no-merge " />
-                <has_text text="-p TCONS " />
+                <has_text text="-p 'TCONS' " />
                 <not_has_text text="-C " />
                 <not_has_text text="-A " />
                 <not_has_text text="-X " />
@@ -221,12 +222,12 @@
                 <not_has_text text="-R " />
                 <not_has_text text="-Q " />
                 <has_text text="-T " />
-                <has_text text="-s " />
+                <has_text_matching expression="gffcompare.*-s " /> <!-- The command line also contains "ln -s", so a plain has_text check for "-s " would be satisfied by ln alone; this regexp instead requires "-s " to appear somewhere after "gffcompare". NOTE(review): any "gffcompare" substring that is followed later by "-s " satisfies the pattern; confirm that only the gffcompare invocation can match. -->
                 <not_has_text text="-M " />
                 <not_has_text text="-N " />
                 <has_text text="-e 100 " />
                 <has_text text="-d 100 " />
-                <has_text text="-p TCONS " />
+                <has_text text="-p 'TCONS' " />
                 <not_has_text text="-C " />
                 <not_has_text text="-A " />
                 <not_has_text text="-X " />
@@ -255,12 +256,12 @@
                 <not_has_text text="-R " />
                 <not_has_text text="-Q " />
                 <has_text text="-T " />
-                <has_text text="-s " />
+                <has_text_matching expression="gffcompare.*-s " />
                 <not_has_text text="-M " />
                 <not_has_text text="-N " />
                 <has_text text="-e 100 " />
                 <has_text text="-d 100 " />
-                <has_text text="-p TCONS " />
+                <has_text text="-p 'TCONS' " />
                 <not_has_text text="-C " />
                 <not_has_text text="-A " />
                 <not_has_text text="-X " />
@@ -287,19 +288,23 @@
             <assert_command>
                 <not_has_text text="-R " />
                 <not_has_text text="-Q " />
+                <not_has_text text="--strict-match " />
                 <not_has_text text="-T " />
                 <not_has_text text="-M " />
                 <not_has_text text="-N " />
                 <has_text text="-e 100 " />
                 <has_text text="-d 100 " />
-                <has_text text="-p TCONS " />
+                <not_has_text text="-D " />
+                <not_has_text text="--no-merge " />
+                <not_has_text text="--chr-stats" />
+                <has_text text="-p 'TCONS' " />
                 <not_has_text text="-C " />
                 <not_has_text text="-A " />
                 <not_has_text text="-X " />
                 <not_has_text text="-K " />
             </assert_command>
-            <output file="gffcompare_out2.stats" name="transcripts_stats" lines_diff="6" />
-            <output file="gffcompare_out2.loci" name="transcripts_loci" lines_diff="2" />
+            <output file="gffcompare_out2.stats" name="transcripts_stats" />
+            <output file="gffcompare_out2.loci" name="transcripts_loci" compare="sim_size" />
             <output file="gffcompare_out2.tracking" name="transcripts_tracking" />
             <output file="gffcompare_out2.gtf" name="transcripts_combined" />
             <output_collection name="refmap_output" type="list" count="2">
@@ -346,7 +351,7 @@
                 <has_text text="-D " />
                 <has_text text="--no-merge " />
                 <has_text text="--chr-stats" />
-                <not_has_text text="-p TCONS " />
+                <has_text text="-p 'TCONS' " />
                 <not_has_text text="-C " />
                 <not_has_text text="-A " />
                 <not_has_text text="-X " />
@@ -382,12 +387,16 @@
             <assert_command>
                 <not_has_text text="-R " />
                 <not_has_text text="-Q " />
+                <not_has_text text="--strict-match " />
                 <not_has_text text="-T " />
                 <not_has_text text="-M " />
                 <not_has_text text="-N " />
                 <has_text text="-e 100 " />
                 <has_text text="-d 100 " />
-                <has_text text="-p OTHER " />
+                <not_has_text text="-D " />
+                <not_has_text text="--no-merge " />
+                <not_has_text text="--chr-stats" />
+                <has_text text="-p 'OTHER' " />
                 <has_text text="-C " />
                 <has_text text="-A " />
                 <has_text text="-X " />
@@ -416,19 +425,23 @@
             <assert_command>
                 <not_has_text text="-R " />
                 <not_has_text text="-Q " />
+                <not_has_text text="--strict-match " />
                 <has_text text="-T " />
                 <not_has_text text="-M " />
                 <not_has_text text="-N " />
                 <has_text text="-e 100 " />
                 <has_text text="-d 100 " />
-                <has_text text="-p TCONS " />
+                <not_has_text text="-D " />
+                <not_has_text text="--no-merge " />
+                <not_has_text text="--chr-stats" />
+                <has_text text="-p 'TCONS' " />
                 <not_has_text text="-C " />
                 <not_has_text text="-A " />
                 <not_has_text text="-X " />
                 <not_has_text text="-K " />
             </assert_command>
-            <output file="gffcompare_out2.stats" name="transcripts_stats" lines_diff="6" />
-            <output file="gffcompare_out2.loci" name="transcripts_loci" lines_diff="2" />
+            <output file="gffcompare_out2.stats" name="transcripts_stats" lines_diff="2" />
+            <output file="gffcompare_out2.loci" name="transcripts_loci" compare="sim_size" />
             <output file="gffcompare_out2.tracking" name="transcripts_tracking" />
             <output file="gffcompare_out2.gtf" name="transcripts_combined" />
         </test>
@@ -449,8 +462,8 @@
             <param name="discard_single_exon" value="" />
             <param name="max_dist_exon" value="100" />
             <param name="max_dist_group" value="100" />
-            <output file="gffcompare_out3.stats" name="transcripts_stats" lines_diff="6" />
-            <output file="gffcompare_out3.loci" name="transcripts_loci" />
+            <output file="gffcompare_out3.stats" name="transcripts_stats"/>
+            <output file="gffcompare_out3.loci" name="transcripts_loci" compare="sim_size" />
             <output file="gffcompare_out3.tracking" name="transcripts_tracking" />
             <output file="gffcompare_out3.gtf" name="transcripts_annotated" />
         </test>
--- a/test-data/gffcompare_out3.stats	Mon May 27 13:54:15 2019 -0400
+++ b/test-data/gffcompare_out3.stats	Thu Oct 17 03:01:26 2019 -0400
@@ -1,29 +1,29 @@
-# gffcompare v0.10.6 | Command line was:
+# gffcompare v0.11.2 | Command line was:
 #gffcompare -r ref_annotation -R -T -e 100 -d 100 -p TCONS gffcompare_in4_gtf
 #

 #= Summary for dataset: gffcompare_in4_gtf
 #     Query mRNAs :      35 in      29 loci  (15 multi-exon transcripts)
 #            (3 multi-transcript loci, ~1.2 transcripts per locus)
-# Reference mRNAs :      20 in       7 loci  (19 multi-exon)
+# Reference mRNAs :      19 in       6 loci  (19 multi-exon)
 # Super-loci w/ reference transcripts:        6
 #-----------------| Sensitivity | Precision  |
-        Base level:    72.6     |    60.7    |
-        Exon level:    80.0     |    55.7    |
+        Base level:    72.7     |    60.7    |
+        Exon level:    81.0     |    55.7    |
       Intron level:    81.2     |    64.4    |
 Intron chain level:    10.5     |    13.3    |
-  Transcript level:    10.0     |     5.7    |
-       Locus level:    28.6     |     6.9    |
+  Transcript level:    10.5     |     5.7    |
+       Locus level:    33.3     |     6.9    |

      Matching intron chains:       2
        Matching transcripts:       2
               Matching loci:       2

-          Missed exons:       3/85	(  3.5%)
+          Missed exons:       2/84	(  2.4%)
            Novel exons:      46/122	( 37.7%)
         Missed introns:      11/69	( 15.9%)
          Novel introns:      28/87	( 32.2%)
-           Missed loci:       0/7	(  0.0%)
+           Missed loci:       0/6	(  0.0%)
             Novel loci:      15/29	( 51.7%)

  Total union super-loci across all input datasets: 21