Mercurial > repos > galaxyp > openms_openswathmzmlfilecacher
comparison generate-foo.sh @ 14:fe39f583c19a draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 5c080b1e2b99f1c88f4557e9fec8c45c9d23b906
| author | galaxyp |
|---|---|
| date | Fri, 14 Jun 2024 21:42:42 +0000 |
| parents | f82b320ceb90 |
| children |
comparison
equal
deleted
inserted
replaced
| 13:f82b320ceb90 | 14:fe39f583c19a |
|---|---|
| 1 #!/usr/bin/env bash | |
| 2 | |
| 3 # parse test definitions from OpenMS sources for a tool with a given id | |
| 4 function get_tests2 { | |
| 5 id=$1 | |
| 6 >&2 echo "generate tests for $id" | |
| 7 echo '<xml name="autotest_'"$id"'">' | |
| 8 | |
| 9 # get the tests from the CMakeLists.txt | |
| 10 # 1st remove some tests | |
| 11 # - OpenSwathMzMLFileCacher with -convert_back argument https://github.com/OpenMS/OpenMS/issues/4399 | |
| 12 # - IDRipper PATH gets empty causing problems. TODO But overall the option needs to be handled differentlt | |
| 13 # - several tools with duplicated input (leads to conflict when linking) | |
| 14 # - MaRaCluster with -consensus_out (parameter blacklister: https://github.com/OpenMS/OpenMS/issues/4456) | |
| 15 # - FileMerger with mixed dta dta2d input (ftype can not be specified in the test, dta can not be sniffed) | |
| 16 # - some input files are originally in a subdir (degenerated cases/), but not in test-data | |
| 17 # - OpenSwathAnalyzer 9/10: cachedMzML (not supported yet) | |
| 18 # - SiriusAdapter_4 depends on online service which may timeout .. so keep disabled https://github.com/OpenMS/OpenMS/pull/5010 | |
| 19 # - SiriusAdapter_10 should work in >2.8 https://github.com/OpenMS/OpenMS/issues/5869 | |
| 20 CMAKE=$(cat $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake | | |
| 21 sed 's@${DATA_DIR_SHARE}/@@g' | | |
| 22 grep -v 'OpenSwathMzMLFileCacher .*-convert_back' | | |
| 23 sed 's/${TMP_RIP_PATH}/""/' | | |
| 24 grep -v "MaRaClusterAdapter.*-consensus_out"| | |
| 25 grep -v "FileMerger_1_input1.dta2d.*FileMerger_1_input2.dta " | | |
| 26 sed 's@degenerate_cases/@@g' | | |
| 27 egrep -v 'TOPP_OpenSwathAnalyzer_test_3"|TOPP_OpenSwathAnalyzer_test_4"' | | |
| 28 sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/' | | |
| 29 grep -v '"TOPP_SiriusAdapter_10"') | |
| 30 | |
| 31 # 1st part is a dirty hack to join lines containing a single function call, e.g. | |
| 32 # addtest(.... | |
| 33 # ....) | |
| 34 echo "$CMAKE" | sed 's/#.*//; s/^\s*//; s/\s*$//' | grep -v "^#" | grep -v "^$" | awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | | |
| 35 grep -iE "add_test\(\"(TOPP|UTILS)_.*/$id " | egrep -v "_prepare\"|_convert|WRITEINI|WRITECTD|INVALIDVALUE" | while read -r line | |
| 36 do | |
| 37 line=$(echo "$line" | sed 's/add_test("\([^"]\+\)"/\1/; s/)$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g') | |
| 38 # >&2 echo $line | |
| 39 test_id=$(echo "$line" | cut -d" " -f 1) | |
| 40 tool_id=$(echo "$line" | cut -d" " -f 2) | |
| 41 # >&2 echo "test_id $test_id" | |
| 42 if [[ $test_id =~ _out_?[0-9]? ]]; then | |
| 43 >&2 echo " skip $test_id $line" | |
| 44 continue | |
| 45 fi | |
| 46 if [[ ${id,,} != ${tool_id,,} ]]; then | |
| 47 >&2 echo " skip $test_id ($id != $tool_id) $line" | |
| 48 continue | |
| 49 fi | |
| 50 | |
| 51 #remove tests with set_tests_properties(....PROPERTIES WILL_FAIL 1) | |
| 52 if grep -lq "$test_id"'\".* PROPERTIES WILL_FAIL 1' $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake; then | |
| 53 >&2 echo " skip failing "$test_id | |
| 54 continue | |
| 55 fi | |
| 56 tes=" <test>\n" | |
| 57 line=$(fix_tmp_files "$line") | |
| 58 line=$(unique_files "$line") | |
| 59 # >&2 echo LINE $line | |
| 60 #if there is an ini file then we use this to generate the test | |
| 61 #otherwise the ctd file is used | |
| 62 #other command line parameters are inserted later into this xml | |
| 63 if grep -lq "\-ini" <<<"$line"; then | |
| 64 ini=$(echo $line | sed 's/.*-ini \([^ ]\+\).*/\1/') | |
| 65 ini="test-data/$ini" | |
| 66 else | |
| 67 ini="ctd/$tool_id.ctd" | |
| 68 fi | |
| 69 # >&2 echo "========================================================" | |
| 70 # >&2 echo "USING ini $ini" | |
| 71 cli=$(echo $line |cut -d" " -f3- | sed 's/-ini [^ ]\+//') | |
| 72 | |
| 73 ctdtmp=$(mktemp) | |
| 74 # using eval: otherwise for some reason quoted values are not used properly ('A B' -> ["'A", "B'"]) | |
| 75 # >&2 echo "python3 fill_ctd_clargs.py --ini_file $ini $cli" | |
| 76 eval "python3 fill_ctd_clargs.py --ini_file $ini $cli" > "$ctdtmp" | |
| 77 # >&2 echo $ctdtmp | |
| 78 # >&2 cat $ctdtmp | |
| 79 testtmp=$(mktemp) | |
| 80 # >&2 echo CTDConverter galaxy -i $ctdtmp -o $testtmp -s aux/tools_blacklist.txt -f "$FILETYPES" -m macros.xml -t tool.conf -p aux/hardcoded_params.json --tool-version $VERSION --test-only --test-unsniffable csv tsv txt dta dta2d edta mrm splib --test-condition "compare=sim_size" "delta_frac=0.7" | |
| 81 CTDConverter galaxy -i $ctdtmp -o $testtmp -s aux/tools_blacklist.txt -f "$FILETYPES" -m macros.xml -t tool.conf -p aux/hardcoded_params.json --tool-version $VERSION --test-only --test-unsniffable csv tsv txt dta dta2d edta mrm splib --test-condition "compare=sim_size" "delta_frac=0.7" > /dev/null | |
| 82 echo "<!-- $test_id -->" | |
| 83 cat $testtmp | grep -v '<output.*file=""' # | grep -v 'CHEMISTRY/' | |
| 84 | |
| 85 rm "$ctdtmp" "$testtmp" | |
| 86 | |
| 87 #> /dev/null | |
| 88 | |
| 89 #rm $testtmp | |
| 90 done | |
| 91 echo '</xml>' | |
| 92 } | |
| 93 | |
# Some tests use the same file twice which does not work in planemo tests,
# hence a symlink is created in test-data/ for each repeated use of a file.
#
# Arguments: the command line of a test, either as one string or as several
#            words; it is split on whitespace (no glob expansion is done).
# Outputs:   the command line, joined by single spaces, on stdout. For every
#            word that names an existing file test-data/WORD and occurs more
#            than once, the 2nd, 3rd, ... occurrence is replaced by a name
#            with _2, _3, ... inserted before the extension (appended if the
#            file name has no extension).
# Side effects: creates/overwrites the corresponding symlinks in test-data/.
function unique_files {
    local IFS=$' \t\n'
    local -a tokens=() candidates=()
    local arg new_arg base cnt last i

    # read returns non-zero at end of input when -d '' is used; that is expected
    read -r -d '' -a tokens <<< "$*" || true
    candidates=("${tokens[@]}")

    for arg in "${candidates[@]}"; do
        [[ -f "test-data/$arg" ]] || continue
        while :; do
            # count exact occurrences (whole words only) and remember the last one
            cnt=0
            last=-1
            for i in "${!tokens[@]}"; do
                if [[ "${tokens[i]}" == "$arg" ]]; then
                    cnt=$((cnt + 1))
                    last=$i
                fi
            done
            [[ $cnt -gt 1 ]] || break

            base=${arg##*/}
            if [[ "$base" == *.* ]]; then
                new_arg="${arg%.*}_${cnt}.${arg##*.}"
            else
                # without an extension the suffix must still change the name,
                # otherwise the file would be replaced by a link to itself
                new_arg="${arg}_${cnt}"
            fi
            # the link lives next to the original file, so it targets the bare name
            ln -fs -- "$base" "test-data/$new_arg"
            tokens[last]=$new_arg
        done
    done

    printf '%s\n' "${tokens[*]}"
}
| 114 | |
# Options of out_type selects need to be fixed to Galaxy data types.
#
# Arguments: $1 - text the wanted lines start with (used as a grep pattern
#                 anchored at the beginning of the line)
#            $2 - file to look it up in
# Outputs:   the second whitespace-separated field of every matching line
function fix_out_type {
    local line_start=$1
    local lookup_file=$2

    grep "^${line_start}" "${lookup_file}" |
        awk '{ print $2 }'
}
| 119 | |
# OpenMS tests output to tmp files and compare with FuzzyDiff to the expected file.
# problem: the extension of the tmp files is unusable for test generation.
# unfortunately the extensions used in the DIFF lines are not always usable for the CLI
# (e.g. for prepare_test_data, e.g. CLI expects csv but test file is txt)
# this function replaces the tmp file by the expected file.
#
# Arguments: the command line of a test, either as one string or as several
#            words; it is split on whitespace (no glob expansion is done).
# Globals:   OPENMSGIT (read) - root of the OpenMS source checkout
# Outputs:   the words on stdout, each one preceded by a single space (so the
#            result starts with a space). A tmp word is replaced by the base
#            name of the -in2 file of the ${DIFF} call whose -in1 is exactly
#            that word, provided test-data/<that name> is an existing file.
# Side effects: test-data/<expected file> is replaced by a symlink to the tmp
#            file name.
function fix_tmp_files {
    local IFS=$' \t\n'
    local -a tokens=()
    local ret="" joined="" joined_ready=""
    local a cand match in1 in2

    # read returns non-zero at end of input when -d '' is used; that is expected
    read -r -d '' -a tokens <<< "$*" || true

    for a in "${tokens[@]}"; do
        # NOTE(review): the dot in ".tmp$" is an unescaped regex dot (any
        # character); kept as is so the same words as before are looked up.
        if [[ ! $a =~ .tmp$ ]] && [[ ! $a =~ _tmp_ ]]; then
            ret="$ret $a"
            continue
        fi

        # join multi-line function calls once per invocation, not once per word
        if [[ -z "$joined_ready" ]]; then
            joined=$(cat "$OPENMSGIT/src/tests/topp/CMakeLists.txt" "$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake" |
                awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g')
            joined_ready=1
        fi

        # Take the first ${DIFF} call whose -in1 is exactly this word. Calls
        # that merely contain the word (e.g. BarFoo_1.tmp for Foo_1.tmp) are
        # skipped instead of spoiling the lookup.
        match=""
        while IFS= read -r cand; do
            [[ $cand == *'${DIFF}'*"$a"* ]] || continue
            in1=$(sed 's/.*-in1 \([^ ]\+\).*/\1/' <<< "$cand")
            if [[ "$in1" == "$a" ]]; then
                match=$cand
                break
            fi
        done < <(grep -F -- "$a" <<< "$joined")

        if [[ -z "$match" ]]; then
            ret="$ret $a"
            continue
        fi

        in2=$(sed 's/.*-in2 \([^ ]\+\).*/\1/' <<< "$match")
        in2=${in2##*/}
        in2=${in2%')'}
        if [[ -f "test-data/$in2" ]]; then
            ln -fs -- "$a" "test-data/$in2"
            ret="$ret $in2"
        else
            ret="$ret $a"
        fi
    done

    printf '%s\n' "$ret"
}
| 155 | |
# Create symlinks in test-data/ that connect expected files and tmp files.
#
# 1. For every ${DIFF} call in the OpenMS CMake test definitions:
#    test-data/<base name of -in2> becomes a symlink to <base name of -in1>.
# 2. For every *.tmp file found below test-data/:
#    test-data/<name without .tmp> becomes a symlink to <name>.tmp.
#
# Globals:   OPENMSGIT (read) - root of the OpenMS source checkout
# Outputs:   "not linking equal ..." on stderr when -in1 and -in2 have the
#            same base name
# Side effects: creates/overwrites symlinks in test-data/
function link_tmp_files {
    local in1_re='-in1 [^ ]+'
    local in2_re='-in2 [^ ]+'
    local line in1 in2 tmp_file tmp_name

    # note this also considers commented lines (starting with a #)
    # because of tests where the diff command is commented and we
    # still want to use the extension of these files
    cat "$OPENMSGIT/src/tests/topp/CMakeLists.txt" "$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake" |
        sed 's/^\s*//; s/\s*$//' | grep -v "^$" |
        awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' |
        grep "\${DIFF}" |
        while read -r line; do
            # without both arguments the extraction below would hand back the whole line
            [[ $line =~ $in1_re && $line =~ $in2_re ]] || continue

            in1=$(sed 's/.*-in1 \([^ ]\+\).*/\1/' <<< "$line")
            in1=${in1##*/}
            in1=${in1%')'}
            in2=$(sed 's/.*-in2 \([^ ]\+\).*/\1/' <<< "$line")
            in2=${in2##*/}
            in2=${in2%')'}
            if [[ "$in1" == "$in2" ]]; then
                >&2 echo "not linking equal $in1 $in2"
                continue
            fi
            ln -f -s -- "$in1" "test-data/$in2"
        done

    find test-data/ -name "*.tmp" -print0 |
        while IFS= read -r -d '' tmp_file; do
            tmp_name=${tmp_file##*/}
            # a file called just ".tmp" has no name left to link from
            [[ -n "${tmp_name%.tmp}" ]] || continue
            # always force: "-e" is false for a dangling symlink, and a plain
            # "ln -s" then fails because the link name already exists
            ln -fs -- "$tmp_name" "test-data/${tmp_name%.tmp}"
        done
}
| 182 | |
| 183 | |
| 184 | |
# Parse the tool calls (add_test) from the OpenMS sources and print a shell
# snippet that executes each of them.
#
# Skipped are ${DIFF} calls, WRITEINI/WRITECTD/INVALIDVALUE tests and tests
# marked with set_tests_properties(... PROPERTIES WILL_FAIL 1).
#
# Globals:   OPENMSGIT (read) - root of the OpenMS source checkout
# Outputs:   for every remaining test three lines on stdout: an
#            'echo executing "ID"' line, the command redirecting its output to
#            ID.stdout / ID.stderr, and an if-statement reporting a non-zero
#            exit status. "skip failing ID" messages go to stderr.
function prepare_test_data {
    local cmake_main="$OPENMSGIT/src/tests/topp/CMakeLists.txt"
    local cmake_thirdparty="$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake"
    local will_fail line test_id

    # Collect the WILL_FAIL property lines once instead of grepping both
    # CMake files again for every test.
    will_fail=$(grep -h ' PROPERTIES WILL_FAIL 1' "$cmake_main" "$cmake_thirdparty")

    # TODO SiriusAdapter depends on online service which may timeout .. so keep disabled https://github.com/OpenMS/OpenMS/pull/5010
    cat "$cmake_main" "$cmake_thirdparty" |
        sed 's/#.*$//' | sed 's/^\s*//; s/\s*$//' | grep -v "^$" |
        awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' |
        sed 's/degenerate_cases\///' |
        grep -Ev "WRITEINI|WRITECTD|INVALIDVALUE|DIFF" |
        grep add_test |
        grep -E "TOPP|UTILS" |
        sed 's@${DATA_DIR_SHARE}/@@g;' |
        sed 's@${TMP_RIP_PATH}@./@g' |
        sed 's@TOFCalibration_ref_masses @TOFCalibration_ref_masses.txt @g; s@TOFCalibration_const @TOFCalibration_const.csv @' |
        sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/' |
        # NOTE(review): read is used without -r on purpose; adding it would
        # change the emitted commands for lines containing backslashes.
        while read line; do
            test_id=$(echo "$line" | sed 's/add_test(//; s/"//g; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f1)

            if grep -q -- "${test_id}\".* PROPERTIES WILL_FAIL 1" <<< "$will_fail"; then
                >&2 echo " skip failing $test_id"
                continue
            fi

            # drop the add_test wrapper and the test id, keep 'Tool args...'
            line=$(echo "$line" | sed 's/add_test("//; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f2-)

            printf 'echo executing "%s"\n' "$test_id"
            printf '%s\n' "$line > $test_id.stdout 2> $test_id.stderr"
            printf '%s\n' "if [[ \"\$?\" -ne \"0\" ]]; then >&2 echo '$test_id failed'; >&2 echo -e \"stderr:\n\$(cat $test_id.stderr | sed 's/^/ /')\"; echo -e \"stdout:\n\$(cat $test_id.stdout)\";fi"
        done
}
